diff options
author | Keith Whitwell <keithw@vmware.com> | 2010-10-17 19:03:42 -0700 |
---|---|---|
committer | Keith Whitwell <keithw@vmware.com> | 2010-10-17 19:09:42 -0700 |
commit | 0072acd447dc6be652e63752e50215c3105322c8 (patch) | |
tree | 847d1763b54772d336a04e606f8248291c3092b7 /src/gallium | |
parent | 543fb77ddece7e1806e8eaa0d65bb2a945ef9a75 (diff) | |
parent | ca2b2ac131933b4171b519813df1aaa3a81621cd (diff) |
Merge remote branch 'origin/master' into lp-setup-llvm
Conflicts:
src/gallium/drivers/llvmpipe/lp_setup_coef.c
src/gallium/drivers/llvmpipe/lp_setup_coef.h
src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c
src/gallium/drivers/llvmpipe/lp_setup_point.c
src/gallium/drivers/llvmpipe/lp_setup_tri.c
src/gallium/drivers/llvmpipe/lp_state_derived.c
src/gallium/drivers/llvmpipe/lp_state_fs.h
Diffstat (limited to 'src/gallium')
440 files changed, 44710 insertions, 16530 deletions
diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index bff399ec64..036c11986e 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -40,7 +40,7 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) $(GENERATED_SOURC touch depend $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(GENERATED_SOURCES) 2> /dev/null -$(PROGS): % : %.o +$(PROGS): % : %.o $(PROGS_DEPS) $(LD) $(LDFLAGS) $(filter %.o,$^) -o $@ -Wl,--start-group $(LIBS) -Wl,--end-group # Emacs tags diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 2de764c4ee..abd33f6eef 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -8,6 +8,7 @@ C_SOURCES = \ cso_cache/cso_context.c \ cso_cache/cso_hash.c \ draw/draw_context.c \ + draw/draw_fs.c \ draw/draw_gs.c \ draw/draw_pipe.c \ draw/draw_pipe_aaline.c \ @@ -121,6 +122,7 @@ C_SOURCES = \ util/u_handle_table.c \ util/u_hash.c \ util/u_hash_table.c \ + util/u_index_modify.c \ util/u_keymap.c \ util/u_linear.c \ util/u_linkage.c \ @@ -174,6 +176,7 @@ GALLIVM_SOURCES = \ gallivm/lp_bld_struct.c \ gallivm/lp_bld_swizzle.c \ gallivm/lp_bld_tgsi_aos.c \ + gallivm/lp_bld_tgsi_info.c \ gallivm/lp_bld_tgsi_soa.c \ gallivm/lp_bld_type.c \ draw/draw_llvm.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 294df30094..94cd74424a 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -54,6 +54,7 @@ source = [ 'cso_cache/cso_context.c', 'cso_cache/cso_hash.c', 'draw/draw_context.c', + 'draw/draw_fs.c', 'draw/draw_gs.c', 'draw/draw_pipe.c', 'draw/draw_pipe_aaline.c', @@ -170,6 +171,7 @@ source = [ 'util/u_handle_table.c', 'util/u_hash.c', 'util/u_hash_table.c', + 'util/u_index_modify.c', 'util/u_keymap.c', 'util/u_linear.c', 'util/u_linkage.c', @@ -225,6 +227,7 @@ if env['llvm']: 'gallivm/lp_bld_struct.c', 'gallivm/lp_bld_swizzle.c', 'gallivm/lp_bld_tgsi_aos.c', + 'gallivm/lp_bld_tgsi_info.c', 'gallivm/lp_bld_tgsi_soa.c', 'gallivm/lp_bld_type.c', 'draw/draw_llvm.c', diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 937b093479..40f654643b 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -413,6 +413,42 @@ draw_set_force_passthrough( struct draw_context *draw, boolean enable ) } + +/** + * Allocate an extra vertex/geometry shader vertex attribute. + * This is used by some of the optional draw module stages such + * as wide_point which may need to allocate additional generic/texcoord + * attributes. + */ +int +draw_alloc_extra_vertex_attrib(struct draw_context *draw, + uint semantic_name, uint semantic_index) +{ + const int num_outputs = draw_current_shader_outputs(draw); + const int n = draw->extra_shader_outputs.num; + + assert(n < Elements(draw->extra_shader_outputs.semantic_name)); + + draw->extra_shader_outputs.semantic_name[n] = semantic_name; + draw->extra_shader_outputs.semantic_index[n] = semantic_index; + draw->extra_shader_outputs.slot[n] = num_outputs + n; + draw->extra_shader_outputs.num++; + + return draw->extra_shader_outputs.slot[n]; +} + + +/** + * Remove all extra vertex attributes that were allocated with + * draw_alloc_extra_vertex_attrib(). + */ +void +draw_remove_extra_vertex_attribs(struct draw_context *draw) +{ + draw->extra_shader_outputs.num = 0; +} + + /** * Ask the draw module for the location/slot of the given vertex attribute in * a post-transformed vertex. @@ -446,12 +482,12 @@ draw_find_shader_output(const struct draw_context *draw, return i; } - /* XXX there may be more than one extra vertex attrib. - * For example, simulated gl_FragCoord and gl_PointCoord. - */ - if (draw->extra_shader_outputs.semantic_name == semantic_name && - draw->extra_shader_outputs.semantic_index == semantic_index) { - return draw->extra_shader_outputs.slot; + /* Search the extra vertex attributes */ + for (i = 0; i < draw->extra_shader_outputs.num; i++) { + if (draw->extra_shader_outputs.semantic_name[i] == semantic_name && + draw->extra_shader_outputs.semantic_index[i] == semantic_index) { + return draw->extra_shader_outputs.slot[i]; + } } return 0; @@ -470,16 +506,18 @@ draw_find_shader_output(const struct draw_context *draw, uint draw_num_shader_outputs(const struct draw_context *draw) { - uint count = draw->vs.vertex_shader->info.num_outputs; + uint count; /* If a geometry shader is present, its outputs go to the * driver, else the vertex shader's outputs. */ if (draw->gs.geometry_shader) count = draw->gs.geometry_shader->info.num_outputs; + else + count = draw->vs.vertex_shader->info.num_outputs; + + count += draw->extra_shader_outputs.num; - if (draw->extra_shader_outputs.slot > 0) - count++; return count; } @@ -671,6 +709,11 @@ draw_set_samplers(struct draw_context *draw, draw->samplers[i] = NULL; draw->num_samplers = num; + +#ifdef HAVE_LLVM + if (draw->llvm) + draw_llvm_set_sampler_state(draw); +#endif } void @@ -678,9 +721,9 @@ draw_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]) + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]) { #ifdef HAVE_LLVM if(draw->llvm) diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 4f0d30123a..ff4f753604 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -46,9 +46,9 @@ struct draw_context; struct draw_stage; struct draw_vertex_shader; struct draw_geometry_shader; +struct draw_fragment_shader; struct tgsi_sampler; -#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */ struct draw_context *draw_create( struct pipe_context *pipe ); @@ -119,9 +119,9 @@ draw_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]); + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]); /* @@ -138,6 +138,17 @@ void draw_delete_vertex_shader(struct draw_context *draw, /* + * Fragment shader functions + */ +struct draw_fragment_shader * +draw_create_fragment_shader(struct draw_context *draw, + const struct pipe_shader_state *shader); +void draw_bind_fragment_shader(struct draw_context *draw, + struct draw_fragment_shader *dvs); +void draw_delete_fragment_shader(struct draw_context *draw, + struct draw_fragment_shader *dvs); + +/* * Geometry shader functions */ struct draw_geometry_shader * diff --git a/src/gallium/auxiliary/draw/draw_fs.c b/src/gallium/auxiliary/draw/draw_fs.c new file mode 100644 index 0000000000..1543bd86f1 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_fs.c @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_shader_tokens.h" + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_prim.h" + +#include "tgsi/tgsi_parse.h" + +#include "draw_fs.h" +#include "draw_private.h" +#include "draw_context.h" + + +struct draw_fragment_shader * +draw_create_fragment_shader(struct draw_context *draw, + const struct pipe_shader_state *shader) +{ + struct draw_fragment_shader *dfs; + + dfs = CALLOC_STRUCT(draw_fragment_shader); + if (dfs) { + dfs->base = *shader; + tgsi_scan_shader(shader->tokens, &dfs->info); + } + + return dfs; +} + + +void +draw_bind_fragment_shader(struct draw_context *draw, + struct draw_fragment_shader *dfs) +{ + draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); + + draw->fs.fragment_shader = dfs; +} + + +void +draw_delete_fragment_shader(struct draw_context *draw, + struct draw_fragment_shader *dfs) +{ + FREE(dfs); +} + diff --git a/src/gallium/auxiliary/draw/draw_fs.h b/src/gallium/auxiliary/draw/draw_fs.h new file mode 100644 index 0000000000..44995b8277 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_fs.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef DRAW_FS_H +#define DRAW_FS_H + + +#include "tgsi/tgsi_scan.h" + + +struct draw_fragment_shader +{ + struct pipe_shader_state base; + struct tgsi_shader_info info; +}; + + +#endif /* DRAW_FS_H */ diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 8759c38cab..d94340367c 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -44,6 +44,7 @@ #include "tgsi/tgsi_dump.h" #include "util/u_cpu_detect.h" +#include "util/u_math.h" #include "util/u_pointer.h" #include "util/u_string.h" @@ -71,12 +72,17 @@ init_globals(struct draw_llvm *llvm) elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = - LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); + LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS); elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = - LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); + LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS); elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), - DRAW_MAX_TEXTURE_LEVELS); + PIPE_MAX_TEXTURE_LEVELS); + elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType(); + elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType(); + elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType(); + elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] = + LLVMArrayType(LLVMFloatType(), 4); texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -101,6 +107,18 @@ init_globals(struct draw_llvm *llvm) LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data, llvm->target, texture_type, DRAW_JIT_TEXTURE_DATA); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_MIN_LOD); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_MAX_LOD); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_LOD_BIAS); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_BORDER_COLOR); LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, llvm->target, texture_type); @@ -1048,9 +1066,9 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]) + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]) { unsigned j; struct draw_jit_texture *jit_tex; @@ -1072,6 +1090,25 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, } } + +void +draw_llvm_set_sampler_state(struct draw_context *draw) +{ + unsigned i; + + for (i = 0; i < draw->num_samplers; i++) { + struct draw_jit_texture *jit_tex = &draw->llvm->jit_context.textures[i]; + + if (draw->samplers[i]) { + jit_tex->min_lod = draw->samplers[i]->min_lod; + jit_tex->max_lod = draw->samplers[i]->max_lod; + jit_tex->lod_bias = draw->samplers[i]->lod_bias; + COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color); + } + } +} + + void draw_llvm_destroy_variant(struct draw_llvm_variant *variant) { diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 6196b2f983..de89b657f3 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -41,7 +41,6 @@ #include <llvm-c/Target.h> #include <llvm-c/ExecutionEngine.h> -#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */ struct draw_llvm; struct llvm_vertex_shader; @@ -52,9 +51,13 @@ struct draw_jit_texture uint32_t height; uint32_t depth; uint32_t last_level; - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS]; - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS]; - const void *data[DRAW_MAX_TEXTURE_LEVELS]; + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS]; + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS]; + const void *data[PIPE_MAX_TEXTURE_LEVELS]; + float min_lod; + float max_lod; + float lod_bias; + float border_color[4]; }; enum { @@ -65,6 +68,10 @@ enum { DRAW_JIT_TEXTURE_ROW_STRIDE, DRAW_JIT_TEXTURE_IMG_STRIDE, DRAW_JIT_TEXTURE_DATA, + DRAW_JIT_TEXTURE_MIN_LOD, + DRAW_JIT_TEXTURE_MAX_LOD, + DRAW_JIT_TEXTURE_LOD_BIAS, + DRAW_JIT_TEXTURE_BORDER_COLOR, DRAW_JIT_TEXTURE_NUM_FIELDS /* number of fields above */ }; @@ -275,12 +282,15 @@ draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, LLVMValueRef context_ptr); void +draw_llvm_set_sampler_state(struct draw_context *draw); + +void draw_llvm_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]); + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]); #endif diff --git a/src/gallium/auxiliary/draw/draw_llvm_sample.c b/src/gallium/auxiliary/draw/draw_llvm_sample.c index e9811010db..ac1fbb179c 100644 --- a/src/gallium/auxiliary/draw/draw_llvm_sample.c +++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c @@ -146,6 +146,10 @@ DRAW_LLVM_TEXTURE_MEMBER(last_level, DRAW_JIT_TEXTURE_LAST_LEVEL, TRUE) DRAW_LLVM_TEXTURE_MEMBER(row_stride, DRAW_JIT_TEXTURE_ROW_STRIDE, FALSE) DRAW_LLVM_TEXTURE_MEMBER(img_stride, DRAW_JIT_TEXTURE_IMG_STRIDE, FALSE) DRAW_LLVM_TEXTURE_MEMBER(data_ptr, DRAW_JIT_TEXTURE_DATA, FALSE) +DRAW_LLVM_TEXTURE_MEMBER(min_lod, DRAW_JIT_TEXTURE_MIN_LOD, TRUE) +DRAW_LLVM_TEXTURE_MEMBER(max_lod, DRAW_JIT_TEXTURE_MAX_LOD, TRUE) +DRAW_LLVM_TEXTURE_MEMBER(lod_bias, DRAW_JIT_TEXTURE_LOD_BIAS, TRUE) +DRAW_LLVM_TEXTURE_MEMBER(border_color, DRAW_JIT_TEXTURE_BORDER_COLOR, FALSE) static void @@ -207,6 +211,10 @@ draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, sampler->dynamic_state.base.row_stride = draw_llvm_texture_row_stride; sampler->dynamic_state.base.img_stride = draw_llvm_texture_img_stride; sampler->dynamic_state.base.data_ptr = draw_llvm_texture_data_ptr; + sampler->dynamic_state.base.min_lod = draw_llvm_texture_min_lod; + sampler->dynamic_state.base.max_lod = draw_llvm_texture_max_lod; + sampler->dynamic_state.base.lod_bias = draw_llvm_texture_lod_bias; + sampler->dynamic_state.base.border_color = draw_llvm_texture_border_color; sampler->dynamic_state.static_state = static_state; sampler->dynamic_state.context_ptr = context_ptr; diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index eac21110be..d1aba76309 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -688,10 +688,9 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) aaline->tex_slot = draw_current_shader_outputs(draw); aaline->pos_slot = draw_current_shader_position_output(draw);; - /* advertise the extra post-transformed vertex attribute */ - draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; - draw->extra_shader_outputs.semantic_index = aaline->fs->generic_attrib; - draw->extra_shader_outputs.slot = aaline->tex_slot; + /* allocate the extra post-transformed vertex attribute */ + (void) draw_alloc_extra_vertex_attrib(draw, TGSI_SEMANTIC_GENERIC, + aaline->fs->generic_attrib); /* how many samplers? */ /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ @@ -744,7 +743,7 @@ aaline_flush(struct draw_stage *stage, unsigned flags) draw->suspend_flushing = FALSE; - draw->extra_shader_outputs.slot = 0; + draw_remove_extra_vertex_attribs(draw); } diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index d406a86ccb..5ea552f51c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -701,9 +701,9 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) aapoint->pos_slot = draw_current_shader_position_output(draw); - draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; - draw->extra_shader_outputs.semantic_index = aapoint->fs->generic_attrib; - draw->extra_shader_outputs.slot = aapoint->tex_slot; + /* allocate the extra post-transformed vertex attribute */ + (void) draw_alloc_extra_vertex_attrib(draw, TGSI_SEMANTIC_GENERIC, + aapoint->fs->generic_attrib); /* find psize slot in post-transform vertex */ aapoint->psize_slot = -1; @@ -754,7 +754,7 @@ aapoint_flush(struct draw_stage *stage, unsigned flags) draw->suspend_flushing = FALSE; - draw->extra_shader_outputs.slot = 0; + draw_remove_extra_vertex_attribs(draw); } diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 8a3d499feb..a10d8e9edc 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -263,6 +263,8 @@ do_clip_tri( struct draw_stage *stage, clipmask &= ~(1<<plane_idx); assert(n < MAX_CLIPPED_VERTICES); + if (n >= MAX_CLIPPED_VERTICES) + return; inlist[n] = inlist[0]; /* prevent rotation of vertices */ for (i = 1; i <= n; i++) { @@ -272,16 +274,22 @@ do_clip_tri( struct draw_stage *stage, if (!IS_NEGATIVE(dp_prev)) { assert(outcount < MAX_CLIPPED_VERTICES); + if (outcount >= MAX_CLIPPED_VERTICES) + return; outlist[outcount++] = vert_prev; } if (DIFFERENT_SIGNS(dp, dp_prev)) { struct vertex_header *new_vert; - assert(tmpnr < MAX_CLIPPED_VERTICES+1); + assert(tmpnr < MAX_CLIPPED_VERTICES + 1); + if (tmpnr >= MAX_CLIPPED_VERTICES + 1) + return; new_vert = clipper->stage.tmp[tmpnr++]; assert(outcount < MAX_CLIPPED_VERTICES); + if (outcount >= MAX_CLIPPED_VERTICES) + return; outlist[outcount++] = new_vert; if (IS_NEGATIVE(dp)) { @@ -321,27 +329,32 @@ do_clip_tri( struct draw_stage *stage, /* If flat-shading, copy provoking vertex color to polygon vertex[0] */ - if (clipper->flat) { - if (stage->draw->rasterizer->flatshade_first) { - if (inlist[0] != header->v[0]) { - assert(tmpnr < MAX_CLIPPED_VERTICES + 1); - inlist[0] = dup_vert(stage, inlist[0], tmpnr++); - copy_colors(stage, inlist[0], header->v[0]); + if (n >= 3) { + if (clipper->flat) { + if (stage->draw->rasterizer->flatshade_first) { + if (inlist[0] != header->v[0]) { + assert(tmpnr < MAX_CLIPPED_VERTICES + 1); + if (tmpnr >= MAX_CLIPPED_VERTICES + 1) + return; + inlist[0] = dup_vert(stage, inlist[0], tmpnr++); + copy_colors(stage, inlist[0], header->v[0]); + } } - } - else { - if (inlist[0] != header->v[2]) { - assert(tmpnr < MAX_CLIPPED_VERTICES + 1); - inlist[0] = dup_vert(stage, inlist[0], tmpnr++); - copy_colors(stage, inlist[0], header->v[2]); + else { + if (inlist[0] != header->v[2]) { + assert(tmpnr < MAX_CLIPPED_VERTICES + 1); + if (tmpnr >= MAX_CLIPPED_VERTICES + 1) + return; + inlist[0] = dup_vert(stage, inlist[0], tmpnr++); + copy_colors(stage, inlist[0], header->v[2]); + } } } - } - - /* Emit the polygon as triangles to the setup stage: - */ - if (n >= 3) + + /* Emit the polygon as triangles to the setup stage: + */ emit_poly( stage, inlist, n, header ); + } } diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index 8b92543987..c575a8ac7c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -172,7 +172,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) wide_lines = (rast->line_width > draw->pipeline.wide_line_threshold && !rast->line_smooth); - /* drawing large points? */ + /* drawing large/sprite points (but not AA points)? */ if (rast->sprite_coord_enable && draw->pipeline.point_sprite) wide_points = TRUE; else if (rast->point_smooth && draw->pipeline.aapoint) @@ -207,7 +207,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) precalc_flat = TRUE; } - if (wide_points || rast->sprite_coord_enable) { + if (wide_points) { draw->pipeline.wide_point->next = next; next = draw->pipeline.wide_point; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index ee2945c7c9..3646c6a714 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -57,26 +57,24 @@ #include "util/u_memory.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" +#include "draw_fs.h" #include "draw_vs.h" #include "draw_pipe.h" struct widepoint_stage { - struct draw_stage stage; + struct draw_stage stage; /**< base class */ float half_point_size; float xbias; float ybias; - uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS]; - uint texcoord_enable[PIPE_MAX_SHADER_OUTPUTS]; - uint num_texcoords; - uint texcoord_mode; + /** for automatic texcoord generation/replacement */ + uint num_texcoord_gen; + uint texcoord_gen_slot[PIPE_MAX_SHADER_OUTPUTS]; int psize_slot; - - int point_coord_fs_input; /**< input for pointcoord */ }; @@ -96,30 +94,20 @@ widepoint_stage( struct draw_stage *stage ) static void set_texcoords(const struct widepoint_stage *wide, struct vertex_header *v, const float tc[4]) { + const struct draw_context *draw = wide->stage.draw; + const struct pipe_rasterizer_state *rast = draw->rasterizer; + const uint texcoord_mode = rast->sprite_coord_mode; uint i; - for (i = 0; i < wide->num_texcoords; i++) { - if (wide->texcoord_enable[i]) { - uint j = wide->texcoord_slot[i]; - v->data[j][0] = tc[0]; - if (wide->texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) - v->data[j][1] = 1.0f - tc[1]; - else - v->data[j][1] = tc[1]; - v->data[j][2] = tc[2]; - v->data[j][3] = tc[3]; - } - } - if (wide->point_coord_fs_input >= 0) { - /* put gl_PointCoord into the extra vertex slot */ - uint slot = wide->stage.draw->extra_shader_outputs.slot; + for (i = 0; i < wide->num_texcoord_gen; i++) { + const uint slot = wide->texcoord_gen_slot[i]; v->data[slot][0] = tc[0]; - if (wide->texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) + if (texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) v->data[slot][1] = 1.0f - tc[1]; else v->data[slot][1] = tc[1]; - v->data[slot][2] = 0.0F; - v->data[slot][3] = 1.0F; + v->data[slot][2] = tc[2]; + v->data[slot][3] = tc[3]; } } @@ -201,18 +189,9 @@ static void widepoint_point( struct draw_stage *stage, } -static int -find_pntc_input_attrib(struct draw_context *draw) -{ - /* Scan the fragment program's input decls to find the pointcoord - * attribute. The xy components will store the point coord. - */ - return 0; /* XXX fix this */ -} - - -static void widepoint_first_point( struct draw_stage *stage, - struct prim_header *header ) +static void +widepoint_first_point(struct draw_stage *stage, + struct prim_header *header) { struct widepoint_stage *wide = widepoint_stage(stage); struct draw_context *draw = stage->draw; @@ -244,31 +223,49 @@ static void widepoint_first_point( struct draw_stage *stage, stage->point = draw_pipe_passthrough_point; } + draw_remove_extra_vertex_attribs(draw); + if (rast->point_quad_rasterization) { - /* find vertex shader texcoord outputs */ - const struct draw_vertex_shader *vs = draw->vs.vertex_shader; - uint i, j = 0; - wide->texcoord_mode = rast->sprite_coord_mode; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { - wide->texcoord_slot[j] = i; - wide->texcoord_enable[j] = (rast->sprite_coord_enable >> j) & 1; - j++; + const struct draw_fragment_shader *fs = draw->fs.fragment_shader; + uint i; + + wide->num_texcoord_gen = 0; + + /* Loop over fragment shader inputs looking for generic inputs + * for which bit 'k' in sprite_coord_enable is set. + */ + for (i = 0; i < fs->info.num_inputs; i++) { + if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { + const int generic_index = fs->info.input_semantic_index[i]; + /* Note that sprite_coord enable is a bitfield of + * PIPE_MAX_SHADER_OUTPUTS bits. + */ + if (generic_index < PIPE_MAX_SHADER_OUTPUTS && + (rast->sprite_coord_enable & (1 << generic_index))) { + /* OK, this generic attribute needs to be replaced with a + * texcoord (see above). + */ + int slot = draw_find_shader_output(draw, + TGSI_SEMANTIC_GENERIC, + generic_index); + + if (slot > 0) { + /* there's already a post-vertex shader attribute + * for this fragment shader input attribute. + */ + } + else { + /* need to allocate a new post-vertex shader attribute */ + slot = draw_alloc_extra_vertex_attrib(draw, + TGSI_SEMANTIC_GENERIC, + generic_index); + } + + /* add this slot to the texcoord-gen list */ + wide->texcoord_gen_slot[wide->num_texcoord_gen++] = slot; + } } } - wide->num_texcoords = j; - - /* find fragment shader PointCoord input */ - wide->point_coord_fs_input = find_pntc_input_attrib(draw); - - /* setup extra vp output (point coord implemented as a texcoord) */ - draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; - draw->extra_shader_outputs.semantic_index = 0; - draw->extra_shader_outputs.slot = draw_current_shader_outputs(draw); - } - else { - wide->point_coord_fs_input = -1; - draw->extra_shader_outputs.slot = 0; } wide->psize_slot = -1; @@ -295,7 +292,8 @@ static void widepoint_flush( struct draw_stage *stage, unsigned flags ) stage->point = widepoint_first_point; stage->next->flush( stage->next, flags ); - stage->draw->extra_shader_outputs.slot = 0; + + draw_remove_extra_vertex_attribs(draw); /* restore original rasterizer state */ if (draw->rast_handle) { diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 362f563ba6..d417f825a0 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -250,6 +250,11 @@ struct draw_context struct tgsi_sampler **samplers; } gs; + /** Fragment shader state */ + struct { + struct draw_fragment_shader *fragment_shader; + } fs; + /** Stream output (vertex feedback) state */ struct { struct pipe_stream_output_state state; @@ -266,9 +271,10 @@ struct draw_context /* If a prim stage introduces new vertex attributes, they'll be stored here */ struct { - uint semantic_name; - uint semantic_index; - int slot; + uint num; + uint semantic_name[10]; + uint semantic_index[10]; + uint slot[10]; } extra_shader_outputs; unsigned reduced_prim; @@ -362,6 +368,11 @@ void draw_gs_destroy( struct draw_context *draw ); uint draw_current_shader_outputs(const struct draw_context *draw); uint draw_current_shader_position_output(const struct draw_context *draw); +int draw_alloc_extra_vertex_attrib(struct draw_context *draw, + uint semantic_name, uint semantic_index); +void draw_remove_extra_vertex_attribs(struct draw_context *draw); + + /******************************************************************************* * Vertex processing (was passthrough) code: */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index dce3c3745b..00f419a486 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -614,17 +614,15 @@ lp_build_div(struct lp_build_context *bld, /** - * Linear interpolation. - * - * This also works for integer values with a few caveats. + * Linear interpolation -- without any checks. * * @sa http://www.stereopsis.com/doubleblend.html */ -LLVMValueRef -lp_build_lerp(struct lp_build_context *bld, - LLVMValueRef x, - LLVMValueRef v0, - LLVMValueRef v1) +static INLINE LLVMValueRef +lp_build_lerp_simple(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef v0, + LLVMValueRef v1) { LLVMValueRef delta; LLVMValueRef res; @@ -639,12 +637,80 @@ lp_build_lerp(struct lp_build_context *bld, res = lp_build_add(bld, v0, res); - if(bld->type.fixed) + if (bld->type.fixed) { /* XXX: This step is necessary for lerping 8bit colors stored on 16bits, * but it will be wrong for other uses. Basically we need a more * powerful lp_type, capable of further distinguishing the values * interpretation from the value storage. */ res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(bld->type, (1 << bld->type.width/2) - 1), ""); + } + + return res; +} + + +/** + * Linear interpolation. + */ +LLVMValueRef +lp_build_lerp(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef v0, + LLVMValueRef v1) +{ + const struct lp_type type = bld->type; + LLVMValueRef res; + + assert(lp_check_value(type, x)); + assert(lp_check_value(type, v0)); + assert(lp_check_value(type, v1)); + + if (type.norm) { + struct lp_type wide_type; + struct lp_build_context wide_bld; + LLVMValueRef xl, xh, v0l, v0h, v1l, v1h, resl, resh; + LLVMValueRef shift; + + assert(type.length >= 2); + assert(!type.sign); + + /* + * Create a wider type, enough to hold the intermediate result of the + * multiplication. + */ + memset(&wide_type, 0, sizeof wide_type); + wide_type.fixed = TRUE; + wide_type.width = type.width*2; + wide_type.length = type.length/2; + + lp_build_context_init(&wide_bld, bld->builder, wide_type); + + lp_build_unpack2(bld->builder, type, wide_type, x, &xl, &xh); + lp_build_unpack2(bld->builder, type, wide_type, v0, &v0l, &v0h); + lp_build_unpack2(bld->builder, type, wide_type, v1, &v1l, &v1h); + + /* + * Scale x from [0, 255] to [0, 256] + */ + + shift = lp_build_const_int_vec(wide_type, type.width - 1); + + xl = lp_build_add(&wide_bld, xl, + LLVMBuildAShr(bld->builder, xl, shift, "")); + xh = lp_build_add(&wide_bld, xh, + LLVMBuildAShr(bld->builder, xh, shift, "")); + + /* + * Lerp both halves. + */ + + resl = lp_build_lerp_simple(&wide_bld, xl, v0l, v1l); + resh = lp_build_lerp_simple(&wide_bld, xh, v0h, v1h); + + res = lp_build_pack2(bld->builder, wide_type, type, resl, resh); + } else { + res = lp_build_lerp_simple(bld, x, v0, v1); + } return res; } @@ -923,35 +989,122 @@ lp_build_round_sse41(struct lp_build_context *bld, enum lp_build_round_sse41_mode mode) { const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef i32t = LLVMInt32Type(); const char *intrinsic; + LLVMValueRef res; assert(type.floating); - assert(type.width*type.length == 128); + assert(lp_check_value(type, a)); assert(util_cpu_caps.has_sse4_1); - switch(type.width) { - case 32: - intrinsic = "llvm.x86.sse41.round.ps"; - break; - case 64: - intrinsic = "llvm.x86.sse41.round.pd"; - break; - default: - assert(0); - return bld->undef; + if (type.length == 1) { + LLVMTypeRef vec_type; + LLVMValueRef undef; + LLVMValueRef args[3]; + LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); + + switch(type.width) { + case 32: + intrinsic = "llvm.x86.sse41.round.ss"; + break; + case 64: + intrinsic = "llvm.x86.sse41.round.sd"; + break; + default: + assert(0); + return bld->undef; + } + + vec_type = LLVMVectorType(bld->elem_type, 4); + + undef = LLVMGetUndef(vec_type); + + args[0] = undef; + args[1] = LLVMBuildInsertElement(bld->builder, undef, a, index0, ""); + args[2] = LLVMConstInt(i32t, mode, 0); + + res = lp_build_intrinsic(bld->builder, intrinsic, + vec_type, args, Elements(args)); + + res = LLVMBuildExtractElement(bld->builder, res, index0, ""); + } + else { + assert(type.width*type.length == 128); + + switch(type.width) { + case 32: + intrinsic = "llvm.x86.sse41.round.ps"; + break; + case 64: + intrinsic = "llvm.x86.sse41.round.pd"; + break; + default: + assert(0); + return bld->undef; + } + + res = lp_build_intrinsic_binary(bld->builder, intrinsic, + bld->vec_type, a, + LLVMConstInt(i32t, mode, 0)); + } + + return res; +} + + +static INLINE LLVMValueRef +lp_build_iround_nearest_sse2(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMTypeRef ret_type = lp_build_int_vec_type(type); + const char *intrinsic; + LLVMValueRef res; + + assert(type.floating); + /* using the double precision conversions is a bit more complicated */ + assert(type.width == 32); + + assert(lp_check_value(type, a)); + assert(util_cpu_caps.has_sse2); + + /* This is relying on MXCSR rounding mode, which should always be nearest. */ + if (type.length == 1) { + LLVMTypeRef vec_type; + LLVMValueRef undef; + LLVMValueRef arg; + LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); + + vec_type = LLVMVectorType(bld->elem_type, 4); + + intrinsic = "llvm.x86.sse.cvtss2si"; + + undef = LLVMGetUndef(vec_type); + + arg = LLVMBuildInsertElement(bld->builder, undef, a, index0, ""); + + res = lp_build_intrinsic_unary(bld->builder, intrinsic, + ret_type, arg); + } + else { + assert(type.width*type.length == 128); + + intrinsic = "llvm.x86.sse2.cvtps2dq"; + + res = lp_build_intrinsic_unary(bld->builder, intrinsic, + ret_type, a); } - return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a, - LLVMConstInt(LLVMInt32Type(), mode, 0)); + return res; } /** - * Return the integer part of a float (vector) value. The returned value is - * a float (vector). - * Ex: trunc(-1.5) = 1.0 + * Return the integer part of a float (vector) value (== round toward zero). + * The returned value is a float (vector). + * Ex: trunc(-1.5) = -1.0 */ LLVMValueRef lp_build_trunc(struct lp_build_context *bld, @@ -962,8 +1115,10 @@ lp_build_trunc(struct lp_build_context *bld, assert(type.floating); assert(lp_check_value(type, a)); - if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) + if (util_cpu_caps.has_sse4_1 && + (type.length == 1 || type.width*type.length == 128)) { return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); + } else { LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); @@ -990,8 +1145,10 @@ lp_build_round(struct lp_build_context *bld, assert(type.floating); assert(lp_check_value(type, a)); - if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) + if (util_cpu_caps.has_sse4_1 && + (type.length == 1 || type.width*type.length == 128)) { return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); + } else { LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMValueRef res; @@ -1016,8 +1173,10 @@ lp_build_floor(struct lp_build_context *bld, assert(type.floating); assert(lp_check_value(type, a)); - if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) + if (util_cpu_caps.has_sse4_1 && + (type.length == 1 || type.width*type.length == 128)) { return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); + } else { LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMValueRef res; @@ -1042,8 +1201,10 @@ lp_build_ceil(struct lp_build_context *bld, assert(type.floating); assert(lp_check_value(type, a)); - if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) + if (util_cpu_caps.has_sse4_1 && + (type.length == 1 || type.width*type.length == 128)) { return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); + } else { LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMValueRef res; @@ -1068,9 +1229,9 @@ lp_build_fract(struct lp_build_context *bld, /** - * Return the integer part of a float (vector) value. The returned value is - * an integer (vector). - * Ex: itrunc(-1.5) = 1 + * Return the integer part of a float (vector) value (== round toward zero). + * The returned value is an integer (vector). + * Ex: itrunc(-1.5) = -1 */ LLVMValueRef lp_build_itrunc(struct lp_build_context *bld, @@ -1097,31 +1258,40 @@ lp_build_iround(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMTypeRef int_vec_type = bld->int_vec_type; LLVMValueRef res; assert(type.floating); assert(lp_check_value(type, a)); - if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) { + if (util_cpu_caps.has_sse2 && + ((type.width == 32) && (type.length == 1 || type.length == 4))) { + return lp_build_iround_nearest_sse2(bld, a); + } + else if (util_cpu_caps.has_sse4_1 && + (type.length == 1 || type.width*type.length == 128)) { res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); } else { - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); - LLVMValueRef sign; LLVMValueRef half; - /* get sign bit */ - sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - - /* sign * 0.5 */ half = lp_build_const_vec(type, 0.5); - half = LLVMBuildBitCast(bld->builder, half, int_vec_type, ""); - half = LLVMBuildOr(bld->builder, sign, half, ""); - half = LLVMBuildBitCast(bld->builder, half, vec_type, ""); + + if (type.sign) { + LLVMTypeRef vec_type = bld->vec_type; + LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + + /* get sign bit */ + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + + /* sign * 0.5 */ + half = LLVMBuildBitCast(bld->builder, half, int_vec_type, ""); + half = LLVMBuildOr(bld->builder, sign, half, ""); + half = LLVMBuildBitCast(bld->builder, half, vec_type, ""); + } res = LLVMBuildFAdd(bld->builder, a, half, ""); } @@ -1142,37 +1312,42 @@ lp_build_ifloor(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMTypeRef int_vec_type = bld->int_vec_type; LLVMValueRef res; assert(type.floating); assert(lp_check_value(type, a)); - if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) { + if (util_cpu_caps.has_sse4_1 && + (type.length == 1 || type.width*type.length == 128)) { res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); } else { - /* Take the sign bit and add it to 1 constant */ - LLVMTypeRef vec_type = lp_build_vec_type(type); - unsigned mantissa = lp_mantissa(type); - LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); - LLVMValueRef sign; - LLVMValueRef offset; - - /* sign = a < 0 ? ~0 : 0 */ - sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign"); - - /* offset = -0.99999(9)f */ - offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); - offset = LLVMConstBitCast(offset, int_vec_type); - - /* offset = a < 0 ? offset : 0.0f */ - offset = LLVMBuildAnd(bld->builder, offset, sign, ""); - offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset"); - - res = LLVMBuildFAdd(bld->builder, a, offset, "ifloor.res"); + res = a; + + if (type.sign) { + /* Take the sign bit and add it to 1 constant */ + LLVMTypeRef vec_type = bld->vec_type; + unsigned mantissa = lp_mantissa(type); + LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef offset; + + /* sign = a < 0 ? ~0 : 0 */ + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign"); + + /* offset = -0.99999(9)f */ + offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); + offset = LLVMConstBitCast(offset, int_vec_type); + + /* offset = a < 0 ? offset : 0.0f */ + offset = LLVMBuildAnd(bld->builder, offset, sign, ""); + offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset"); + + res = LLVMBuildFAdd(bld->builder, res, offset, "ifloor.res"); + } } /* round to nearest (toward zero) */ @@ -1192,35 +1367,39 @@ lp_build_iceil(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMTypeRef int_vec_type = bld->int_vec_type; LLVMValueRef res; assert(type.floating); assert(lp_check_value(type, a)); - if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) { + if (util_cpu_caps.has_sse4_1 && + (type.length == 1 || type.width*type.length == 128)) { res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); } else { - LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef vec_type = bld->vec_type; unsigned mantissa = lp_mantissa(type); - LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); - LLVMValueRef sign; LLVMValueRef offset; - /* sign = a < 0 ? 0 : ~0 */ - sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign"); - sign = LLVMBuildNot(bld->builder, sign, "iceil.not"); - /* offset = 0.99999(9)f */ offset = lp_build_const_vec(type, (double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); - offset = LLVMConstBitCast(offset, int_vec_type); - /* offset = a < 0 ? 0.0 : offset */ - offset = LLVMBuildAnd(bld->builder, offset, sign, ""); - offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset"); + if (type.sign) { + LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + + /* sign = a < 0 ? 0 : ~0 */ + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign"); + sign = LLVMBuildNot(bld->builder, sign, "iceil.not"); + + /* offset = a < 0 ? 0.0 : offset */ + offset = LLVMConstBitCast(offset, int_vec_type); + offset = LLVMBuildAnd(bld->builder, offset, sign, ""); + offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset"); + } res = LLVMBuildFAdd(bld->builder, a, offset, "iceil.res"); } @@ -1232,6 +1411,46 @@ lp_build_iceil(struct lp_build_context *bld, } +/** + * Combined ifloor() & fract(). + * + * Preferred to calling the functions separately, as it will ensure that the + * stratergy (floor() vs ifloor()) that results in less redundant work is used. + */ +void +lp_build_ifloor_fract(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef *out_ipart, + LLVMValueRef *out_fpart) +{ + const struct lp_type type = bld->type; + LLVMValueRef ipart; + + assert(type.floating); + assert(lp_check_value(type, a)); + + if (util_cpu_caps.has_sse4_1 && + (type.length == 1 || type.width*type.length == 128)) { + /* + * floor() is easier. + */ + + ipart = lp_build_floor(bld, a); + *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart"); + *out_ipart = LLVMBuildFPToSI(bld->builder, ipart, bld->int_vec_type, "ipart"); + } + else { + /* + * ifloor() is easier. + */ + + *out_ipart = lp_build_ifloor(bld, a); + ipart = LLVMBuildSIToFP(bld->builder, *out_ipart, bld->vec_type, "ipart"); + *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart"); + } +} + + LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a) @@ -2041,6 +2260,71 @@ lp_build_exp2(struct lp_build_context *bld, /** + * Extract the exponent of a IEEE-754 floating point value. + * + * Optionally apply an integer bias. + * + * Result is an integer value with + * + * ifloor(log2(x)) + bias + */ +LLVMValueRef +lp_build_extract_exponent(struct lp_build_context *bld, + LLVMValueRef x, + int bias) +{ + const struct lp_type type = bld->type; + unsigned mantissa = lp_mantissa(type); + LLVMValueRef res; + + assert(type.floating); + + assert(lp_check_value(bld->type, x)); + + x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, ""); + + res = LLVMBuildLShr(bld->builder, x, lp_build_const_int_vec(type, mantissa), ""); + res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(type, 255), ""); + res = LLVMBuildSub(bld->builder, res, lp_build_const_int_vec(type, 127 - bias), ""); + + return res; +} + + +/** + * Extract the mantissa of the a floating. + * + * Result is a floating point value with + * + * x / floor(log2(x)) + */ +LLVMValueRef +lp_build_extract_mantissa(struct lp_build_context *bld, + LLVMValueRef x) +{ + const struct lp_type type = bld->type; + unsigned mantissa = lp_mantissa(type); + LLVMValueRef mantmask = lp_build_const_int_vec(type, (1ULL << mantissa) - 1); + LLVMValueRef one = LLVMConstBitCast(bld->one, bld->int_vec_type); + LLVMValueRef res; + + assert(lp_check_value(bld->type, x)); + + assert(type.floating); + + x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, ""); + + /* res = x / 2**ipart */ + res = LLVMBuildAnd(bld->builder, x, mantmask, ""); + res = LLVMBuildOr(bld->builder, res, one, ""); + res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, ""); + + return res; +} + + + +/** * Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ * These coefficients can be generate with * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html @@ -2159,3 +2443,62 @@ lp_build_log2(struct lp_build_context *bld, lp_build_log2_approx(bld, x, NULL, NULL, &res); return res; } + + +/** + * Faster (and less accurate) log2. + * + * log2(x) = floor(log2(x)) - 1 + x / 2**floor(log2(x)) + * + * Piece-wise linear approximation, with exact results when x is a + * power of two. + * + * See http://www.flipcode.com/archives/Fast_log_Function.shtml + */ +LLVMValueRef +lp_build_fast_log2(struct lp_build_context *bld, + LLVMValueRef x) +{ + LLVMValueRef ipart; + LLVMValueRef fpart; + + assert(lp_check_value(bld->type, x)); + + assert(bld->type.floating); + + /* ipart = floor(log2(x)) - 1 */ + ipart = lp_build_extract_exponent(bld, x, -1); + ipart = LLVMBuildSIToFP(bld->builder, ipart, bld->vec_type, ""); + + /* fpart = x / 2**ipart */ + fpart = lp_build_extract_mantissa(bld, x); + + /* ipart + fpart */ + return LLVMBuildFAdd(bld->builder, ipart, fpart, ""); +} + + +/** + * Fast implementation of iround(log2(x)). + * + * Not an approximation -- it should give accurate results all the time. + */ +LLVMValueRef +lp_build_ilog2(struct lp_build_context *bld, + LLVMValueRef x) +{ + LLVMValueRef sqrt2 = lp_build_const_vec(bld->type, M_SQRT2); + LLVMValueRef ipart; + + assert(bld->type.floating); + + assert(lp_check_value(bld->type, x)); + + /* x * 2^(0.5) i.e., add 0.5 to the log2(x) */ + x = LLVMBuildFMul(bld->builder, x, sqrt2, ""); + + /* ipart = floor(log2(x) + 0.5) */ + ipart = lp_build_extract_exponent(bld, x, 0); + + return ipart; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 31efa9921c..c78b61decf 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -171,6 +171,12 @@ LLVMValueRef lp_build_itrunc(struct lp_build_context *bld, LLVMValueRef a); +void +lp_build_ifloor_fract(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef *out_ipart, + LLVMValueRef *out_fpart); + LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a); @@ -209,9 +215,26 @@ lp_build_exp2(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef +lp_build_extract_exponent(struct lp_build_context *bld, + LLVMValueRef x, + int bias); + +LLVMValueRef +lp_build_extract_mantissa(struct lp_build_context *bld, + LLVMValueRef x); + +LLVMValueRef lp_build_log2(struct lp_build_context *bld, LLVMValueRef a); +LLVMValueRef +lp_build_fast_log2(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_ilog2(struct lp_build_context *bld, + LLVMValueRef x); + void lp_build_exp2_approx(struct lp_build_context *bld, LLVMValueRef x, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index 8b477313d4..6967dd2622 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -63,6 +63,7 @@ #include "util/u_debug.h" #include "util/u_math.h" +#include "util/u_cpu_detect.h" #include "lp_bld_type.h" #include "lp_bld_const.h" @@ -96,58 +97,104 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type); LLVMValueRef res; unsigned mantissa; - unsigned n; - unsigned long long ubound; - unsigned long long mask; - double scale; - double bias; assert(src_type.floating); + assert(dst_width <= src_type.width); + src_type.sign = FALSE; mantissa = lp_mantissa(src_type); - /* We cannot carry more bits than the mantissa */ - n = MIN2(mantissa, dst_width); + if (dst_width <= mantissa) { + /* + * Apply magic coefficients that will make the desired result to appear + * in the lowest significant bits of the mantissa, with correct rounding. + * + * This only works if the destination width fits in the mantissa. + */ - /* This magic coefficients will make the desired result to appear in the - * lowest significant bits of the mantissa. - */ - ubound = ((unsigned long long)1 << n); - mask = ubound - 1; - scale = (double)mask/ubound; - bias = (double)((unsigned long long)1 << (mantissa - n)); + unsigned long long ubound; + unsigned long long mask; + double scale; + double bias; - res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); - res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), ""); - res = LLVMBuildBitCast(builder, res, int_vec_type, ""); + ubound = (1ULL << dst_width); + mask = ubound - 1; + scale = (double)mask/ubound; + bias = (double)(1ULL << (mantissa - dst_width)); - if(dst_width > n) { - int shift = dst_width - n; - res = LLVMBuildShl(builder, res, lp_build_const_int_vec(src_type, shift), ""); + res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); + res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), ""); + res = LLVMBuildBitCast(builder, res, int_vec_type, ""); + res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), ""); + } + else if (dst_width == (mantissa + 1)) { + /* + * The destination width matches exactly what can be represented in + * floating point (i.e., mantissa + 1 bits). So do a straight + * multiplication followed by casting. No further rounding is necessary. + */ + + double scale; - /* TODO: Fill in the empty lower bits for additional precision? */ - /* YES: this fixes progs/trivial/tri-z-eq.c. - * Otherwise vertex Z=1.0 values get converted to something like - * 0xfffffb00 and the test for equality with 0xffffffff fails. + scale = (double)((1ULL << dst_width) - 1); + + res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); + res = LLVMBuildFPToSI(builder, res, int_vec_type, ""); + } + else { + /* + * The destination exceeds what can be represented in the floating point. + * So multiply by the largest power two we get away with, and when + * subtract the most significant bit to rescale to normalized values. + * + * The largest power of two factor we can get away is + * (1 << (src_type.width - 1)), because we need to use signed . In theory it + * should be (1 << (src_type.width - 2)), but IEEE 754 rules states + * INT_MIN should be returned in FPToSI, which is the correct result for + * values near 1.0! + * + * This means we get (src_type.width - 1) correct bits for values near 0.0, + * and (mantissa + 1) correct bits for values near 1.0. Equally or more + * important, we also get exact results for 0.0 and 1.0. */ -#if 0 - { - LLVMValueRef msb; - msb = LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, dst_width - 1), ""); - msb = LLVMBuildShl(builder, msb, lp_build_const_int_vec(src_type, shift), ""); - msb = LLVMBuildSub(builder, msb, lp_build_const_int_vec(src_type, 1), ""); - res = LLVMBuildOr(builder, res, msb, ""); - } -#elif 0 - while(shift > 0) { - res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, n), ""), ""); - shift -= n; - n *= 2; + + unsigned n = MIN2(src_type.width - 1, dst_width); + + double scale = (double)(1ULL << n); + unsigned lshift = dst_width - n; + unsigned rshift = n; + LLVMValueRef lshifted; + LLVMValueRef rshifted; + + res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); + res = LLVMBuildFPToSI(builder, res, int_vec_type, ""); + + /* + * Align the most significant bit to its final place. + * + * This will cause 1.0 to overflow to 0, but the later adjustment will + * get it right. + */ + if (lshift) { + lshifted = LLVMBuildShl(builder, res, + lp_build_const_int_vec(src_type, lshift), ""); + } else { + lshifted = res; } -#endif + + /* + * Align the most significant bit to the right. + */ + rshifted = LLVMBuildAShr(builder, res, + lp_build_const_int_vec(src_type, rshift), ""); + + /* + * Subtract the MSB to the LSB, therefore re-scaling from + * (1 << dst_width) to ((1 << dst_width) - 1). + */ + + res = LLVMBuildSub(builder, lshifted, rshifted, ""); } - else - res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), ""); return res; } @@ -177,6 +224,16 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, assert(dst_type.floating); + /* Special-case int8->float, though most cases could be handled + * this way: + */ + if (src_width == 8) { + scale = 1.0/255.0; + res = LLVMBuildSIToFP(builder, src, vec_type, ""); + res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), ""); + return res; + } + mantissa = lp_mantissa(dst_type); n = MIN2(mantissa, src_width); @@ -241,6 +298,87 @@ lp_build_conv(LLVMBuilderRef builder, } num_tmps = num_srcs; + + /* Special case 4x4f --> 1x16ub + */ + if (src_type.floating == 1 && + src_type.fixed == 0 && + src_type.sign == 1 && + src_type.norm == 0 && + src_type.width == 32 && + src_type.length == 4 && + + dst_type.floating == 0 && + dst_type.fixed == 0 && + dst_type.sign == 0 && + dst_type.norm == 1 && + dst_type.width == 8 && + dst_type.length == 16 && + + util_cpu_caps.has_sse2) + { + int i; + + for (i = 0; i < num_dsts; i++, src += 4) { + struct lp_type int16_type = dst_type; + struct lp_type int32_type = dst_type; + LLVMValueRef lo, hi; + LLVMValueRef src_int0; + LLVMValueRef src_int1; + LLVMValueRef src_int2; + LLVMValueRef src_int3; + LLVMTypeRef int16_vec_type; + LLVMTypeRef int32_vec_type; + LLVMTypeRef src_vec_type; + LLVMTypeRef dst_vec_type; + LLVMValueRef const_255f; + LLVMValueRef a, b, c, d; + + int16_type.width *= 2; + int16_type.length /= 2; + int16_type.sign = 1; + + int32_type.width *= 4; + int32_type.length /= 4; + int32_type.sign = 1; + + src_vec_type = lp_build_vec_type(src_type); + dst_vec_type = lp_build_vec_type(dst_type); + int16_vec_type = lp_build_vec_type(int16_type); + int32_vec_type = lp_build_vec_type(int32_type); + + const_255f = lp_build_const_vec(src_type, 255.0f); + + a = LLVMBuildFMul(builder, src[0], const_255f, ""); + b = LLVMBuildFMul(builder, src[1], const_255f, ""); + c = LLVMBuildFMul(builder, src[2], const_255f, ""); + d = LLVMBuildFMul(builder, src[3], const_255f, ""); + + { + struct lp_build_context bld; + + bld.builder = builder; + bld.type = src_type; + bld.vec_type = src_vec_type; + bld.int_elem_type = lp_build_elem_type(int32_type); + bld.int_vec_type = int32_vec_type; + bld.undef = lp_build_undef(src_type); + bld.zero = lp_build_zero(src_type); + bld.one = lp_build_one(src_type); + + src_int0 = lp_build_iround(&bld, a); + src_int1 = lp_build_iround(&bld, b); + src_int2 = lp_build_iround(&bld, c); + src_int3 = lp_build_iround(&bld, d); + } + /* relying on clamping behavior of sse2 intrinsics here */ + lo = lp_build_pack2(builder, int32_type, int16_type, src_int0, src_int1); + hi = lp_build_pack2(builder, int32_type, int16_type, src_int2, src_int3); + dst[i] = lp_build_pack2(builder, int16_type, dst_type, lo, hi); + } + return; + } + /* * Clamp if necessary */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.c b/src/gallium/auxiliary/gallivm/lp_bld_debug.c index d3a5afff8c..93e56553d7 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.c @@ -57,6 +57,8 @@ lp_disassemble(const void* func) #ifdef HAVE_UDIS86 ud_t ud_obj; uint64_t max_jmp_pc; + uint inst_no; + boolean emit_addrs = TRUE, emit_line_nos = FALSE; ud_init(&ud_obj); @@ -76,13 +78,18 @@ lp_disassemble(const void* func) while (ud_disassemble(&ud_obj)) { + if (emit_addrs) { #ifdef PIPE_ARCH_X86 - debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj)); + debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj)); #endif #ifdef PIPE_ARCH_X86_64 - debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj)); + debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj)); #endif - + } + else if (emit_line_nos) { + debug_printf("%6d:\t", inst_no); + inst_no++; + } #if 0 debug_printf("%-16s ", ud_insn_hex(&ud_obj)); #endif @@ -115,8 +122,10 @@ lp_disassemble(const void* func) } } - if ((ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) || - ud_obj.mnemonic == UD_Iinvalid) + if (ud_obj.mnemonic == UD_Iinvalid || + (ud_insn_off(&ud_obj) >= max_jmp_pc && + (ud_obj.mnemonic == UD_Iret || + ud_obj.mnemonic == UD_Ijmp))) break; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h index 369c1bbf09..eb11dcd4ef 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h @@ -36,11 +36,12 @@ #include "util/u_string.h" -#define GALLIVM_DEBUG_TGSI 0x1 -#define GALLIVM_DEBUG_IR 0x2 -#define GALLIVM_DEBUG_ASM 0x4 -#define GALLIVM_DEBUG_NO_OPT 0x8 -#define GALLIVM_DEBUG_PERF 0x10 +#define GALLIVM_DEBUG_TGSI (1 << 0) +#define GALLIVM_DEBUG_IR (1 << 1) +#define GALLIVM_DEBUG_ASM (1 << 2) +#define GALLIVM_DEBUG_NO_OPT (1 << 3) +#define GALLIVM_DEBUG_PERF (1 << 4) +#define GALLIVM_DEBUG_NO_BRILINEAR (1 << 5) #ifdef DEBUG diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index 5bc9c741a8..a2cee199a0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -38,273 +38,15 @@ #include "lp_bld_flow.h" -#define LP_BUILD_FLOW_MAX_VARIABLES 64 -#define LP_BUILD_FLOW_MAX_DEPTH 32 - -/** - * Enumeration of all possible flow constructs. - */ -enum lp_build_flow_construct_kind { - LP_BUILD_FLOW_SCOPE, - LP_BUILD_FLOW_SKIP, - LP_BUILD_FLOW_IF -}; - - -/** - * Variable declaration scope. - */ -struct lp_build_flow_scope -{ - /** Number of variables declared in this scope */ - unsigned num_variables; -}; - - -/** - * Early exit. Useful to skip to the end of a function or block when - * the execution mask becomes zero or when there is an error condition. - */ -struct lp_build_flow_skip -{ - /** Block to skip to */ - LLVMBasicBlockRef block; - - /** Number of variables declared at the beginning */ - unsigned num_variables; - - LLVMValueRef *phi; /**< array [num_variables] */ -}; - - -/** - * if/else/endif. - */ -struct lp_build_flow_if -{ - unsigned num_variables; - - LLVMValueRef *phi; /**< array [num_variables] */ - - LLVMValueRef condition; - LLVMBasicBlockRef entry_block, true_block, false_block, merge_block; -}; - - -/** - * Union of all possible flow constructs' data - */ -union lp_build_flow_construct_data -{ - struct lp_build_flow_scope scope; - struct lp_build_flow_skip skip; - struct lp_build_flow_if ifthen; -}; - - -/** - * Element of the flow construct stack. - */ -struct lp_build_flow_construct -{ - enum lp_build_flow_construct_kind kind; - union lp_build_flow_construct_data data; -}; - - /** - * All necessary data to generate LLVM control flow constructs. + * Insert a new block, right where builder is pointing to. * - * Besides keeping track of the control flow construct themselves we also - * need to keep track of variables in order to generate SSA Phi values. - */ -struct lp_build_flow_context -{ - LLVMBuilderRef builder; - - /** - * Control flow stack. - */ - struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH]; - unsigned num_constructs; - - /** - * Variable stack - */ - LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES]; - unsigned num_variables; -}; - - -struct lp_build_flow_context * -lp_build_flow_create(LLVMBuilderRef builder) -{ - struct lp_build_flow_context *flow; - - flow = CALLOC_STRUCT(lp_build_flow_context); - if(!flow) - return NULL; - - flow->builder = builder; - - return flow; -} - - -void -lp_build_flow_destroy(struct lp_build_flow_context *flow) -{ - assert(flow->num_constructs == 0); - assert(flow->num_variables == 0); - FREE(flow); -} - - -/** - * Begin/push a new flow control construct, such as a loop, skip block - * or variable scope. - */ -static union lp_build_flow_construct_data * -lp_build_flow_push(struct lp_build_flow_context *flow, - enum lp_build_flow_construct_kind kind) -{ - assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH); - if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH) - return NULL; - - flow->constructs[flow->num_constructs].kind = kind; - return &flow->constructs[flow->num_constructs++].data; -} - - -/** - * Return the current/top flow control construct on the stack. - * \param kind the expected type of the top-most construct - */ -static union lp_build_flow_construct_data * -lp_build_flow_peek(struct lp_build_flow_context *flow, - enum lp_build_flow_construct_kind kind) -{ - assert(flow->num_constructs); - if(!flow->num_constructs) - return NULL; - - assert(flow->constructs[flow->num_constructs - 1].kind == kind); - if(flow->constructs[flow->num_constructs - 1].kind != kind) - return NULL; - - return &flow->constructs[flow->num_constructs - 1].data; -} - - -/** - * End/pop the current/top flow control construct on the stack. - * \param kind the expected type of the top-most construct - */ -static union lp_build_flow_construct_data * -lp_build_flow_pop(struct lp_build_flow_context *flow, - enum lp_build_flow_construct_kind kind) -{ - assert(flow->num_constructs); - if(!flow->num_constructs) - return NULL; - - assert(flow->constructs[flow->num_constructs - 1].kind == kind); - if(flow->constructs[flow->num_constructs - 1].kind != kind) - return NULL; - - return &flow->constructs[--flow->num_constructs].data; -} - - -/** - * Begin a variable scope. + * This is useful important not only for aesthetic reasons, but also for + * performance reasons, as frequently run blocks should be laid out next to + * each other and fall-throughs maximized. * + * See also llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp. * - */ -void -lp_build_flow_scope_begin(struct lp_build_flow_context *flow) -{ - struct lp_build_flow_scope *scope; - - scope = &lp_build_flow_push(flow, LP_BUILD_FLOW_SCOPE)->scope; - if(!scope) - return; - - scope->num_variables = 0; -} - - -/** - * Declare a variable. - * - * A variable is a named entity which can have different LLVMValueRef's at - * different points of the program. This is relevant for control flow because - * when there are multiple branches to a same location we need to replace - * the variable's value with a Phi function as explained in - * http://en.wikipedia.org/wiki/Static_single_assignment_form . - * - * We keep track of variables by keeping around a pointer to where they're - * current. - * - * There are a few cautions to observe: - * - * - Variable's value must not be NULL. If there is no initial value then - * LLVMGetUndef() should be used. - * - * - Variable's value must be kept up-to-date. If the variable is going to be - * modified by a function then a pointer should be passed so that its value - * is accurate. Failure to do this will cause some of the variables' - * transient values to be lost, leading to wrong results. - * - * - A program should be written from top to bottom, by always appending - * instructions to the bottom with a single LLVMBuilderRef. Inserting and/or - * modifying existing statements will most likely lead to wrong results. - * - */ -void -lp_build_flow_scope_declare(struct lp_build_flow_context *flow, - LLVMValueRef *variable) -{ - struct lp_build_flow_scope *scope; - - scope = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SCOPE)->scope; - if(!scope) - return; - - assert(*variable); - if(!*variable) - return; - - assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES); - if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES) - return; - - flow->variables[flow->num_variables++] = variable; - ++scope->num_variables; -} - - -void -lp_build_flow_scope_end(struct lp_build_flow_context *flow) -{ - struct lp_build_flow_scope *scope; - - scope = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SCOPE)->scope; - if(!scope) - return; - - assert(flow->num_variables >= scope->num_variables); - if(flow->num_variables < scope->num_variables) { - flow->num_variables = 0; - return; - } - - flow->num_variables -= scope->num_variables; -} - - -/** * Note: this function has no dependencies on the flow code and could * be used elsewhere. */ @@ -334,52 +76,18 @@ lp_build_insert_new_block(LLVMBuilderRef builder, const char *name) } -static LLVMBasicBlockRef -lp_build_flow_insert_block(struct lp_build_flow_context *flow) -{ - return lp_build_insert_new_block(flow->builder, ""); -} - - /** * Begin a "skip" block. Inside this block we can test a condition and * skip to the end of the block if the condition is false. */ void -lp_build_flow_skip_begin(struct lp_build_flow_context *flow) +lp_build_flow_skip_begin(struct lp_build_skip_context *skip, + LLVMBuilderRef builder) { - struct lp_build_flow_skip *skip; - LLVMBuilderRef builder; - unsigned i; - - skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip; - if(!skip) - return; + skip->builder = builder; /* create new basic block */ - skip->block = lp_build_flow_insert_block(flow); - - skip->num_variables = flow->num_variables; - if(!skip->num_variables) { - skip->phi = NULL; - return; - } - - /* Allocate a Phi node for each variable in this skip scope */ - skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi); - if(!skip->phi) { - skip->num_variables = 0; - return; - } - - builder = LLVMCreateBuilder(); - LLVMPositionBuilderAtEnd(builder, skip->block); - - /* create a Phi node for each variable */ - for(i = 0; i < skip->num_variables; ++i) - skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); - - LLVMDisposeBuilder(builder); + skip->block = lp_build_insert_new_block(skip->builder, "skip"); } @@ -388,83 +96,50 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow) * skip block if the condition is true. */ void -lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, +lp_build_flow_skip_cond_break(struct lp_build_skip_context *skip, LLVMValueRef cond) { - struct lp_build_flow_skip *skip; - LLVMBasicBlockRef current_block; LLVMBasicBlockRef new_block; - unsigned i; - - skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip; - if(!skip) - return; - current_block = LLVMGetInsertBlock(flow->builder); - - new_block = lp_build_flow_insert_block(flow); - - /* for each variable, update the Phi node with a (variable, block) pair */ - for(i = 0; i < skip->num_variables; ++i) { - assert(*flow->variables[i]); - assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i])); - LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); - } + new_block = lp_build_insert_new_block(skip->builder, ""); /* if cond is true, goto skip->block, else goto new_block */ - LLVMBuildCondBr(flow->builder, cond, skip->block, new_block); + LLVMBuildCondBr(skip->builder, cond, skip->block, new_block); - LLVMPositionBuilderAtEnd(flow->builder, new_block); + LLVMPositionBuilderAtEnd(skip->builder, new_block); } void -lp_build_flow_skip_end(struct lp_build_flow_context *flow) +lp_build_flow_skip_end(struct lp_build_skip_context *skip) { - struct lp_build_flow_skip *skip; - LLVMBasicBlockRef current_block; - unsigned i; - - skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip; - if(!skip) - return; - - current_block = LLVMGetInsertBlock(flow->builder); - - /* add (variable, block) tuples to the phi nodes */ - for(i = 0; i < skip->num_variables; ++i) { - assert(*flow->variables[i]); - assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i])); - LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); - *flow->variables[i] = skip->phi[i]; - } - /* goto block */ - LLVMBuildBr(flow->builder, skip->block); - LLVMPositionBuilderAtEnd(flow->builder, skip->block); - - FREE(skip->phi); + LLVMBuildBr(skip->builder, skip->block); + LLVMPositionBuilderAtEnd(skip->builder, skip->block); } /** * Check if the mask predicate is zero. If so, jump to the end of the block. */ -static void +void lp_build_mask_check(struct lp_build_mask_context *mask) { - LLVMBuilderRef builder = mask->flow->builder; + LLVMBuilderRef builder = mask->skip.builder; + LLVMValueRef value; LLVMValueRef cond; + value = lp_build_mask_value(mask); + /* cond = (mask == 0) */ cond = LLVMBuildICmp(builder, LLVMIntEQ, - LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), + LLVMBuildBitCast(builder, value, mask->reg_type, ""), LLVMConstNull(mask->reg_type), ""); /* if cond, goto end of block */ - lp_build_flow_skip_cond_break(mask->flow, cond); + lp_build_flow_skip_cond_break(&mask->skip, cond); } @@ -477,21 +152,27 @@ lp_build_mask_check(struct lp_build_mask_context *mask) */ void lp_build_mask_begin(struct lp_build_mask_context *mask, - struct lp_build_flow_context *flow, + LLVMBuilderRef builder, struct lp_type type, LLVMValueRef value) { memset(mask, 0, sizeof *mask); - mask->flow = flow; mask->reg_type = LLVMIntType(type.width * type.length); - mask->value = value; + mask->var = lp_build_alloca(builder, + lp_build_int_vec_type(type), + "execution_mask"); - lp_build_flow_scope_begin(flow); - lp_build_flow_scope_declare(flow, &mask->value); - lp_build_flow_skip_begin(flow); + LLVMBuildStore(builder, value, mask->var); - lp_build_mask_check(mask); + lp_build_flow_skip_begin(&mask->skip, builder); +} + + +LLVMValueRef +lp_build_mask_value(struct lp_build_mask_context *mask) +{ + return LLVMBuildLoad(mask->skip.builder, mask->var, ""); } @@ -504,9 +185,10 @@ void lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef value) { - mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, ""); - - lp_build_mask_check(mask); + value = LLVMBuildAnd(mask->skip.builder, + lp_build_mask_value(mask), + value, ""); + LLVMBuildStore(mask->skip.builder, value, mask->var); } @@ -516,9 +198,8 @@ lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef lp_build_mask_end(struct lp_build_mask_context *mask) { - lp_build_flow_skip_end(mask->flow); - lp_build_flow_scope_end(mask->flow); - return mask->value; + lp_build_flow_skip_end(&mask->skip); + return lp_build_mask_value(mask); } @@ -528,59 +209,27 @@ lp_build_loop_begin(LLVMBuilderRef builder, LLVMValueRef start, struct lp_build_loop_state *state) { - LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); - LLVMValueRef function = LLVMGetBasicBlockParent(block); + state->block = lp_build_insert_new_block(builder, "loop_begin"); - state->block = LLVMAppendBasicBlock(function, "loop"); + state->counter_var = lp_build_alloca(builder, LLVMTypeOf(start), "loop_counter"); + + LLVMBuildStore(builder, start, state->counter_var); LLVMBuildBr(builder, state->block); LLVMPositionBuilderAtEnd(builder, state->block); - state->counter = LLVMBuildPhi(builder, LLVMTypeOf(start), ""); - - LLVMAddIncoming(state->counter, &start, &block, 1); - + state->counter = LLVMBuildLoad(builder, state->counter_var, ""); } void -lp_build_loop_end(LLVMBuilderRef builder, - LLVMValueRef end, - LLVMValueRef step, - struct lp_build_loop_state *state) -{ - LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); - LLVMValueRef function = LLVMGetBasicBlockParent(block); - LLVMValueRef next; - LLVMValueRef cond; - LLVMBasicBlockRef after_block; - - if (!step) - step = LLVMConstInt(LLVMTypeOf(end), 1, 0); - - next = LLVMBuildAdd(builder, state->counter, step, ""); - - cond = LLVMBuildICmp(builder, LLVMIntNE, next, end, ""); - - after_block = LLVMAppendBasicBlock(function, ""); - - LLVMBuildCondBr(builder, cond, after_block, state->block); - - LLVMAddIncoming(state->counter, &next, &block, 1); - - LLVMPositionBuilderAtEnd(builder, after_block); -} - -void lp_build_loop_end_cond(LLVMBuilderRef builder, LLVMValueRef end, LLVMValueRef step, - int llvm_cond, + LLVMIntPredicate llvm_cond, struct lp_build_loop_state *state) { - LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); - LLVMValueRef function = LLVMGetBasicBlockParent(block); LLVMValueRef next; LLVMValueRef cond; LLVMBasicBlockRef after_block; @@ -590,15 +239,27 @@ lp_build_loop_end_cond(LLVMBuilderRef builder, next = LLVMBuildAdd(builder, state->counter, step, ""); + LLVMBuildStore(builder, next, state->counter_var); + cond = LLVMBuildICmp(builder, llvm_cond, next, end, ""); - after_block = LLVMAppendBasicBlock(function, ""); + after_block = lp_build_insert_new_block(builder, "loop_end"); LLVMBuildCondBr(builder, cond, after_block, state->block); - LLVMAddIncoming(state->counter, &next, &block, 1); - LLVMPositionBuilderAtEnd(builder, after_block); + + state->counter = LLVMBuildLoad(builder, state->counter_var, ""); +} + + +void +lp_build_loop_end(LLVMBuilderRef builder, + LLVMValueRef end, + LLVMValueRef step, + struct lp_build_loop_state *state) +{ + lp_build_loop_end_cond(builder, end, step, LLVMIntNE, state); } @@ -616,24 +277,16 @@ lp_build_loop_end_cond(LLVMBuilderRef builder, Is built with: - LLVMValueRef x = LLVMGetUndef(); // or something else + // x needs an alloca variable + x = lp_build_alloca(builder, type, "x"); - flow = lp_build_flow_create(builder); - lp_build_flow_scope_begin(flow); + lp_build_if(ctx, builder, cond); + LLVMBuildStore(LLVMBuildAdd(1, 2), x); + lp_build_else(ctx); + LLVMBuildStore(LLVMBuildAdd(2, 3). x); + lp_build_endif(ctx); - // x needs a phi node - lp_build_flow_scope_declare(flow, &x); - - lp_build_if(ctx, flow, builder, cond); - x = LLVMAdd(1, 2); - lp_build_else(ctx); - x = LLVMAdd(2, 3); - lp_build_endif(ctx); - - lp_build_flow_scope_end(flow); - - lp_build_flow_destroy(flow); */ @@ -642,47 +295,19 @@ lp_build_loop_end_cond(LLVMBuilderRef builder, * Begin an if/else/endif construct. */ void -lp_build_if(struct lp_build_if_state *ctx, - struct lp_build_flow_context *flow, +lp_build_if(struct lp_build_if_state *ifthen, LLVMBuilderRef builder, LLVMValueRef condition) { LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); - struct lp_build_flow_if *ifthen; - unsigned i; - - memset(ctx, 0, sizeof(*ctx)); - ctx->builder = builder; - ctx->flow = flow; - /* push/create new scope */ - ifthen = &lp_build_flow_push(flow, LP_BUILD_FLOW_IF)->ifthen; - assert(ifthen); - - ifthen->num_variables = flow->num_variables; + memset(ifthen, 0, sizeof *ifthen); + ifthen->builder = builder; ifthen->condition = condition; ifthen->entry_block = block; - /* create a Phi node for each variable in this flow scope */ - ifthen->phi = MALLOC(ifthen->num_variables * sizeof(*ifthen->phi)); - if (!ifthen->phi) { - ifthen->num_variables = 0; - return; - } - /* create endif/merge basic block for the phi functions */ ifthen->merge_block = lp_build_insert_new_block(builder, "endif-block"); - LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); - - /* create a phi node for each variable */ - for (i = 0; i < flow->num_variables; i++) { - ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); - - /* add add the initial value of the var from the entry block */ - if (!LLVMIsUndef(*flow->variables[i])) - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], - &ifthen->entry_block, 1); - } /* create/insert true_block before merge_block */ ifthen->true_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-true-block"); @@ -696,27 +321,16 @@ lp_build_if(struct lp_build_if_state *ctx, * Begin else-part of a conditional */ void -lp_build_else(struct lp_build_if_state *ctx) +lp_build_else(struct lp_build_if_state *ifthen) { - struct lp_build_flow_context *flow = ctx->flow; - struct lp_build_flow_if *ifthen; - unsigned i; - - ifthen = &lp_build_flow_peek(flow, LP_BUILD_FLOW_IF)->ifthen; - assert(ifthen); - - /* for each variable, update the Phi node with a (variable, block) pair */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); - for (i = 0; i < flow->num_variables; i++) { - assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); - } + /* Append an unconditional Br(anch) instruction on the true_block */ + LLVMBuildBr(ifthen->builder, ifthen->merge_block); /* create/insert false_block before the merge block */ ifthen->false_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-false-block"); /* successive code goes into the else block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); + LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->false_block); } @@ -724,75 +338,30 @@ lp_build_else(struct lp_build_if_state *ctx) * End a conditional. */ void -lp_build_endif(struct lp_build_if_state *ctx) +lp_build_endif(struct lp_build_if_state *ifthen) { - struct lp_build_flow_context *flow = ctx->flow; - struct lp_build_flow_if *ifthen; - LLVMBasicBlockRef curBlock = LLVMGetInsertBlock(ctx->builder); - unsigned i; - - ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen; - assert(ifthen); - /* Insert branch to the merge block from current block */ - LLVMBuildBr(ctx->builder, ifthen->merge_block); + LLVMBuildBr(ifthen->builder, ifthen->merge_block); - if (ifthen->false_block) { - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); - /* for each variable, update the Phi node with a (variable, block) pair */ - for (i = 0; i < flow->num_variables; i++) { - assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &curBlock, 1); - /* replace the variable ref with the phi function */ - *flow->variables[i] = ifthen->phi[i]; - } - } - else { - /* no else clause */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); - for (i = 0; i < flow->num_variables; i++) { - assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); - - /* replace the variable ref with the phi function */ - *flow->variables[i] = ifthen->phi[i]; - } - } - - FREE(ifthen->phi); - - /*** - *** Now patch in the various branch instructions. - ***/ + /* + * Now patch in the various branch instructions. + */ /* Insert the conditional branch instruction at the end of entry_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->entry_block); + LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->entry_block); if (ifthen->false_block) { /* we have an else clause */ - LLVMBuildCondBr(ctx->builder, ifthen->condition, + LLVMBuildCondBr(ifthen->builder, ifthen->condition, ifthen->true_block, ifthen->false_block); } else { /* no else clause */ - LLVMBuildCondBr(ctx->builder, ifthen->condition, + LLVMBuildCondBr(ifthen->builder, ifthen->condition, ifthen->true_block, ifthen->merge_block); } - /* Insert branch from end of true_block to merge_block */ - if (ifthen->false_block) { - /* Append an unconditional Br(anch) instruction on the true_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); - LLVMBuildBr(ctx->builder, ifthen->merge_block); - } - else { - /* No else clause. - * Note that we've already inserted the branch at the end of - * true_block. See the very first LLVMBuildBr() call in this function. - */ - } - /* Resume building code at end of the ifthen->merge_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); + LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->merge_block); } @@ -830,6 +399,7 @@ lp_build_alloca(LLVMBuilderRef builder, } res = LLVMBuildAlloca(first_builder, type, name); + LLVMBuildStore(builder, LLVMConstNull(type), res); LLVMDisposeBuilder(first_builder); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index fffb493a93..e729ee6eaa 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -41,52 +41,49 @@ struct lp_type; -struct lp_build_flow_context; - - -struct lp_build_flow_context * -lp_build_flow_create(LLVMBuilderRef builder); - -void -lp_build_flow_destroy(struct lp_build_flow_context *flow); - -void -lp_build_flow_scope_begin(struct lp_build_flow_context *flow); - -void -lp_build_flow_scope_declare(struct lp_build_flow_context *flow, - LLVMValueRef *variable); +/** + * Early exit. Useful to skip to the end of a function or block when + * the execution mask becomes zero or when there is an error condition. + */ +struct lp_build_skip_context +{ + LLVMBuilderRef builder; -void -lp_build_flow_scope_end(struct lp_build_flow_context *flow); + /** Block to skip to */ + LLVMBasicBlockRef block; +}; void -lp_build_flow_skip_begin(struct lp_build_flow_context *flow); +lp_build_flow_skip_begin(struct lp_build_skip_context *ctx, + LLVMBuilderRef builder); void -lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, +lp_build_flow_skip_cond_break(struct lp_build_skip_context *ctx, LLVMValueRef cond); void -lp_build_flow_skip_end(struct lp_build_flow_context *flow); +lp_build_flow_skip_end(struct lp_build_skip_context *ctx); struct lp_build_mask_context { - struct lp_build_flow_context *flow; + struct lp_build_skip_context skip; LLVMTypeRef reg_type; - LLVMValueRef value; + LLVMValueRef var; }; void lp_build_mask_begin(struct lp_build_mask_context *mask, - struct lp_build_flow_context *flow, + LLVMBuilderRef builder, struct lp_type type, LLVMValueRef value); +LLVMValueRef +lp_build_mask_value(struct lp_build_mask_context *mask); + /** * Bitwise AND the mask with the given value, if a previous mask was set. */ @@ -94,6 +91,9 @@ void lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef value); +void +lp_build_mask_check(struct lp_build_mask_context *mask); + LLVMValueRef lp_build_mask_end(struct lp_build_mask_context *mask); @@ -108,6 +108,7 @@ lp_build_mask_end(struct lp_build_mask_context *mask); struct lp_build_loop_state { LLVMBasicBlockRef block; + LLVMValueRef counter_var; LLVMValueRef counter; }; @@ -128,22 +129,28 @@ void lp_build_loop_end_cond(LLVMBuilderRef builder, LLVMValueRef end, LLVMValueRef step, - int cond, /* LLVM condition */ + LLVMIntPredicate cond, struct lp_build_loop_state *state); +/** + * if/else/endif. + */ struct lp_build_if_state { LLVMBuilderRef builder; - struct lp_build_flow_context *flow; + LLVMValueRef condition; + LLVMBasicBlockRef entry_block; + LLVMBasicBlockRef true_block; + LLVMBasicBlockRef false_block; + LLVMBasicBlockRef merge_block; }; void lp_build_if(struct lp_build_if_state *ctx, - struct lp_build_flow_context *flow, LLVMBuilderRef builder, LLVMValueRef condition); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c index 0a5038bc98..2bce289555 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c @@ -35,6 +35,7 @@ #include "util/u_format.h" +#include "util/u_cpu_detect.h" #include "lp_bld_arit.h" #include "lp_bld_type.h" @@ -42,7 +43,7 @@ #include "lp_bld_conv.h" #include "lp_bld_gather.h" #include "lp_bld_format.h" - +#include "lp_bld_logic.h" /** * Extract Y, U, V channels from packed UYVY. @@ -59,7 +60,7 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder, LLVMValueRef *v) { struct lp_type type; - LLVMValueRef shift, mask; + LLVMValueRef mask; memset(&type, 0, sizeof type); type.width = 32; @@ -69,14 +70,37 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder, assert(lp_check_value(type, i)); /* - * y = (uyvy >> 16*i) & 0xff + * y = (uyvy >> (16*i + 8)) & 0xff * u = (uyvy ) & 0xff * v = (uyvy >> 16 ) & 0xff */ - shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), ""); - shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), ""); - *y = LLVMBuildLShr(builder, packed, shift, ""); +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + /* + * Avoid shift with per-element count. + * No support on x86, gets translated to roughly 5 instructions + * per element. Didn't measure performance but cuts shader size + * by quite a bit (less difference if cpu has no sse4.1 support). + */ + if (util_cpu_caps.has_sse2 && n == 4) { + LLVMValueRef sel, tmp, tmp2; + struct lp_build_context bld32; + + lp_build_context_init(&bld32, builder, type); + + tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), ""); + tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(type, 16), ""); + sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0)); + *y = lp_build_select(&bld32, sel, tmp, tmp2); + } else +#endif + { + LLVMValueRef shift; + shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), ""); + shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), ""); + *y = LLVMBuildLShr(builder, packed, shift, ""); + } + *u = packed; *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), ""); @@ -103,7 +127,7 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder, LLVMValueRef *v) { struct lp_type type; - LLVMValueRef shift, mask; + LLVMValueRef mask; memset(&type, 0, sizeof type); type.width = 32; @@ -118,8 +142,30 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder, * v = (yuyv >> 24 ) & 0xff */ - shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), ""); - *y = LLVMBuildLShr(builder, packed, shift, ""); +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + /* + * Avoid shift with per-element count. + * No support on x86, gets translated to roughly 5 instructions + * per element. Didn't measure performance but cuts shader size + * by quite a bit (less difference if cpu has no sse4.1 support). + */ + if (util_cpu_caps.has_sse2 && n == 4) { + LLVMValueRef sel, tmp; + struct lp_build_context bld32; + + lp_build_context_init(&bld32, builder, type); + + tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), ""); + sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0)); + *y = lp_build_select(&bld32, sel, packed, tmp); + } else +#endif + { + LLVMValueRef shift; + shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), ""); + *y = LLVMBuildLShr(builder, packed, shift, ""); + } + *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), ""); *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 24), ""); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 761f33b578..5598ca5c48 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -44,6 +44,7 @@ static const struct debug_named_value lp_bld_debug_flags[] = { { "asm", GALLIVM_DEBUG_ASM, NULL }, { "nopt", GALLIVM_DEBUG_NO_OPT, NULL }, { "perf", GALLIVM_DEBUG_PERF, NULL }, + { "no_brilinear", GALLIVM_DEBUG_NO_BRILINEAR, NULL }, DEBUG_NAMED_VALUE_END }; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h index f26fdac466..0b4b1ca7d1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h @@ -47,4 +47,10 @@ lp_build_init(void); extern void lp_func_delete_body(LLVMValueRef func); + +extern LLVMValueRef +lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal, + const char *Name); + + #endif /* !LP_BLD_INIT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index d5c62a3f73..026b60ac36 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -92,9 +92,23 @@ lp_build_compare(LLVMBuilderRef builder, if(func == PIPE_FUNC_ALWAYS) return ones; - /* TODO: optimize the constant case */ +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + /* + * There are no unsigned integer comparison instructions in SSE. + */ - /* XXX: It is not clear if we should use the ordered or unordered operators */ + if (!type.floating && !type.sign && + type.width * type.length == 128 && + util_cpu_caps.has_sse2 && + (func == PIPE_FUNC_LESS || + func == PIPE_FUNC_LEQUAL || + func == PIPE_FUNC_GREATER || + func == PIPE_FUNC_GEQUAL) && + (gallivm_debug & GALLIVM_DEBUG_PERF)) { + debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n", + __FUNCTION__, type.length, type.width); + } +#endif #if HAVE_LLVM < 0x0207 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) @@ -225,6 +239,8 @@ lp_build_compare(LLVMBuilderRef builder, #endif #endif /* HAVE_LLVM < 0x0207 */ + /* XXX: It is not clear if we should use the ordered or unordered operators */ + if(type.floating) { LLVMRealPredicate op; switch(func) { @@ -446,10 +462,12 @@ lp_build_select(struct lp_build_context *bld, LLVMTypeRef arg_type; LLVMValueRef args[3]; - if (type.width == 64) { + if (type.floating && + type.width == 64) { intrinsic = "llvm.x86.sse41.blendvpd"; arg_type = LLVMVectorType(LLVMDoubleType(), 2); - } else if (type.width == 32) { + } else if (type.floating && + type.width == 32) { intrinsic = "llvm.x86.sse41.blendvps"; arg_type = LLVMVectorType(LLVMFloatType(), 4); } else { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 48baf7c425..f56ddee7fd 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -178,3 +178,13 @@ lp_func_delete_body(LLVMValueRef FF) llvm::Function *func = llvm::unwrap<llvm::Function>(FF); func->deleteBody(); } + + +extern "C" +LLVMValueRef +lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal, + const char *Name) +{ + return llvm::wrap(llvm::unwrap(B)->CreateLoad(llvm::unwrap(PointerVal), true, Name)); +} + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.c b/src/gallium/auxiliary/gallivm/lp_bld_printf.c index 153ba5b15b..f418e96aff 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_printf.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.c @@ -29,6 +29,8 @@ #include "util/u_debug.h" #include "util/u_memory.h" +#include "util/u_string.h" +#include "lp_bld_const.h" #include "lp_bld_printf.h" @@ -119,3 +121,22 @@ lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...) return LLVMBuildCall(builder, func_printf, params, argcount + 1, ""); } + + +/** + * Print a float[4] vector. + */ +LLVMValueRef +lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec) +{ + char format[1000]; + LLVMValueRef x, y, z, w; + + x = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(0), ""); + y = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(1), ""); + z = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(2), ""); + w = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(3), ""); + + util_snprintf(format, sizeof(format), "%s %%f %%f %%f %%f\n", msg); + return lp_build_printf(builder, format, x, y, z, w); +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.h b/src/gallium/auxiliary/gallivm/lp_bld_printf.h index 83bd8f1d55..b6222c62eb 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_printf.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.h @@ -35,5 +35,9 @@ LLVMValueRef lp_build_const_string_variable(LLVMModuleRef module, const char *str, int len); LLVMValueRef lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...); +LLVMValueRef +lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec); + + #endif diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c index 7b1088939b..c18c8b4710 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c @@ -81,11 +81,15 @@ LLVMValueRef lp_build_scalar_ddx(struct lp_build_context *bld, LLVMValueRef a) { - LLVMValueRef idx_left = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0); - LLVMValueRef idx_right = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_RIGHT, 0); - LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, ""); - LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, ""); - return lp_build_sub(bld, a_right, a_left); + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef idx_left = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0); + LLVMValueRef idx_right = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_RIGHT, 0); + LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, "left"); + LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "right"); + if (bld->type.floating) + return LLVMBuildFSub(bld->builder, a_right, a_left, "ddx"); + else + return LLVMBuildSub(bld->builder, a_right, a_left, "ddx"); } @@ -93,9 +97,13 @@ LLVMValueRef lp_build_scalar_ddy(struct lp_build_context *bld, LLVMValueRef a) { - LLVMValueRef idx_top = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0); - LLVMValueRef idx_bottom = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_BOTTOM_LEFT, 0); - LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, ""); - LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, ""); - return lp_build_sub(bld, a_bottom, a_top); + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef idx_top = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0); + LLVMValueRef idx_bottom = LLVMConstInt(i32t, LP_BLD_QUAD_BOTTOM_LEFT, 0); + LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, "top"); + LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "bottom"); + if (bld->type.floating) + return LLVMBuildFSub(bld->builder, a_bottom, a_top, "ddy"); + else + return LLVMBuildSub(bld->builder, a_bottom, a_top, "ddy"); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index e89ee7c230..844d1d935b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -39,12 +39,52 @@ #include "lp_bld_arit.h" #include "lp_bld_const.h" #include "lp_bld_debug.h" +#include "lp_bld_printf.h" #include "lp_bld_flow.h" #include "lp_bld_sample.h" #include "lp_bld_swizzle.h" #include "lp_bld_type.h" +/* + * Bri-linear factor. Should be greater than one. + */ +#define BRILINEAR_FACTOR 2 + + +/** + * Does the given texture wrap mode allow sampling the texture border color? + * XXX maybe move this into gallium util code. + */ +boolean +lp_sampler_wrap_mode_uses_border_color(unsigned mode, + unsigned min_img_filter, + unsigned mag_img_filter) +{ + switch (mode) { + case PIPE_TEX_WRAP_REPEAT: + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_REPEAT: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return FALSE; + case PIPE_TEX_WRAP_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + if (min_img_filter == PIPE_TEX_FILTER_NEAREST && + mag_img_filter == PIPE_TEX_FILTER_NEAREST) { + return FALSE; + } else { + return TRUE; + } + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return TRUE; + default: + assert(0 && "unexpected wrap mode"); + return FALSE; + } +} + + /** * Initialize lp_sampler_static_state object with the gallium sampler * and texture state. @@ -93,31 +133,40 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->wrap_r = sampler->wrap_r; state->min_img_filter = sampler->min_img_filter; state->mag_img_filter = sampler->mag_img_filter; - if (view->last_level) { + + if (view->last_level && sampler->max_lod > 0.0f) { state->min_mip_filter = sampler->min_mip_filter; } else { state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE; } + if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { + if (sampler->lod_bias != 0.0f) { + state->lod_bias_non_zero = 1; + } + + /* If min_lod == max_lod we can greatly simplify mipmap selection. + * This is a case that occurs during automatic mipmap generation. + */ + if (sampler->min_lod == sampler->max_lod) { + state->min_max_lod_equal = 1; + } else { + if (sampler->min_lod > 0.0f) { + state->apply_min_lod = 1; + } + + if (sampler->max_lod < (float)view->last_level) { + state->apply_max_lod = 1; + } + } + } + state->compare_mode = sampler->compare_mode; if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { state->compare_func = sampler->compare_func; } state->normalized_coords = sampler->normalized_coords; - state->lod_bias = sampler->lod_bias; - if (!view->last_level && - sampler->min_img_filter == sampler->mag_img_filter) { - state->min_lod = 0.0f; - state->max_lod = 0.0f; - } else { - state->min_lod = MAX2(sampler->min_lod, 0.0f); - state->max_lod = sampler->max_lod; - } - state->border_color[0] = sampler->border_color[0]; - state->border_color[1] = sampler->border_color[1]; - state->border_color[2] = sampler->border_color[2]; - state->border_color[3] = sampler->border_color[3]; /* * FIXME: Handle the remainder of pipe_sampler_view. @@ -126,6 +175,220 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, /** + * Generate code to compute coordinate gradient (rho). + * \param ddx partial derivatives of (s, t, r, q) with respect to X + * \param ddy partial derivatives of (s, t, r, q) with respect to Y + * + * XXX: The resulting rho is scalar, so we ignore all but the first element of + * derivatives that are passed by the shader. + */ +static LLVMValueRef +lp_build_rho(struct lp_build_sample_context *bld, + const LLVMValueRef ddx[4], + const LLVMValueRef ddy[4]) +{ + struct lp_build_context *float_size_bld = &bld->float_size_bld; + struct lp_build_context *float_bld = &bld->float_bld; + const unsigned dims = bld->dims; + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); + LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0); + LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0); + LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; + LLVMValueRef rho_x, rho_y; + LLVMValueRef rho_vec; + LLVMValueRef float_size; + LLVMValueRef rho; + + dsdx = ddx[0]; + dsdy = ddy[0]; + + if (dims <= 1) { + rho_x = dsdx; + rho_y = dsdy; + } + else { + rho_x = float_size_bld->undef; + rho_y = float_size_bld->undef; + + rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dsdx, index0, ""); + rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dsdy, index0, ""); + + dtdx = ddx[1]; + dtdy = ddy[1]; + + rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dtdx, index1, ""); + rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dtdy, index1, ""); + + if (dims >= 3) { + drdx = ddx[2]; + drdy = ddy[2]; + + rho_x = LLVMBuildInsertElement(bld->builder, rho_x, drdx, index2, ""); + rho_y = LLVMBuildInsertElement(bld->builder, rho_y, drdy, index2, ""); + } + } + + rho_x = lp_build_abs(float_size_bld, rho_x); + rho_y = lp_build_abs(float_size_bld, rho_y); + + rho_vec = lp_build_max(float_size_bld, rho_x, rho_y); + + float_size = lp_build_int_to_float(float_size_bld, bld->int_size); + + rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size); + + if (dims <= 1) { + rho = rho_vec; + } + else { + if (dims >= 2) { + LLVMValueRef rho_s, rho_t, rho_r; + + rho_s = LLVMBuildExtractElement(bld->builder, rho_vec, index0, ""); + rho_t = LLVMBuildExtractElement(bld->builder, rho_vec, index1, ""); + + rho = lp_build_max(float_bld, rho_s, rho_t); + + if (dims >= 3) { + rho_r = LLVMBuildExtractElement(bld->builder, rho_vec, index0, ""); + rho = lp_build_max(float_bld, rho, rho_r); + } + } + } + + return rho; +} + + +/* + * Bri-linear lod computation + * + * Use a piece-wise linear approximation of log2 such that: + * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc. + * - linear approximation for values in the neighborhood of 0.5, 1.5., etc, + * with the steepness specified in 'factor' + * - exact result for 0.5, 1.5, etc. + * + * + * 1.0 - /----* + * / + * / + * / + * 0.5 - * + * / + * / + * / + * 0.0 - *----/ + * + * | | + * 2^0 2^1 + * + * This is a technique also commonly used in hardware: + * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html + * + * TODO: For correctness, this should only be applied when texture is known to + * have regular mipmaps, i.e., mipmaps derived from the base level. + * + * TODO: This could be done in fixed point, where applicable. + */ +static void +lp_build_brilinear_lod(struct lp_build_context *bld, + LLVMValueRef lod, + double factor, + LLVMValueRef *out_lod_ipart, + LLVMValueRef *out_lod_fpart) +{ + LLVMValueRef lod_fpart; + double pre_offset = (factor - 0.5)/factor - 0.5; + double post_offset = 1 - factor; + + if (0) { + lp_build_printf(bld->builder, "lod = %f\n", lod); + } + + lod = lp_build_add(bld, lod, + lp_build_const_vec(bld->type, pre_offset)); + + lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart); + + lod_fpart = lp_build_mul(bld, lod_fpart, + lp_build_const_vec(bld->type, factor)); + + lod_fpart = lp_build_add(bld, lod_fpart, + lp_build_const_vec(bld->type, post_offset)); + + /* + * It's not necessary to clamp lod_fpart since: + * - the above expression will never produce numbers greater than one. + * - the mip filtering branch is only taken if lod_fpart is positive + */ + + *out_lod_fpart = lod_fpart; + + if (0) { + lp_build_printf(bld->builder, "lod_ipart = %i\n", *out_lod_ipart); + lp_build_printf(bld->builder, "lod_fpart = %f\n\n", *out_lod_fpart); + } +} + + +/* + * Combined log2 and brilinear lod computation. + * + * It's in all identical to calling lp_build_fast_log2() and + * lp_build_brilinear_lod() above, but by combining we can compute the interger + * and fractional part independently. + */ +static void +lp_build_brilinear_rho(struct lp_build_context *bld, + LLVMValueRef rho, + double factor, + LLVMValueRef *out_lod_ipart, + LLVMValueRef *out_lod_fpart) +{ + LLVMValueRef lod_ipart; + LLVMValueRef lod_fpart; + + const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor); + const double post_offset = 1 - 2*factor; + + assert(bld->type.floating); + + assert(lp_check_value(bld->type, rho)); + + /* + * The pre factor will make the intersections with the exact powers of two + * happen precisely where we want then to be, which means that the integer + * part will not need any post adjustments. + */ + rho = lp_build_mul(bld, rho, + lp_build_const_vec(bld->type, pre_factor)); + + /* ipart = ifloor(log2(rho)) */ + lod_ipart = lp_build_extract_exponent(bld, rho, 0); + + /* fpart = rho / 2**ipart */ + lod_fpart = lp_build_extract_mantissa(bld, rho); + + lod_fpart = lp_build_mul(bld, lod_fpart, + lp_build_const_vec(bld->type, factor)); + + lod_fpart = lp_build_add(bld, lod_fpart, + lp_build_const_vec(bld->type, post_offset)); + + /* + * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since: + * - the above expression will never produce numbers greater than one. + * - the mip filtering branch is only taken if lod_fpart is positive + */ + + *out_lod_ipart = lod_ipart; + *out_lod_fpart = lod_fpart; +} + + +/** * Generate code to compute texture level of detail (lambda). * \param ddx partial derivatives of (s, t, r, q) with respect to X * \param ddy partial derivatives of (s, t, r, q) with respect to Y @@ -138,83 +401,81 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, * XXX: The resulting lod is scalar, so ignore all but the first element of * derivatives, lod_bias, etc that are passed by the shader. */ -LLVMValueRef +void lp_build_lod_selector(struct lp_build_sample_context *bld, + unsigned unit, const LLVMValueRef ddx[4], const LLVMValueRef ddy[4], LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef depth) + unsigned mip_filter, + LLVMValueRef *out_lod_ipart, + LLVMValueRef *out_lod_fpart) { - if (bld->static_state->min_lod == bld->static_state->max_lod) { + struct lp_build_context *float_bld = &bld->float_bld; + LLVMValueRef lod; + + *out_lod_ipart = bld->int_bld.zero; + *out_lod_fpart = bld->float_bld.zero; + + if (bld->static_state->min_max_lod_equal) { /* User is forcing sampling from a particular mipmap level. * This is hit during mipmap generation. */ - return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); + LLVMValueRef min_lod = + bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit); + + lod = min_lod; } else { - struct lp_build_context *float_bld = &bld->float_bld; - LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(), - bld->static_state->lod_bias); - LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), - bld->static_state->min_lod); - LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), - bld->static_state->max_lod); + LLVMValueRef sampler_lod_bias = + bld->dynamic_state->lod_bias(bld->dynamic_state, bld->builder, unit); LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - LLVMValueRef lod; if (explicit_lod) { lod = LLVMBuildExtractElement(bld->builder, explicit_lod, index0, ""); } else { - const int dims = texture_dims(bld->static_state->target); - LLVMValueRef dsdx, dsdy; - LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL; LLVMValueRef rho; - dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx"); - dsdx = lp_build_abs(float_bld, dsdx); - dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy"); - dsdy = lp_build_abs(float_bld, dsdy); - if (dims > 1) { - dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx"); - dtdx = lp_build_abs(float_bld, dtdx); - dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy"); - dtdy = lp_build_abs(float_bld, dtdy); - if (dims > 2) { - drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx"); - drdx = lp_build_abs(float_bld, drdx); - drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy"); - drdy = lp_build_abs(float_bld, drdy); - } - } + rho = lp_build_rho(bld, ddx, ddy); - /* Compute rho = max of all partial derivatives scaled by texture size. - * XXX this could be vectorized somewhat + /* + * Compute lod = log2(rho) */ - rho = LLVMBuildFMul(bld->builder, - lp_build_max(float_bld, dsdx, dsdy), - lp_build_int_to_float(float_bld, width), ""); - if (dims > 1) { - LLVMValueRef max; - max = LLVMBuildFMul(bld->builder, - lp_build_max(float_bld, dtdx, dtdy), - lp_build_int_to_float(float_bld, height), ""); - rho = lp_build_max(float_bld, rho, max); - if (dims > 2) { - max = LLVMBuildFMul(bld->builder, - lp_build_max(float_bld, drdx, drdy), - lp_build_int_to_float(float_bld, depth), ""); - rho = lp_build_max(float_bld, rho, max); + + if (!lod_bias && + !bld->static_state->lod_bias_non_zero && + !bld->static_state->apply_max_lod && + !bld->static_state->apply_min_lod) { + /* + * Special case when there are no post-log2 adjustments, which + * saves instructions but keeping the integer and fractional lod + * computations separate from the start. + */ + + if (mip_filter == PIPE_TEX_MIPFILTER_NONE || + mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { + *out_lod_ipart = lp_build_ilog2(float_bld, rho); + *out_lod_fpart = bld->float_bld.zero; + return; + } + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR && + !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) { + lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR, + out_lod_ipart, out_lod_fpart); + return; } } - /* compute lod = log2(rho) */ - lod = lp_build_log2(float_bld, rho); + if (0) { + lod = lp_build_log2(float_bld, rho); + } + else { + lod = lp_build_fast_log2(float_bld, rho); + } /* add shader lod bias */ if (lod_bias) { @@ -225,13 +486,43 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, } /* add sampler lod bias */ - lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); + if (bld->static_state->lod_bias_non_zero) + lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); + /* clamp lod */ - lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); + if (bld->static_state->apply_max_lod) { + LLVMValueRef max_lod = + bld->dynamic_state->max_lod(bld->dynamic_state, bld->builder, unit); - return lod; + lod = lp_build_min(float_bld, lod, max_lod); + } + if (bld->static_state->apply_min_lod) { + LLVMValueRef min_lod = + bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit); + + lod = lp_build_max(float_bld, lod, min_lod); + } + } + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) { + lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR, + out_lod_ipart, out_lod_fpart); + } + else { + lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart); + } + + lp_build_name(*out_lod_fpart, "lod_fpart"); + } + else { + *out_lod_ipart = lp_build_iround(float_bld, lod); } + + lp_build_name(*out_lod_ipart, "lod_ipart"); + + return; } @@ -245,10 +536,9 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, void lp_build_nearest_mip_level(struct lp_build_sample_context *bld, unsigned unit, - LLVMValueRef lod, + LLVMValueRef lod_ipart, LLVMValueRef *level_out) { - struct lp_build_context *float_bld = &bld->float_bld; struct lp_build_context *int_bld = &bld->int_bld; LLVMValueRef last_level, level; @@ -258,7 +548,7 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, bld->builder, unit); /* convert float lod to integer */ - level = lp_build_iround(float_bld, lod); + level = lod_ipart; /* clamp level to legal range of levels */ *level_out = lp_build_clamp(int_bld, level, zero, last_level); @@ -273,43 +563,77 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, void lp_build_linear_mip_levels(struct lp_build_sample_context *bld, unsigned unit, - LLVMValueRef lod, + LLVMValueRef lod_ipart, + LLVMValueRef *lod_fpart_inout, LLVMValueRef *level0_out, - LLVMValueRef *level1_out, - LLVMValueRef *weight_out) + LLVMValueRef *level1_out) { - struct lp_build_context *float_bld = &bld->float_bld; + LLVMBuilderRef builder = bld->builder; struct lp_build_context *int_bld = &bld->int_bld; - LLVMValueRef last_level, level; + struct lp_build_context *float_bld = &bld->float_bld; + LLVMValueRef last_level; + LLVMValueRef clamp_min; + LLVMValueRef clamp_max; + + *level0_out = lod_ipart; + *level1_out = lp_build_add(int_bld, lod_ipart, int_bld->one); last_level = bld->dynamic_state->last_level(bld->dynamic_state, bld->builder, unit); - /* convert float lod to integer */ - level = lp_build_ifloor(float_bld, lod); - - /* compute level 0 and clamp to legal range of levels */ - *level0_out = lp_build_clamp(int_bld, level, - int_bld->zero, - last_level); - /* compute level 1 and clamp to legal range of levels */ - level = lp_build_add(int_bld, level, int_bld->one); - *level1_out = lp_build_clamp(int_bld, level, - int_bld->zero, - last_level); - - *weight_out = lp_build_fract(float_bld, lod); + /* + * Clamp both lod_ipart and lod_ipart + 1 to [0, last_level], with the + * minimum number of comparisons, and zeroing lod_fpart in the extreme + * ends in the process. + */ + + /* lod_ipart < 0 */ + clamp_min = LLVMBuildICmp(builder, LLVMIntSLT, + lod_ipart, int_bld->zero, + "clamp_lod_to_zero"); + + *level0_out = LLVMBuildSelect(builder, clamp_min, + int_bld->zero, *level0_out, ""); + + *level1_out = LLVMBuildSelect(builder, clamp_min, + int_bld->zero, *level1_out, ""); + + *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min, + float_bld->zero, *lod_fpart_inout, ""); + + /* lod_ipart >= last_level */ + clamp_max = LLVMBuildICmp(builder, LLVMIntSGE, + lod_ipart, last_level, + "clamp_lod_to_last"); + + *level0_out = LLVMBuildSelect(builder, clamp_max, + last_level, *level0_out, ""); + + *level1_out = LLVMBuildSelect(builder, clamp_max, + last_level, *level1_out, ""); + + *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max, + float_bld->zero, *lod_fpart_inout, ""); + + lp_build_name(*level0_out, "sampler%u_miplevel0", unit); + lp_build_name(*level1_out, "sampler%u_miplevel1", unit); + lp_build_name(*lod_fpart_inout, "sampler%u_mipweight", unit); } +/** + * Return pointer to a single mipmap level. + * \param data_array array of pointers to mipmap levels + * \param level integer mipmap level + */ LLVMValueRef lp_build_get_mipmap_level(struct lp_build_sample_context *bld, - LLVMValueRef data_array, LLVMValueRef level) + LLVMValueRef level) { LLVMValueRef indexes[2], data_ptr; indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); indexes[1] = level; - data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, ""); + data_ptr = LLVMBuildGEP(bld->builder, bld->data_array, indexes, 2, ""); data_ptr = LLVMBuildLoad(bld->builder, data_ptr, ""); return data_ptr; } @@ -317,10 +641,10 @@ lp_build_get_mipmap_level(struct lp_build_sample_context *bld, LLVMValueRef lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, - LLVMValueRef data_array, int level) + int level) { LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); - return lp_build_get_mipmap_level(bld, data_array, lvl); + return lp_build_get_mipmap_level(bld, lvl); } @@ -329,13 +653,24 @@ lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, * Return max(1, base_size >> level); */ static LLVMValueRef -lp_build_minify(struct lp_build_sample_context *bld, +lp_build_minify(struct lp_build_context *bld, LLVMValueRef base_size, LLVMValueRef level) { - LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify"); - size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one); - return size; + assert(lp_check_value(bld->type, base_size)); + assert(lp_check_value(bld->type, level)); + + if (level == bld->zero) { + /* if we're using mipmap level zero, no minification is needed */ + return base_size; + } + else { + LLVMValueRef size = + LLVMBuildLShr(bld->builder, base_size, level, "minify"); + assert(bld->type.sign); + size = lp_build_max(bld, size, bld->one); + return size; + } } @@ -364,71 +699,113 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, */ void lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, - unsigned dims, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, - LLVMValueRef ilevel0, - LLVMValueRef ilevel1, - LLVMValueRef row_stride_array, - LLVMValueRef img_stride_array, - LLVMValueRef *width0_vec, - LLVMValueRef *width1_vec, - LLVMValueRef *height0_vec, - LLVMValueRef *height1_vec, - LLVMValueRef *depth0_vec, - LLVMValueRef *depth1_vec, - LLVMValueRef *row_stride0_vec, - LLVMValueRef *row_stride1_vec, - LLVMValueRef *img_stride0_vec, - LLVMValueRef *img_stride1_vec) + LLVMValueRef ilevel, + LLVMValueRef *out_size, + LLVMValueRef *row_stride_vec, + LLVMValueRef *img_stride_vec) { - const unsigned mip_filter = bld->static_state->min_mip_filter; - LLVMValueRef ilevel0_vec, ilevel1_vec; + const unsigned dims = bld->dims; + LLVMValueRef ilevel_vec; - ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0); - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) - ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1); + ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel); /* - * Compute width, height, depth at mipmap level 'ilevel0' + * Compute width, height, depth at mipmap level 'ilevel' */ - *width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec); + *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec); + if (dims >= 2) { - *height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec); - *row_stride0_vec = lp_build_get_level_stride_vec(bld, - row_stride_array, - ilevel0); + *row_stride_vec = lp_build_get_level_stride_vec(bld, + bld->row_stride_array, + ilevel); if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { - *img_stride0_vec = lp_build_get_level_stride_vec(bld, - img_stride_array, - ilevel0); - if (dims == 3) { - *depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec); - } + *img_stride_vec = lp_build_get_level_stride_vec(bld, + bld->img_stride_array, + ilevel); } } - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* compute width, height, depth for second mipmap level at 'ilevel1' */ - *width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec); - if (dims >= 2) { - *height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec); - *row_stride1_vec = lp_build_get_level_stride_vec(bld, - row_stride_array, - ilevel1); - if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { - *img_stride1_vec = lp_build_get_level_stride_vec(bld, - img_stride_array, - ilevel1); - if (dims == 3) { - *depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec); - } - } +} + + +/** + * Extract and broadcast texture size. + * + * @param size_type type of the texture size vector (either + * bld->int_size_type or bld->float_size_type) + * @param coord_type type of the texture size vector (either + * bld->int_coord_type or bld->coord_type) + * @param int_size vector with the integer texture size (width, height, + * depth) + */ +void +lp_build_extract_image_sizes(struct lp_build_sample_context *bld, + struct lp_type size_type, + struct lp_type coord_type, + LLVMValueRef size, + LLVMValueRef *out_width, + LLVMValueRef *out_height, + LLVMValueRef *out_depth) +{ + const unsigned dims = bld->dims; + LLVMTypeRef i32t = LLVMInt32Type(); + + *out_width = lp_build_extract_broadcast(bld->builder, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 0, 0)); + if (dims >= 2) { + *out_height = lp_build_extract_broadcast(bld->builder, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 1, 0)); + if (dims == 3) { + *out_depth = lp_build_extract_broadcast(bld->builder, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 2, 0)); } } } +/** + * Unnormalize coords. + * + * @param int_size vector with the integer texture size (width, height, depth) + */ +void +lp_build_unnormalized_coords(struct lp_build_sample_context *bld, + LLVMValueRef flt_size, + LLVMValueRef *s, + LLVMValueRef *t, + LLVMValueRef *r) +{ + const unsigned dims = bld->dims; + LLVMValueRef width; + LLVMValueRef height; + LLVMValueRef depth; + + lp_build_extract_image_sizes(bld, + bld->float_size_type, + bld->coord_type, + flt_size, + &width, + &height, + &depth); + + /* s = s * width, t = t * height */ + *s = lp_build_mul(&bld->coord_bld, *s, width); + if (dims >= 2) { + *t = lp_build_mul(&bld->coord_bld, *t, height); + if (dims >= 3) { + *r = lp_build_mul(&bld->coord_bld, *r, depth); + } + } +} + /** Helper used by lp_build_cube_lookup() */ static LLVMValueRef @@ -547,25 +924,16 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, ""); { - struct lp_build_flow_context *flow_ctx; struct lp_build_if_state if_ctx; + LLVMValueRef face_s_var; + LLVMValueRef face_t_var; + LLVMValueRef face_var; - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); - - *face_s = bld->coord_bld.undef; - *face_t = bld->coord_bld.undef; - *face = bld->int_bld.undef; - - lp_build_name(*face_s, "face_s"); - lp_build_name(*face_t, "face_t"); - lp_build_name(*face, "face"); + face_s_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_s_var"); + face_t_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_t_var"); + face_var = lp_build_alloca(bld->builder, bld->int_bld.vec_type, "face_var"); - lp_build_flow_scope_declare(flow_ctx, face_s); - lp_build_flow_scope_declare(flow_ctx, face_t); - lp_build_flow_scope_declare(flow_ctx, face); - - lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz); + lp_build_if(&if_ctx, bld->builder, arx_ge_ary_arz); { /* +/- X face */ LLVMValueRef sign = lp_build_sgn(float_bld, rx); @@ -575,57 +943,52 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, *face = lp_build_cube_face(bld, rx, PIPE_TEX_FACE_POS_X, PIPE_TEX_FACE_NEG_X); + LLVMBuildStore(bld->builder, *face_s, face_s_var); + LLVMBuildStore(bld->builder, *face_t, face_t_var); + LLVMBuildStore(bld->builder, *face, face_var); } lp_build_else(&if_ctx); { - struct lp_build_flow_context *flow_ctx2; struct lp_build_if_state if_ctx2; - LLVMValueRef face_s2 = bld->coord_bld.undef; - LLVMValueRef face_t2 = bld->coord_bld.undef; - LLVMValueRef face2 = bld->int_bld.undef; - - flow_ctx2 = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx2); - lp_build_flow_scope_declare(flow_ctx2, &face_s2); - lp_build_flow_scope_declare(flow_ctx2, &face_t2); - lp_build_flow_scope_declare(flow_ctx2, &face2); - ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); - lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz); + lp_build_if(&if_ctx2, bld->builder, ary_ge_arx_arz); { /* +/- Y face */ LLVMValueRef sign = lp_build_sgn(float_bld, ry); LLVMValueRef ima = lp_build_cube_ima(coord_bld, t); - face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); - face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima); - face2 = lp_build_cube_face(bld, ry, + *face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); + *face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima); + *face = lp_build_cube_face(bld, ry, PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y); + LLVMBuildStore(bld->builder, *face_s, face_s_var); + LLVMBuildStore(bld->builder, *face_t, face_t_var); + LLVMBuildStore(bld->builder, *face, face_var); } lp_build_else(&if_ctx2); { /* +/- Z face */ LLVMValueRef sign = lp_build_sgn(float_bld, rz); LLVMValueRef ima = lp_build_cube_ima(coord_bld, r); - face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima); - face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); - face2 = lp_build_cube_face(bld, rz, + *face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima); + *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); + *face = lp_build_cube_face(bld, rz, PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z); + LLVMBuildStore(bld->builder, *face_s, face_s_var); + LLVMBuildStore(bld->builder, *face_t, face_t_var); + LLVMBuildStore(bld->builder, *face, face_var); } lp_build_endif(&if_ctx2); - lp_build_flow_scope_end(flow_ctx2); - lp_build_flow_destroy(flow_ctx2); - *face_s = face_s2; - *face_t = face_t2; - *face = face2; } lp_build_endif(&if_ctx); - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); + + *face_s = LLVMBuildLoad(bld->builder, face_s_var, "face_s"); + *face_t = LLVMBuildLoad(bld->builder, face_t_var, "face_t"); + *face = LLVMBuildLoad(bld->builder, face_var, "face"); } } @@ -659,11 +1022,21 @@ lp_build_sample_partial_offset(struct lp_build_context *bld, * Pixel blocks have power of two dimensions. LLVM should convert the * rem/div to bit arithmetic. * TODO: Verify this. + * It does indeed BUT it does transform it to scalar (and back) when doing so + * (using roughly extract, shift/and, mov, unpack) (llvm 2.7). + * The generated code looks seriously unfunny and is quite expensive. */ - +#if 0 LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length); subcoord = LLVMBuildURem(bld->builder, coord, block_width, ""); coord = LLVMBuildUDiv(bld->builder, coord, block_width, ""); +#else + unsigned logbase2 = util_unsigned_logbase2(block_length); + LLVMValueRef block_shift = lp_build_const_int_vec(bld->type, logbase2); + LLVMValueRef block_mask = lp_build_const_int_vec(bld->type, block_length - 1); + subcoord = LLVMBuildAnd(bld->builder, coord, block_mask, ""); + coord = LLVMBuildLShr(bld->builder, coord, block_shift, ""); +#endif } offset = lp_build_mul(bld, coord, stride); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index 8b042d5242..ffed27cee8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -82,12 +82,10 @@ struct lp_sampler_static_state unsigned compare_mode:1; unsigned compare_func:3; unsigned normalized_coords:1; - float lod_bias, min_lod, max_lod; - float border_color[4]; - - /* Aero hacks */ - unsigned force_nearest_s:1; - unsigned force_nearest_t:1; + unsigned min_max_lod_equal:1; /**< min_lod == max_lod ? */ + unsigned lod_bias_non_zero:1; + unsigned apply_min_lod:1; /**< min_lod > 0 ? */ + unsigned apply_max_lod:1; /**< max_lod < last_level ? */ }; @@ -104,45 +102,67 @@ struct lp_sampler_static_state struct lp_sampler_dynamic_state { - /** Obtain the base texture width. */ + /** Obtain the base texture width (returns int32) */ LLVMValueRef (*width)( const struct lp_sampler_dynamic_state *state, LLVMBuilderRef builder, unsigned unit); - /** Obtain the base texture height. */ + /** Obtain the base texture height (returns int32) */ LLVMValueRef (*height)( const struct lp_sampler_dynamic_state *state, LLVMBuilderRef builder, unsigned unit); - /** Obtain the base texture depth. */ + /** Obtain the base texture depth (returns int32) */ LLVMValueRef (*depth)( const struct lp_sampler_dynamic_state *state, LLVMBuilderRef builder, unsigned unit); - /** Obtain the number of mipmap levels (minus one). */ + /** Obtain the number of mipmap levels minus one (returns int32) */ LLVMValueRef (*last_level)( const struct lp_sampler_dynamic_state *state, LLVMBuilderRef builder, unsigned unit); + /** Obtain stride in bytes between image rows/blocks (returns int32) */ LLVMValueRef (*row_stride)( const struct lp_sampler_dynamic_state *state, LLVMBuilderRef builder, unsigned unit); + /** Obtain stride in bytes between image slices (returns int32) */ LLVMValueRef (*img_stride)( const struct lp_sampler_dynamic_state *state, LLVMBuilderRef builder, unsigned unit); + /** Obtain pointer to array of pointers to mimpap levels */ LLVMValueRef (*data_ptr)( const struct lp_sampler_dynamic_state *state, LLVMBuilderRef builder, unsigned unit); + /** Obtain texture min lod (returns float) */ + LLVMValueRef + (*min_lod)(const struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, unsigned unit); + + /** Obtain texture max lod (returns float) */ + LLVMValueRef + (*max_lod)(const struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, unsigned unit); + + /** Obtain texture lod bias (returns float) */ + LLVMValueRef + (*lod_bias)(const struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, unsigned unit); + + /** Obtain texture border color (returns ptr to float[4]) */ + LLVMValueRef + (*border_color)(const struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, unsigned unit); }; @@ -159,10 +179,16 @@ struct lp_build_sample_context const struct util_format_description *format_desc; + /* See texture_dims() */ + unsigned dims; + /** regular scalar float type */ struct lp_type float_type; struct lp_build_context float_bld; + /** float vector type */ + struct lp_build_context float_vec_bld; + /** regular scalar float type */ struct lp_type int_type; struct lp_build_context int_bld; @@ -171,17 +197,32 @@ struct lp_build_sample_context struct lp_type coord_type; struct lp_build_context coord_bld; - /** Unsigned integer coordinates */ - struct lp_type uint_coord_type; - struct lp_build_context uint_coord_bld; - /** Signed integer coordinates */ struct lp_type int_coord_type; struct lp_build_context int_coord_bld; + /** Unsigned integer texture size */ + struct lp_type int_size_type; + struct lp_build_context int_size_bld; + + /** Unsigned integer texture size */ + struct lp_type float_size_type; + struct lp_build_context float_size_bld; + /** Output texels type and build context */ struct lp_type texel_type; struct lp_build_context texel_bld; + + /* Common dynamic state values */ + LLVMValueRef width; + LLVMValueRef height; + LLVMValueRef depth; + LLVMValueRef row_stride_array; + LLVMValueRef img_stride_array; + LLVMValueRef data_array; + + /** Integer vector with texture width, height, depth */ + LLVMValueRef int_size; }; @@ -218,7 +259,7 @@ apply_sampler_swizzle(struct lp_build_sample_context *bld, } -static INLINE int +static INLINE unsigned texture_dims(enum pipe_texture_target tex) { switch (tex) { @@ -237,6 +278,11 @@ texture_dims(enum pipe_texture_target tex) } +boolean +lp_sampler_wrap_mode_uses_border_color(unsigned mode, + unsigned min_img_filter, + unsigned mag_img_filter); + /** * Derive the sampler static state. */ @@ -246,15 +292,16 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, const struct pipe_sampler_state *sampler); -LLVMValueRef +void lp_build_lod_selector(struct lp_build_sample_context *bld, + unsigned unit, const LLVMValueRef ddx[4], const LLVMValueRef ddy[4], LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef depth); + unsigned mip_filter, + LLVMValueRef *out_lod_ipart, + LLVMValueRef *out_lod_fpart); void lp_build_nearest_mip_level(struct lp_build_sample_context *bld, @@ -265,40 +312,44 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, void lp_build_linear_mip_levels(struct lp_build_sample_context *bld, unsigned unit, - LLVMValueRef lod, + LLVMValueRef lod_ipart, + LLVMValueRef *lod_fpart_inout, LLVMValueRef *level0_out, - LLVMValueRef *level1_out, - LLVMValueRef *weight_out); + LLVMValueRef *level1_out); LLVMValueRef lp_build_get_mipmap_level(struct lp_build_sample_context *bld, - LLVMValueRef data_array, LLVMValueRef level); + LLVMValueRef level); LLVMValueRef lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, - LLVMValueRef data_array, int level); + int level); void lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, - unsigned dims, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, - LLVMValueRef ilevel0, - LLVMValueRef ilevel1, - LLVMValueRef row_stride_array, - LLVMValueRef img_stride_array, - LLVMValueRef *width0_vec, - LLVMValueRef *width1_vec, - LLVMValueRef *height0_vec, - LLVMValueRef *height1_vec, - LLVMValueRef *depth0_vec, - LLVMValueRef *depth1_vec, - LLVMValueRef *row_stride0_vec, - LLVMValueRef *row_stride1_vec, - LLVMValueRef *img_stride0_vec, - LLVMValueRef *img_stride1_vec); + LLVMValueRef ilevel, + LLVMValueRef *out_size_vec, + LLVMValueRef *row_stride_vec, + LLVMValueRef *img_stride_vec); + + +void +lp_build_extract_image_sizes(struct lp_build_sample_context *bld, + struct lp_type size_type, + struct lp_type coord_type, + LLVMValueRef size, + LLVMValueRef *out_width, + LLVMValueRef *out_height, + LLVMValueRef *out_depth); + + +void +lp_build_unnormalized_coords(struct lp_build_sample_context *bld, + LLVMValueRef flt_size, + LLVMValueRef *s, + LLVMValueRef *t, + LLVMValueRef *r); void diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index 7e064900e7..d3e3b242af 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -45,6 +45,7 @@ #include "lp_bld_const.h" #include "lp_bld_conv.h" #include "lp_bld_arit.h" +#include "lp_bld_bitarit.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_pack.h" @@ -80,20 +81,21 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, LLVMValueRef *out_offset, LLVMValueRef *out_i) { - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef length_minus_one; - length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); + length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); switch(wrap_mode) { case PIPE_TEX_WRAP_REPEAT: if(is_pot) coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); - else - /* Signed remainder won't give the right results for negative - * dividends but unsigned remainder does.*/ + else { + /* Add a bias to the texcoord to handle negative coords */ + LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); + coord = LLVMBuildAdd(bld->builder, coord, bias, ""); coord = LLVMBuildURem(bld->builder, coord, length, ""); + } break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: @@ -111,7 +113,7 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, assert(0); } - lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride, + lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride, out_offset, out_i); } @@ -144,7 +146,6 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, LLVMValueRef *i0, LLVMValueRef *i1) { - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef length_minus_one; LLVMValueRef lmask, umask, mask; @@ -186,8 +187,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, * multiplication. */ - *i0 = uint_coord_bld->zero; - *i1 = uint_coord_bld->zero; + *i0 = int_coord_bld->zero; + *i1 = int_coord_bld->zero; length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); @@ -197,17 +198,18 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); } else { - /* Signed remainder won't give the right results for negative - * dividends but unsigned remainder does.*/ + /* Add a bias to the texcoord to handle negative coords */ + LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); + coord0 = LLVMBuildAdd(bld->builder, coord0, bias, ""); coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); } mask = lp_build_compare(bld->builder, int_coord_bld->type, PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); - *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); + *offset0 = lp_build_mul(int_coord_bld, coord0, stride); *offset1 = LLVMBuildAnd(bld->builder, - lp_build_add(uint_coord_bld, *offset0, stride), + lp_build_add(int_coord_bld, *offset0, stride), mask, ""); break; @@ -222,8 +224,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, mask = LLVMBuildAnd(bld->builder, lmask, umask, ""); - *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); - *offset1 = lp_build_add(uint_coord_bld, + *offset0 = lp_build_mul(int_coord_bld, coord0, stride); + *offset1 = lp_build_add(int_coord_bld, *offset0, LLVMBuildAnd(bld->builder, stride, mask, "")); break; @@ -236,8 +238,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: default: assert(0); - *offset0 = uint_coord_bld->zero; - *offset1 = uint_coord_bld->zero; + *offset0 = int_coord_bld->zero; + *offset1 = int_coord_bld->zero; break; } } @@ -250,9 +252,7 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, */ static void lp_build_sample_image_nearest(struct lp_build_sample_context *bld, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + LLVMValueRef int_size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -262,11 +262,12 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, LLVMValueRef *colors_lo, LLVMValueRef *colors_hi) { - const int dims = texture_dims(bld->static_state->target); + const unsigned dims = bld->dims; LLVMBuilderRef builder = bld->builder; struct lp_build_context i32, h16, u8n; LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; LLVMValueRef i32_c8; + LLVMValueRef width_vec, height_vec, depth_vec; LLVMValueRef s_ipart, t_ipart, r_ipart; LLVMValueRef x_stride; LLVMValueRef x_offset, offset; @@ -280,30 +281,33 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, h16_vec_type = lp_build_vec_type(h16.type); u8n_vec_type = lp_build_vec_type(u8n.type); + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + int_size, + &width_vec, + &height_vec, + &depth_vec); + if (bld->static_state->normalized_coords) { - /* s = s * width, t = t * height */ - LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); - LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec, - coord_vec_type, ""); - s = lp_build_mul(&bld->coord_bld, s, fp_width); - if (dims >= 2) { - LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec, - coord_vec_type, ""); - t = lp_build_mul(&bld->coord_bld, t, fp_height); - if (dims >= 3) { - LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec, - coord_vec_type, ""); - r = lp_build_mul(&bld->coord_bld, r, fp_depth); - } - } - } + LLVMValueRef scaled_size; + LLVMValueRef flt_size; - /* scale coords by 256 (8 fractional bits) */ - s = lp_build_mul_imm(&bld->coord_bld, s, 256); - if (dims >= 2) - t = lp_build_mul_imm(&bld->coord_bld, t, 256); - if (dims >= 3) - r = lp_build_mul_imm(&bld->coord_bld, r, 256); + /* scale size by 256 (8 fractional bits) */ + scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size); + + lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); + } + else { + /* scale coords by 256 (8 fractional bits) */ + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + if (dims >= 2) + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + if (dims >= 3) + r = lp_build_mul_imm(&bld->coord_bld, r, 256); + } /* convert float to int */ s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); @@ -321,7 +325,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, r_ipart = LLVMBuildAShr(builder, r, i32_c8, ""); /* get pixel, row, image strides */ - x_stride = lp_build_const_vec(bld->uint_coord_bld.type, + x_stride = lp_build_const_vec(bld->int_coord_bld.type, bld->format_desc->block.bits/8); /* Do texcoord wrapping, compute texel offset */ @@ -340,7 +344,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, bld->static_state->pot_height, bld->static_state->wrap_t, &y_offset, &y_subcoord); - offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset); + offset = lp_build_add(&bld->int_coord_bld, offset, y_offset); if (dims >= 3) { LLVMValueRef z_offset; lp_build_sample_wrap_nearest_int(bld, @@ -349,13 +353,13 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, bld->static_state->pot_height, bld->static_state->wrap_r, &z_offset, &z_subcoord); - offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset); + offset = lp_build_add(&bld->int_coord_bld, offset, z_offset); } else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { LLVMValueRef z_offset; /* The r coord is the cube face in [0,5] */ - z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec); - offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset); + z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); + offset = lp_build_add(&bld->int_coord_bld, offset, z_offset); } } @@ -414,9 +418,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, */ static void lp_build_sample_image_linear(struct lp_build_sample_context *bld, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + LLVMValueRef int_size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -426,11 +428,12 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef *colors_lo, LLVMValueRef *colors_hi) { - const int dims = texture_dims(bld->static_state->target); + const unsigned dims = bld->dims; LLVMBuilderRef builder = bld->builder; struct lp_build_context i32, h16, u8n; LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; LLVMValueRef i32_c8, i32_c128, i32_c255; + LLVMValueRef width_vec, height_vec, depth_vec; LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi; @@ -455,30 +458,33 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, h16_vec_type = lp_build_vec_type(h16.type); u8n_vec_type = lp_build_vec_type(u8n.type); + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + int_size, + &width_vec, + &height_vec, + &depth_vec); + if (bld->static_state->normalized_coords) { - /* s = s * width, t = t * height */ - LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); - LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec, - coord_vec_type, ""); - s = lp_build_mul(&bld->coord_bld, s, fp_width); - if (dims >= 2) { - LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec, - coord_vec_type, ""); - t = lp_build_mul(&bld->coord_bld, t, fp_height); - } - if (dims >= 3) { - LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec, - coord_vec_type, ""); - r = lp_build_mul(&bld->coord_bld, r, fp_depth); - } - } + LLVMValueRef scaled_size; + LLVMValueRef flt_size; - /* scale coords by 256 (8 fractional bits) */ - s = lp_build_mul_imm(&bld->coord_bld, s, 256); - if (dims >= 2) - t = lp_build_mul_imm(&bld->coord_bld, t, 256); - if (dims >= 3) - r = lp_build_mul_imm(&bld->coord_bld, r, 256); + /* scale size by 256 (8 fractional bits) */ + scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size); + + lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); + } + else { + /* scale coords by 256 (8 fractional bits) */ + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + if (dims >= 2) + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + if (dims >= 3) + r = lp_build_mul_imm(&bld->coord_bld, r, 256); + } /* convert float to int */ s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); @@ -489,10 +495,8 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, /* subtract 0.5 (add -128) */ i32_c128 = lp_build_const_int_vec(i32.type, -128); - if (!bld->static_state->force_nearest_s) { - s = LLVMBuildAdd(builder, s, i32_c128, ""); - } - if (dims >= 2 && !bld->static_state->force_nearest_t) { + s = LLVMBuildAdd(builder, s, i32_c128, ""); + if (dims >= 2) { t = LLVMBuildAdd(builder, t, i32_c128, ""); } if (dims >= 3) { @@ -516,7 +520,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, r_fpart = LLVMBuildAnd(builder, r, i32_c255, ""); /* get pixel, row and image strides */ - x_stride = lp_build_const_vec(bld->uint_coord_bld.type, + x_stride = lp_build_const_vec(bld->int_coord_bld.type, bld->format_desc->block.bits/8); y_stride = row_stride_vec; z_stride = img_stride_vec; @@ -547,9 +551,9 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, for (z = 0; z < 2; z++) { for (x = 0; x < 2; x++) { - offset[z][0][x] = lp_build_add(&bld->uint_coord_bld, + offset[z][0][x] = lp_build_add(&bld->int_coord_bld, offset[z][0][x], y_offset0); - offset[z][1][x] = lp_build_add(&bld->uint_coord_bld, + offset[z][1][x] = lp_build_add(&bld->int_coord_bld, offset[z][1][x], y_offset1); } } @@ -565,20 +569,20 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, &z_subcoord[0], &z_subcoord[1]); for (y = 0; y < 2; y++) { for (x = 0; x < 2; x++) { - offset[0][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[0][y][x] = lp_build_add(&bld->int_coord_bld, offset[0][y][x], z_offset0); - offset[1][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[1][y][x] = lp_build_add(&bld->int_coord_bld, offset[1][y][x], z_offset1); } } } else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { LLVMValueRef z_offset; - z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec); + z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); for (y = 0; y < 2; y++) { for (x = 0; x < 2; x++) { /* The r coord is the cube face in [0,5] */ - offset[0][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[0][y][x] = lp_build_add(&bld->int_coord_bld, offset[0][y][x], z_offset); } } @@ -709,82 +713,56 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, /* * Linear interpolation with 8.8 fixed point. */ - if (bld->static_state->force_nearest_s) { - /* special case 1-D lerp */ - packed_lo = lp_build_lerp(&h16, - t_fpart_lo, - neighbors_lo[0][0][0], - neighbors_lo[0][0][1]); - - packed_hi = lp_build_lerp(&h16, - t_fpart_hi, - neighbors_hi[0][1][0], - neighbors_hi[0][1][0]); - } - else if (bld->static_state->force_nearest_t) { - /* special case 1-D lerp */ + if (dims == 1) { + /* 1-D lerp */ packed_lo = lp_build_lerp(&h16, - s_fpart_lo, - neighbors_lo[0][0][0], - neighbors_lo[0][0][1]); + s_fpart_lo, + neighbors_lo[0][0][0], + neighbors_lo[0][0][1]); packed_hi = lp_build_lerp(&h16, - s_fpart_hi, - neighbors_hi[0][0][0], - neighbors_hi[0][0][1]); + s_fpart_hi, + neighbors_hi[0][0][0], + neighbors_hi[0][0][1]); } else { - /* general 1/2/3-D lerping */ - if (dims == 1) { - packed_lo = lp_build_lerp(&h16, - s_fpart_lo, - neighbors_lo[0][0][0], - neighbors_lo[0][0][1]); - - packed_hi = lp_build_lerp(&h16, - s_fpart_hi, - neighbors_hi[0][0][0], - neighbors_hi[0][0][1]); - } - else { - /* 2-D lerp */ - packed_lo = lp_build_lerp_2d(&h16, - s_fpart_lo, t_fpart_lo, - neighbors_lo[0][0][0], - neighbors_lo[0][0][1], - neighbors_lo[0][1][0], - neighbors_lo[0][1][1]); - - packed_hi = lp_build_lerp_2d(&h16, - s_fpart_hi, t_fpart_hi, - neighbors_hi[0][0][0], - neighbors_hi[0][0][1], - neighbors_hi[0][1][0], - neighbors_hi[0][1][1]); - - if (dims >= 3) { - LLVMValueRef packed_lo2, packed_hi2; - - /* lerp in the second z slice */ - packed_lo2 = lp_build_lerp_2d(&h16, - s_fpart_lo, t_fpart_lo, - neighbors_lo[1][0][0], - neighbors_lo[1][0][1], - neighbors_lo[1][1][0], - neighbors_lo[1][1][1]); - - packed_hi2 = lp_build_lerp_2d(&h16, - s_fpart_hi, t_fpart_hi, - neighbors_hi[1][0][0], - neighbors_hi[1][0][1], - neighbors_hi[1][1][0], - neighbors_hi[1][1][1]); - /* interp between two z slices */ - packed_lo = lp_build_lerp(&h16, r_fpart_lo, - packed_lo, packed_lo2); - packed_hi = lp_build_lerp(&h16, r_fpart_hi, - packed_hi, packed_hi2); - } + /* 2-D lerp */ + packed_lo = lp_build_lerp_2d(&h16, + s_fpart_lo, t_fpart_lo, + neighbors_lo[0][0][0], + neighbors_lo[0][0][1], + neighbors_lo[0][1][0], + neighbors_lo[0][1][1]); + + packed_hi = lp_build_lerp_2d(&h16, + s_fpart_hi, t_fpart_hi, + neighbors_hi[0][0][0], + neighbors_hi[0][0][1], + neighbors_hi[0][1][0], + neighbors_hi[0][1][1]); + + if (dims >= 3) { + LLVMValueRef packed_lo2, packed_hi2; + + /* lerp in the second z slice */ + packed_lo2 = lp_build_lerp_2d(&h16, + s_fpart_lo, t_fpart_lo, + neighbors_lo[1][0][0], + neighbors_lo[1][0][1], + neighbors_lo[1][1][0], + neighbors_lo[1][1][1]); + + packed_hi2 = lp_build_lerp_2d(&h16, + s_fpart_hi, t_fpart_hi, + neighbors_hi[1][0][0], + neighbors_hi[1][0][1], + neighbors_hi[1][1][0], + neighbors_hi[1][1][1]); + /* interp between two z slices */ + packed_lo = lp_build_lerp(&h16, r_fpart_lo, + packed_lo, packed_lo2); + packed_hi = lp_build_lerp(&h16, r_fpart_hi, + packed_hi, packed_hi2); } } @@ -806,76 +784,124 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + LLVMValueRef ilevel0, + LLVMValueRef ilevel1, LLVMValueRef lod_fpart, - LLVMValueRef width0_vec, - LLVMValueRef width1_vec, - LLVMValueRef height0_vec, - LLVMValueRef height1_vec, - LLVMValueRef depth0_vec, - LLVMValueRef depth1_vec, - LLVMValueRef row_stride0_vec, - LLVMValueRef row_stride1_vec, - LLVMValueRef img_stride0_vec, - LLVMValueRef img_stride1_vec, - LLVMValueRef data_ptr0, - LLVMValueRef data_ptr1, - LLVMValueRef *colors_lo, - LLVMValueRef *colors_hi) + LLVMValueRef colors_lo_var, + LLVMValueRef colors_hi_var) { + LLVMBuilderRef builder = bld->builder; + LLVMValueRef size0; + LLVMValueRef size1; + LLVMValueRef row_stride0_vec; + LLVMValueRef row_stride1_vec; + LLVMValueRef img_stride0_vec; + LLVMValueRef img_stride1_vec; + LLVMValueRef data_ptr0; + LLVMValueRef data_ptr1; LLVMValueRef colors0_lo, colors0_hi; LLVMValueRef colors1_lo, colors1_hi; + + /* sample the first mipmap level */ + lp_build_mipmap_level_sizes(bld, ilevel0, + &size0, + &row_stride0_vec, &img_stride0_vec); + data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); if (img_filter == PIPE_TEX_FILTER_NEAREST) { - /* sample the first mipmap level */ lp_build_sample_image_nearest(bld, - width0_vec, height0_vec, depth0_vec, + size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, &colors0_lo, &colors0_hi); - - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level */ - lp_build_sample_image_nearest(bld, - width1_vec, height1_vec, depth1_vec, - row_stride1_vec, img_stride1_vec, - data_ptr1, s, t, r, - &colors1_lo, &colors1_hi); - } } else { assert(img_filter == PIPE_TEX_FILTER_LINEAR); - - /* sample the first mipmap level */ lp_build_sample_image_linear(bld, - width0_vec, height0_vec, depth0_vec, + size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, &colors0_lo, &colors0_hi); - - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level */ - lp_build_sample_image_linear(bld, - width1_vec, height1_vec, depth1_vec, - row_stride1_vec, img_stride1_vec, - data_ptr1, s, t, r, - &colors1_lo, &colors1_hi); - } } + /* Store the first level's colors in the output variables */ + LLVMBuildStore(builder, colors0_lo, colors_lo_var); + LLVMBuildStore(builder, colors0_hi, colors_hi_var); + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* interpolate samples from the two mipmap levels */ - struct lp_build_context h16; - lp_build_context_init(&h16, bld->builder, lp_type_ufixed(16)); - - *colors_lo = lp_build_lerp(&h16, lod_fpart, - colors0_lo, colors1_lo); - *colors_hi = lp_build_lerp(&h16, lod_fpart, - colors0_hi, colors1_hi); - } - else { - /* use first/only level's colors */ - *colors_lo = colors0_lo; - *colors_hi = colors0_hi; + LLVMValueRef h16_scale = LLVMConstReal(LLVMFloatType(), 256.0); + LLVMTypeRef i32_type = LLVMIntType(32); + struct lp_build_if_state if_ctx; + LLVMValueRef need_lerp; + + lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, ""); + lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16"); + + /* need_lerp = lod_fpart > 0 */ + need_lerp = LLVMBuildICmp(builder, LLVMIntSGT, + lod_fpart, LLVMConstNull(i32_type), + "need_lerp"); + + lp_build_if(&if_ctx, builder, need_lerp); + { + struct lp_build_context h16_bld; + + lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16)); + + /* sample the second mipmap level */ + lp_build_mipmap_level_sizes(bld, ilevel1, + &size1, + &row_stride1_vec, &img_stride1_vec); + data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); + if (img_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, + &colors1_lo, &colors1_hi); + } + else { + lp_build_sample_image_linear(bld, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, + &colors1_lo, &colors1_hi); + } + + /* interpolate samples from the two mipmap levels */ + + lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, ""); + lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart); + +#if HAVE_LLVM == 0x208 + /* This is a work-around for a bug in LLVM 2.8. + * Evidently, something goes wrong in the construction of the + * lod_fpart short[8] vector. Adding this no-effect shuffle seems + * to force the vector to be properly constructed. + * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f). + */ + { + LLVMValueRef shuffles[8], shuffle; + int i; + assert(h16_bld.type.length <= Elements(shuffles)); + for (i = 0; i < h16_bld.type.length; i++) + shuffles[i] = lp_build_const_int32(2 * (i & 1)); + shuffle = LLVMConstVector(shuffles, h16_bld.type.length); + lod_fpart = LLVMBuildShuffleVector(builder, + lod_fpart, lod_fpart, + shuffle, ""); + } +#endif + + colors0_lo = lp_build_lerp(&h16_bld, lod_fpart, + colors0_lo, colors1_lo); + colors0_hi = lp_build_lerp(&h16_bld, lod_fpart, + colors0_hi, colors1_hi); + + LLVMBuildStore(builder, colors0_lo, colors_lo_var); + LLVMBuildStore(builder, colors0_hi, colors_hi_var); + } + lp_build_endif(&if_ctx); } } @@ -896,35 +922,22 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, const LLVMValueRef *ddy, LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef depth, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, - LLVMValueRef row_stride_array, - LLVMValueRef img_stride_array, - LLVMValueRef data_array, LLVMValueRef texel_out[4]) { - struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *int_bld = &bld->int_bld; LLVMBuilderRef builder = bld->builder; const unsigned mip_filter = bld->static_state->min_mip_filter; const unsigned min_filter = bld->static_state->min_img_filter; const unsigned mag_filter = bld->static_state->mag_img_filter; - const int dims = texture_dims(bld->static_state->target); - LLVMValueRef lod = NULL, lod_fpart = NULL; + const unsigned dims = bld->dims; + LLVMValueRef lod_ipart = NULL, lod_fpart = NULL; LLVMValueRef ilevel0, ilevel1 = NULL; - LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL; - LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL; - LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; - LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL; - LLVMValueRef data_ptr0, data_ptr1 = NULL; LLVMValueRef packed, packed_lo, packed_hi; LLVMValueRef unswizzled[4]; LLVMValueRef face_ddx[4], face_ddy[4]; - struct lp_build_context h16; - LLVMTypeRef h16_vec_type; + struct lp_build_context h16_bld; + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0); /* we only support the common/simple wrap modes at this time */ assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s)); @@ -935,9 +948,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, /* make 16-bit fixed-pt builder context */ - lp_build_context_init(&h16, builder, lp_type_ufixed(16)); - h16_vec_type = lp_build_vec_type(h16.type); - + lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16)); /* cube face selection, compute pre-face coords, etc. */ if (bld->static_state->target == PIPE_TEXTURE_CUBE) { @@ -949,19 +960,18 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ /* recompute ddx, ddy using the new (s,t) face texcoords */ - face_ddx[0] = lp_build_ddx(&bld->coord_bld, s); - face_ddx[1] = lp_build_ddx(&bld->coord_bld, t); + face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s); + face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t); face_ddx[2] = NULL; face_ddx[3] = NULL; - face_ddy[0] = lp_build_ddy(&bld->coord_bld, s); - face_ddy[1] = lp_build_ddy(&bld->coord_bld, t); + face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s); + face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t); face_ddy[2] = NULL; face_ddy[3] = NULL; ddx = face_ddx; ddy = face_ddy; } - /* * Compute the level of detail (float). */ @@ -970,15 +980,16 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, /* Need to compute lod either to choose mipmap levels or to * distinguish between minification/magnification with one mipmap level. */ - lod = lp_build_lod_selector(bld, ddx, ddy, - lod_bias, explicit_lod, - width, height, depth); + lp_build_lod_selector(bld, unit, ddx, ddy, + lod_bias, explicit_lod, + mip_filter, + &lod_ipart, &lod_fpart); + } else { + lod_ipart = i32t_zero; } /* * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1 - * If mipfilter=linear, also compute the weight between the two - * mipmap levels: lod_fpart */ switch (mip_filter) { default: @@ -991,135 +1002,81 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, * We should be able to set ilevel0 = const(0) but that causes * bad x86 code to be emitted. */ - lod = lp_build_const_elem(bld->coord_bld.type, 0.0); - lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + assert(lod_ipart); + lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); } else { - ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + ilevel0 = i32t_zero; } break; case PIPE_TEX_MIPFILTER_NEAREST: - assert(lod); - lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + assert(lod_ipart); + lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); break; case PIPE_TEX_MIPFILTER_LINEAR: - { - LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0); - LLVMValueRef i255 = lp_build_const_int32(255); - LLVMTypeRef i16_type = LLVMIntType(16); - - assert(lod); - - lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1, - &lod_fpart); - lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, ""); - lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart); - lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, ""); - lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, ""); - lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart); - - /* the lod_fpart values will be fixed pt values in [0,1) */ - } + assert(lod_ipart); + assert(lod_fpart); + lp_build_linear_mip_levels(bld, unit, + lod_ipart, &lod_fpart, + &ilevel0, &ilevel1); break; } - /* compute image size(s) of source mipmap level(s) */ - lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec, - ilevel0, ilevel1, - row_stride_array, img_stride_array, - &width0_vec, &width1_vec, - &height0_vec, &height1_vec, - &depth0_vec, &depth1_vec, - &row_stride0_vec, &row_stride1_vec, - &img_stride0_vec, &img_stride1_vec); - /* - * Get pointer(s) to image data for mipmap level(s). + * Get/interpolate texture colors. */ - data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0); - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1); - } + packed_lo = lp_build_alloca(builder, h16_bld.vec_type, "packed_lo"); + packed_hi = lp_build_alloca(builder, h16_bld.vec_type, "packed_hi"); - /* - * Get/interpolate texture colors. - */ if (min_filter == mag_filter) { /* no need to distinquish between minification and magnification */ - lp_build_sample_mipmap(bld, min_filter, mip_filter, - s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - &packed_lo, &packed_hi); + lp_build_sample_mipmap(bld, + min_filter, mip_filter, + s, t, r, + ilevel0, ilevel1, lod_fpart, + packed_lo, packed_hi); } else { /* Emit conditional to choose min image filter or mag image filter * depending on the lod being > 0 or <= 0, respectively. */ - struct lp_build_flow_context *flow_ctx; struct lp_build_if_state if_ctx; LLVMValueRef minify; - flow_ctx = lp_build_flow_create(builder); - lp_build_flow_scope_begin(flow_ctx); - - packed_lo = LLVMGetUndef(h16_vec_type); - packed_hi = LLVMGetUndef(h16_vec_type); + /* minify = lod >= 0.0 */ + minify = LLVMBuildICmp(builder, LLVMIntSGE, + lod_ipart, int_bld->zero, ""); - lp_build_flow_scope_declare(flow_ctx, &packed_lo); - lp_build_flow_scope_declare(flow_ctx, &packed_hi); - - /* minify = lod > 0.0 */ - minify = LLVMBuildFCmp(builder, LLVMRealUGE, - lod, float_bld->zero, ""); - - lp_build_if(&if_ctx, flow_ctx, builder, minify); + lp_build_if(&if_ctx, builder, minify); { /* Use the minification filter */ - lp_build_sample_mipmap(bld, min_filter, mip_filter, - s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - &packed_lo, &packed_hi); + lp_build_sample_mipmap(bld, + min_filter, mip_filter, + s, t, r, + ilevel0, ilevel1, lod_fpart, + packed_lo, packed_hi); } lp_build_else(&if_ctx); { /* Use the magnification filter */ - lp_build_sample_mipmap(bld, mag_filter, mip_filter, - s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - &packed_lo, &packed_hi); + lp_build_sample_mipmap(bld, + mag_filter, PIPE_TEX_MIPFILTER_NONE, + s, t, r, + i32t_zero, NULL, NULL, + packed_lo, packed_hi); } lp_build_endif(&if_ctx); - - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); } - /* combine 'packed_lo', 'packed_hi' into 'packed' */ - { - struct lp_build_context h16, u8n; - - lp_build_context_init(&h16, builder, lp_type_ufixed(16)); - lp_build_context_init(&u8n, builder, lp_type_unorm(8)); - - packed = lp_build_pack2(builder, h16.type, u8n.type, - packed_lo, packed_hi); - } + /* + * combine the values stored in 'packed_lo' and 'packed_hi' variables + * into 'packed' + */ + packed = lp_build_pack2(builder, + h16_bld.type, lp_type_unorm(8), + LLVMBuildLoad(builder, packed_lo, ""), + LLVMBuildLoad(builder, packed_hi, "")); /* * Convert to SoA and swizzle. diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h index e1045bbbc2..5d9ecac4d5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h @@ -50,15 +50,6 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, const LLVMValueRef *ddy, LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef depth, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, - LLVMValueRef row_stride_array, - LLVMValueRef img_stride_array, - LLVMValueRef data_array, LLVMValueRef texel_out[4]); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 2d80db6dc9..53cc0c5f34 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -47,41 +47,18 @@ #include "lp_bld_arit.h" #include "lp_bld_bitarit.h" #include "lp_bld_logic.h" +#include "lp_bld_printf.h" #include "lp_bld_swizzle.h" #include "lp_bld_flow.h" #include "lp_bld_gather.h" #include "lp_bld_format.h" #include "lp_bld_sample.h" #include "lp_bld_sample_aos.h" +#include "lp_bld_struct.h" #include "lp_bld_quad.h" /** - * Does the given texture wrap mode allow sampling the texture border color? - * XXX maybe move this into gallium util code. - */ -static boolean -wrap_mode_uses_border_color(unsigned mode) -{ - switch (mode) { - case PIPE_TEX_WRAP_REPEAT: - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_MIRROR_REPEAT: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - return FALSE; - case PIPE_TEX_WRAP_CLAMP: - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - case PIPE_TEX_WRAP_MIRROR_CLAMP: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - return TRUE; - default: - assert(0 && "unexpected wrap mode"); - return FALSE; - } -} - - -/** * Generate code to fetch a texel from a texture at int coords (x, y, z). * The computation depends on whether the texture is 1D, 2D or 3D. * The result, texel, will be float vectors: @@ -92,6 +69,7 @@ wrap_mode_uses_border_color(unsigned mode) */ static void lp_build_sample_texel_soa(struct lp_build_sample_context *bld, + unsigned unit, LLVMValueRef width, LLVMValueRef height, LLVMValueRef depth, @@ -103,21 +81,27 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, LLVMValueRef data_ptr, LLVMValueRef texel_out[4]) { - const int dims = texture_dims(bld->static_state->target); + const struct lp_sampler_static_state *static_state = bld->static_state; + const unsigned dims = bld->dims; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef offset; LLVMValueRef i, j; LLVMValueRef use_border = NULL; /* use_border = x < 0 || x >= width || y < 0 || y >= height */ - if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) { + if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s, + static_state->min_img_filter, + static_state->mag_img_filter)) { LLVMValueRef b1, b2; b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero); b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width); use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); } - if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) { + if (dims >= 2 && + lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t, + static_state->min_img_filter, + static_state->mag_img_filter)) { LLVMValueRef b1, b2; b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero); b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height); @@ -130,7 +114,10 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, } } - if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) { + if (dims == 3 && + lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r, + static_state->min_img_filter, + static_state->mag_img_filter)) { LLVMValueRef b1, b2; b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero); b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); @@ -144,7 +131,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, } /* convert x,y,z coords to linear offset from start of texture, in bytes */ - lp_build_sample_offset(&bld->uint_coord_bld, + lp_build_sample_offset(&bld->int_coord_bld, bld->format_desc, x, y, z, y_stride, z_stride, &offset, &i, &j); @@ -158,7 +145,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, * coords which are out of bounds to become zero. Zero's guaranteed * to be inside the texture image. */ - offset = lp_build_andnot(&bld->uint_coord_bld, offset, use_border); + offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border); } lp_build_fetch_rgba_soa(bld->builder, @@ -168,8 +155,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, i, j, texel_out); - apply_sampler_swizzle(bld, texel_out); - /* * Note: if we find an app which frequently samples the texture border * we might want to implement a true conditional here to avoid sampling @@ -187,15 +172,22 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, if (use_border) { /* select texel color or border color depending on use_border */ + LLVMValueRef border_color_ptr = + bld->dynamic_state->border_color(bld->dynamic_state, + bld->builder, unit); int chan; for (chan = 0; chan < 4; chan++) { LLVMValueRef border_chan = - lp_build_const_vec(bld->texel_type, - bld->static_state->border_color[chan]); + lp_build_array_get(bld->builder, border_color_ptr, + lp_build_const_int32(chan)); + LLVMValueRef border_chan_vec = + lp_build_broadcast_scalar(&bld->float_vec_bld, border_chan); texel_out[chan] = lp_build_select(&bld->texel_bld, use_border, - border_chan, texel_out[chan]); + border_chan_vec, texel_out[chan]); } } + + apply_sampler_swizzle(bld, texel_out); } @@ -210,11 +202,7 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld, struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef fract, flr, isOdd; - /* fract = coord - floor(coord) */ - fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord)); - - /* flr = ifloor(coord); */ - flr = lp_build_ifloor(coord_bld, coord); + lp_build_ifloor_fract(coord_bld, coord, &flr, &fract); /* isOdd = flr & 1 */ isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, ""); @@ -242,6 +230,7 @@ static void lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, LLVMValueRef coord, LLVMValueRef length, + LLVMValueRef length_f, boolean is_pot, unsigned wrap_mode, LLVMValueRef *x0_out, @@ -250,10 +239,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, { struct lp_build_context *coord_bld = &bld->coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5); - LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); - LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); + LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); LLVMValueRef coord0, coord1, weight; switch(wrap_mode) { @@ -261,21 +248,25 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, /* mul by size and subtract 0.5 */ coord = lp_build_mul(coord_bld, coord, length_f); coord = lp_build_sub(coord_bld, coord, half); - /* convert to int */ - coord0 = lp_build_ifloor(coord_bld, coord); - coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one); - /* compute lerp weight */ - weight = lp_build_fract(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); /* repeat wrap */ if (is_pot) { + coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, ""); } else { - /* Signed remainder won't give the right results for negative - * dividends but unsigned remainder does.*/ + /* Add a bias to the texcoord to handle negative coords */ + LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); + LLVMValueRef mask; + coord0 = LLVMBuildAdd(bld->builder, coord0, bias, ""); coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); - coord1 = LLVMBuildURem(bld->builder, coord1, length, ""); + mask = lp_build_compare(bld->builder, int_coord_bld->type, + PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); + coord1 = LLVMBuildAnd(bld->builder, + lp_build_add(int_coord_bld, coord0, int_coord_bld->one), + mask, ""); } break; @@ -290,53 +281,47 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_sub(coord_bld, coord, half); - weight = lp_build_fract(coord_bld, coord); - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - if (bld->static_state->normalized_coords) { - /* clamp to [0,1] */ - coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one); - /* mul by tex size and subtract 0.5 */ - coord = lp_build_mul(coord_bld, coord, length_f); + { + struct lp_build_context abs_coord_bld = bld->coord_bld; + abs_coord_bld.type.sign = FALSE; + + if (bld->static_state->normalized_coords) { + /* mul by tex size */ + coord = lp_build_mul(coord_bld, coord, length_f); + } + /* clamp to length max */ + coord = lp_build_min(coord_bld, coord, length_f); + /* subtract 0.5 */ coord = lp_build_sub(coord_bld, coord, half); + /* clamp to [0, length - 0.5] */ + coord = lp_build_max(coord_bld, coord, coord_bld->zero); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight); + coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); + /* coord1 = min(coord1, length-1) */ + coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); + break; } - else { - LLVMValueRef min, max; - /* clamp to [0.5, length - 0.5] */ - min = half; - max = lp_build_sub(coord_bld, length_f, min); - coord = lp_build_clamp(coord_bld, coord, min, max); - } - /* compute lerp weight */ - weight = lp_build_fract(coord_bld, coord); - /* coord0 = floor(coord); */ - coord0 = lp_build_ifloor(coord_bld, coord); - coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); - /* coord0 = max(coord0, 0) */ - coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero); - /* coord1 = min(coord1, length-1) */ - coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); - break; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: { - LLVMValueRef min, max; + LLVMValueRef min; if (bld->static_state->normalized_coords) { /* scale coord to length */ coord = lp_build_mul(coord_bld, coord, length_f); } - /* clamp to [-0.5, length + 0.5] */ - min = lp_build_const_vec(coord_bld->type, -0.5F); - max = lp_build_sub(coord_bld, length_f, min); - coord = lp_build_clamp(coord_bld, coord, min, max); + /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */ coord = lp_build_sub(coord_bld, coord, half); - /* compute lerp weight */ - weight = lp_build_fract(coord_bld, coord); - /* convert to int */ - coord0 = lp_build_ifloor(coord_bld, coord); + min = lp_build_const_vec(coord_bld->type, -1.0F); + coord = lp_build_clamp(coord_bld, coord, min, length_f); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); } break; @@ -349,11 +334,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_mul(coord_bld, coord, length_f); coord = lp_build_sub(coord_bld, coord, half); - /* compute lerp weight */ - weight = lp_build_fract(coord_bld, coord); - - /* convert to int coords */ - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); /* coord0 = max(coord0, 0) */ @@ -375,15 +357,16 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_sub(coord_bld, coord, half); - weight = lp_build_fract(coord_bld, coord); - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); break; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: { LLVMValueRef min, max; - + struct lp_build_context abs_coord_bld = bld->coord_bld; + abs_coord_bld.type.sign = FALSE; coord = lp_build_abs(coord_bld, coord); if (bld->static_state->normalized_coords) { @@ -398,16 +381,14 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_sub(coord_bld, coord, half); - weight = lp_build_fract(coord_bld, coord); - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); } break; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: { - LLVMValueRef min, max; - coord = lp_build_abs(coord_bld, coord); if (bld->static_state->normalized_coords) { @@ -415,15 +396,13 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_mul(coord_bld, coord, length_f); } - /* clamp to [-0.5, length + 0.5] */ - min = lp_build_negate(coord_bld, half); - max = lp_build_sub(coord_bld, length_f, min); - coord = lp_build_clamp(coord_bld, coord, min, max); - + /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */ + /* skip -0.5 clamp (always positive), do sub first */ coord = lp_build_sub(coord_bld, coord, half); + coord = lp_build_min(coord_bld, coord, length_f); - weight = lp_build_fract(coord_bld, coord); - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); } break; @@ -452,14 +431,13 @@ static LLVMValueRef lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, LLVMValueRef coord, LLVMValueRef length, + LLVMValueRef length_f, boolean is_pot, unsigned wrap_mode) { struct lp_build_context *coord_bld = &bld->coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; - LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); - LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); + LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); LLVMValueRef icoord; switch(wrap_mode) { @@ -468,10 +446,12 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, icoord = lp_build_ifloor(coord_bld, coord); if (is_pot) icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, ""); - else - /* Signed remainder won't give the right results for negative - * dividends but unsigned remainder does.*/ + else { + /* Add a bias to the texcoord to handle negative coords */ + LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); + icoord = LLVMBuildAdd(bld->builder, icoord, bias, ""); icoord = LLVMBuildURem(bld->builder, icoord, length, ""); + } break; case PIPE_TEX_WRAP_CLAMP: @@ -482,7 +462,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, } /* floor */ - icoord = lp_build_ifloor(coord_bld, coord); + /* use itrunc instead since we clamp to 0 anyway */ + icoord = lp_build_itrunc(coord_bld, coord); /* clamp to [0, length - 1]. */ icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero, @@ -516,7 +497,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, assert(bld->static_state->normalized_coords); coord = lp_build_mul(coord_bld, coord, length_f); - icoord = lp_build_ifloor(coord_bld, coord); + /* itrunc == ifloor here */ + icoord = lp_build_itrunc(coord_bld, coord); /* clamp to [0, length - 1] */ icoord = lp_build_min(int_coord_bld, icoord, length_minus_one); @@ -531,7 +513,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, coord = lp_build_mul(coord_bld, coord, length_f); } - icoord = lp_build_ifloor(coord_bld, coord); + /* itrunc == ifloor here */ + icoord = lp_build_itrunc(coord_bld, coord); /* clamp to [0, length - 1] */ icoord = lp_build_min(int_coord_bld, icoord, length_minus_one); @@ -545,7 +528,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, coord = lp_build_mul(coord_bld, coord, length_f); } - icoord = lp_build_ifloor(coord_bld, coord); + /* itrunc == ifloor here */ + icoord = lp_build_itrunc(coord_bld, coord); /* clamp to [0, length] */ icoord = lp_build_min(int_coord_bld, icoord, length); @@ -566,9 +550,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, */ static void lp_build_sample_image_nearest(struct lp_build_sample_context *bld, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + unsigned unit, + LLVMValueRef size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -577,26 +560,47 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, LLVMValueRef r, LLVMValueRef colors_out[4]) { - const int dims = texture_dims(bld->static_state->target); + const unsigned dims = bld->dims; + LLVMValueRef width_vec; + LLVMValueRef height_vec; + LLVMValueRef depth_vec; + LLVMValueRef flt_size; + LLVMValueRef flt_width_vec; + LLVMValueRef flt_height_vec; + LLVMValueRef flt_depth_vec; LLVMValueRef x, y, z; + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + size, + &width_vec, &height_vec, &depth_vec); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, size); + + lp_build_extract_image_sizes(bld, + bld->float_size_type, + bld->coord_type, + flt_size, + &flt_width_vec, &flt_height_vec, &flt_depth_vec); + /* * Compute integer texcoords. */ - x = lp_build_sample_wrap_nearest(bld, s, width_vec, + x = lp_build_sample_wrap_nearest(bld, s, width_vec, flt_width_vec, bld->static_state->pot_width, bld->static_state->wrap_s); lp_build_name(x, "tex.x.wrapped"); if (dims >= 2) { - y = lp_build_sample_wrap_nearest(bld, t, height_vec, + y = lp_build_sample_wrap_nearest(bld, t, height_vec, flt_height_vec, bld->static_state->pot_height, bld->static_state->wrap_t); lp_build_name(y, "tex.y.wrapped"); if (dims == 3) { - z = lp_build_sample_wrap_nearest(bld, r, depth_vec, - bld->static_state->pot_height, + z = lp_build_sample_wrap_nearest(bld, r, depth_vec, flt_depth_vec, + bld->static_state->pot_depth, bld->static_state->wrap_r); lp_build_name(z, "tex.z.wrapped"); } @@ -614,7 +618,8 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, /* * Get texture colors. */ - lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + lp_build_sample_texel_soa(bld, unit, + width_vec, height_vec, depth_vec, x, y, z, row_stride_vec, img_stride_vec, data_ptr, colors_out); @@ -627,9 +632,8 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, */ static void lp_build_sample_image_linear(struct lp_build_sample_context *bld, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + unsigned unit, + LLVMValueRef size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -638,16 +642,37 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef r, LLVMValueRef colors_out[4]) { - const int dims = texture_dims(bld->static_state->target); + const unsigned dims = bld->dims; + LLVMValueRef width_vec; + LLVMValueRef height_vec; + LLVMValueRef depth_vec; + LLVMValueRef flt_size; + LLVMValueRef flt_width_vec; + LLVMValueRef flt_height_vec; + LLVMValueRef flt_depth_vec; LLVMValueRef x0, y0, z0, x1, y1, z1; LLVMValueRef s_fpart, t_fpart, r_fpart; LLVMValueRef neighbors[2][2][4]; int chan; + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + size, + &width_vec, &height_vec, &depth_vec); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, size); + + lp_build_extract_image_sizes(bld, + bld->float_size_type, + bld->coord_type, + flt_size, + &flt_width_vec, &flt_height_vec, &flt_depth_vec); + /* * Compute integer texcoords. */ - lp_build_sample_wrap_linear(bld, s, width_vec, + lp_build_sample_wrap_linear(bld, s, width_vec, flt_width_vec, bld->static_state->pot_width, bld->static_state->wrap_s, &x0, &x1, &s_fpart); @@ -655,7 +680,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, lp_build_name(x1, "tex.x1.wrapped"); if (dims >= 2) { - lp_build_sample_wrap_linear(bld, t, height_vec, + lp_build_sample_wrap_linear(bld, t, height_vec, flt_height_vec, bld->static_state->pot_height, bld->static_state->wrap_t, &y0, &y1, &t_fpart); @@ -663,7 +688,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, lp_build_name(y1, "tex.y1.wrapped"); if (dims == 3) { - lp_build_sample_wrap_linear(bld, r, depth_vec, + lp_build_sample_wrap_linear(bld, r, depth_vec, flt_depth_vec, bld->static_state->pot_depth, bld->static_state->wrap_r, &z0, &z1, &r_fpart); @@ -688,11 +713,13 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, * Get texture colors. */ /* get x0/x1 texels */ - lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + lp_build_sample_texel_soa(bld, unit, + width_vec, height_vec, depth_vec, x0, y0, z0, row_stride_vec, img_stride_vec, data_ptr, neighbors[0][0]); - lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + lp_build_sample_texel_soa(bld, unit, + width_vec, height_vec, depth_vec, x1, y0, z0, row_stride_vec, img_stride_vec, data_ptr, neighbors[0][1]); @@ -710,11 +737,13 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef colors0[4]; /* get x0/x1 texels at y1 */ - lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + lp_build_sample_texel_soa(bld, unit, + width_vec, height_vec, depth_vec, x0, y1, z0, row_stride_vec, img_stride_vec, data_ptr, neighbors[1][0]); - lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + lp_build_sample_texel_soa(bld, unit, + width_vec, height_vec, depth_vec, x1, y1, z0, row_stride_vec, img_stride_vec, data_ptr, neighbors[1][1]); @@ -734,19 +763,23 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef colors1[4]; /* get x0/x1/y0/y1 texels at z1 */ - lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + lp_build_sample_texel_soa(bld, unit, + width_vec, height_vec, depth_vec, x0, y0, z1, row_stride_vec, img_stride_vec, data_ptr, neighbors1[0][0]); - lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + lp_build_sample_texel_soa(bld, unit, + width_vec, height_vec, depth_vec, x1, y0, z1, row_stride_vec, img_stride_vec, data_ptr, neighbors1[0][1]); - lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + lp_build_sample_texel_soa(bld, unit, + width_vec, height_vec, depth_vec, x0, y1, z1, row_stride_vec, img_stride_vec, data_ptr, neighbors1[1][0]); - lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + lp_build_sample_texel_soa(bld, unit, + width_vec, height_vec, depth_vec, x1, y1, z1, row_stride_vec, img_stride_vec, data_ptr, neighbors1[1][1]); @@ -786,74 +819,98 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, */ static void lp_build_sample_mipmap(struct lp_build_sample_context *bld, + unsigned unit, unsigned img_filter, unsigned mip_filter, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + LLVMValueRef ilevel0, + LLVMValueRef ilevel1, LLVMValueRef lod_fpart, - LLVMValueRef width0_vec, - LLVMValueRef width1_vec, - LLVMValueRef height0_vec, - LLVMValueRef height1_vec, - LLVMValueRef depth0_vec, - LLVMValueRef depth1_vec, - LLVMValueRef row_stride0_vec, - LLVMValueRef row_stride1_vec, - LLVMValueRef img_stride0_vec, - LLVMValueRef img_stride1_vec, - LLVMValueRef data_ptr0, - LLVMValueRef data_ptr1, LLVMValueRef *colors_out) { + LLVMBuilderRef builder = bld->builder; + LLVMValueRef size0; + LLVMValueRef size1; + LLVMValueRef row_stride0_vec; + LLVMValueRef row_stride1_vec; + LLVMValueRef img_stride0_vec; + LLVMValueRef img_stride1_vec; + LLVMValueRef data_ptr0; + LLVMValueRef data_ptr1; LLVMValueRef colors0[4], colors1[4]; - int chan; + unsigned chan; + /* sample the first mipmap level */ + lp_build_mipmap_level_sizes(bld, ilevel0, + &size0, + &row_stride0_vec, &img_stride0_vec); + data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); if (img_filter == PIPE_TEX_FILTER_NEAREST) { - /* sample the first mipmap level */ - lp_build_sample_image_nearest(bld, - width0_vec, height0_vec, depth0_vec, + lp_build_sample_image_nearest(bld, unit, + size0, row_stride0_vec, img_stride0_vec, - data_ptr0, s, t, r, colors0); - - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level */ - lp_build_sample_image_nearest(bld, - width1_vec, height1_vec, depth1_vec, - row_stride1_vec, img_stride1_vec, - data_ptr1, s, t, r, colors1); - } + data_ptr0, s, t, r, + colors0); } else { assert(img_filter == PIPE_TEX_FILTER_LINEAR); - - /* sample the first mipmap level */ - lp_build_sample_image_linear(bld, - width0_vec, height0_vec, depth0_vec, + lp_build_sample_image_linear(bld, unit, + size0, row_stride0_vec, img_stride0_vec, - data_ptr0, s, t, r, colors0); + data_ptr0, s, t, r, + colors0); + } - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level */ - lp_build_sample_image_linear(bld, - width1_vec, height1_vec, depth1_vec, - row_stride1_vec, img_stride1_vec, - data_ptr1, s, t, r, colors1); - } + /* Store the first level's colors in the output variables */ + for (chan = 0; chan < 4; chan++) { + LLVMBuildStore(builder, colors0[chan], colors_out[chan]); } if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* interpolate samples from the two mipmap levels */ - for (chan = 0; chan < 4; chan++) { - colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, + struct lp_build_if_state if_ctx; + LLVMValueRef need_lerp; + + /* need_lerp = lod_fpart > 0 */ + need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT, + lod_fpart, + bld->float_bld.zero, + "need_lerp"); + + lp_build_if(&if_ctx, builder, need_lerp); + { + /* sample the second mipmap level */ + lp_build_mipmap_level_sizes(bld, ilevel1, + &size1, + &row_stride1_vec, &img_stride1_vec); + data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); + if (img_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, unit, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, + colors1); + } + else { + lp_build_sample_image_linear(bld, unit, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, + colors1); + } + + /* interpolate samples from the two mipmap levels */ + + lod_fpart = lp_build_broadcast_scalar(&bld->texel_bld, lod_fpart); + + for (chan = 0; chan < 4; chan++) { + colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, colors0[chan], colors1[chan]); + LLVMBuildStore(builder, colors0[chan], colors_out[chan]); + } } - } - else { - /* use first/only level's colors */ - for (chan = 0; chan < 4; chan++) { - colors_out[chan] = colors0[chan]; - } + lp_build_endif(&if_ctx); } } @@ -874,30 +931,20 @@ lp_build_sample_general(struct lp_build_sample_context *bld, const LLVMValueRef *ddy, LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef depth, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, - LLVMValueRef row_stride_array, - LLVMValueRef img_stride_array, - LLVMValueRef data_array, LLVMValueRef *colors_out) { - struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *int_bld = &bld->int_bld; + LLVMBuilderRef builder = bld->builder; const unsigned mip_filter = bld->static_state->min_mip_filter; const unsigned min_filter = bld->static_state->min_img_filter; const unsigned mag_filter = bld->static_state->mag_img_filter; - const int dims = texture_dims(bld->static_state->target); - LLVMValueRef lod = NULL, lod_fpart = NULL; + LLVMValueRef lod_ipart = NULL, lod_fpart = NULL; LLVMValueRef ilevel0, ilevel1 = NULL; - LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL; - LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL; - LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; - LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL; - LLVMValueRef data_ptr0, data_ptr1 = NULL; LLVMValueRef face_ddx[4], face_ddy[4]; + LLVMValueRef texels[4]; + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0); + unsigned chan; /* printf("%s mip %d min %d mag %d\n", __FUNCTION__, @@ -916,12 +963,12 @@ lp_build_sample_general(struct lp_build_sample_context *bld, r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ /* recompute ddx, ddy using the new (s,t) face texcoords */ - face_ddx[0] = lp_build_ddx(&bld->coord_bld, s); - face_ddx[1] = lp_build_ddx(&bld->coord_bld, t); + face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s); + face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t); face_ddx[2] = NULL; face_ddx[3] = NULL; - face_ddy[0] = lp_build_ddy(&bld->coord_bld, s); - face_ddy[1] = lp_build_ddy(&bld->coord_bld, t); + face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s); + face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t); face_ddy[2] = NULL; face_ddy[3] = NULL; ddx = face_ddx; @@ -936,127 +983,109 @@ lp_build_sample_general(struct lp_build_sample_context *bld, /* Need to compute lod either to choose mipmap levels or to * distinguish between minification/magnification with one mipmap level. */ - lod = lp_build_lod_selector(bld, ddx, ddy, - lod_bias, explicit_lod, - width, height, depth); + lp_build_lod_selector(bld, unit, ddx, ddy, + lod_bias, explicit_lod, + mip_filter, + &lod_ipart, &lod_fpart); + } else { + lod_ipart = i32t_zero; } /* - * Compute integer mipmap level(s) to fetch texels from. + * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1 */ - if (mip_filter == PIPE_TEX_MIPFILTER_NONE) { + switch (mip_filter) { + default: + assert(0 && "bad mip_filter value in lp_build_sample_soa()"); + /* fall-through */ + case PIPE_TEX_MIPFILTER_NONE: /* always use mip level 0 */ if (bld->static_state->target == PIPE_TEXTURE_CUBE) { /* XXX this is a work-around for an apparent bug in LLVM 2.7. * We should be able to set ilevel0 = const(0) but that causes * bad x86 code to be emitted. */ - lod = lp_build_const_elem(bld->coord_bld.type, 0.0); - lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); - } - else { - ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - } - } - else { - assert(lod); - if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { - lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + assert(lod_ipart); + lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); } else { - assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR); - lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1, - &lod_fpart); - lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart); + ilevel0 = i32t_zero; } + break; + case PIPE_TEX_MIPFILTER_NEAREST: + assert(lod_ipart); + lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); + break; + case PIPE_TEX_MIPFILTER_LINEAR: + assert(lod_ipart); + assert(lod_fpart); + lp_build_linear_mip_levels(bld, unit, + lod_ipart, &lod_fpart, + &ilevel0, &ilevel1); + break; } - /* compute image size(s) of source mipmap level(s) */ - lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec, - ilevel0, ilevel1, - row_stride_array, img_stride_array, - &width0_vec, &width1_vec, - &height0_vec, &height1_vec, - &depth0_vec, &depth1_vec, - &row_stride0_vec, &row_stride1_vec, - &img_stride0_vec, &img_stride1_vec); - /* - * Get pointer(s) to image data for mipmap level(s). + * Get/interpolate texture colors. */ - data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0); - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1); + + for (chan = 0; chan < 4; ++chan) { + texels[chan] = lp_build_alloca(builder, bld->texel_bld.vec_type, ""); + lp_build_name(texels[chan], "sampler%u_texel_%c_var", unit, "xyzw"[chan]); } - /* - * Get/interpolate texture colors. - */ if (min_filter == mag_filter) { /* no need to distinquish between minification and magnification */ - lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - colors_out); + lp_build_sample_mipmap(bld, unit, + min_filter, mip_filter, + s, t, r, + ilevel0, ilevel1, lod_fpart, + texels); } else { /* Emit conditional to choose min image filter or mag image filter - * depending on the lod being >0 or <= 0, respectively. + * depending on the lod being > 0 or <= 0, respectively. */ - struct lp_build_flow_context *flow_ctx; struct lp_build_if_state if_ctx; LLVMValueRef minify; - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); - - lp_build_flow_scope_declare(flow_ctx, &colors_out[0]); - lp_build_flow_scope_declare(flow_ctx, &colors_out[1]); - lp_build_flow_scope_declare(flow_ctx, &colors_out[2]); - lp_build_flow_scope_declare(flow_ctx, &colors_out[3]); + /* minify = lod >= 0.0 */ + minify = LLVMBuildICmp(builder, LLVMIntSGE, + lod_ipart, int_bld->zero, ""); - /* minify = lod > 0.0 */ - minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE, - lod, float_bld->zero, ""); - - lp_build_if(&if_ctx, flow_ctx, bld->builder, minify); + lp_build_if(&if_ctx, builder, minify); { /* Use the minification filter */ - lp_build_sample_mipmap(bld, min_filter, mip_filter, - s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - colors_out); + lp_build_sample_mipmap(bld, unit, + min_filter, mip_filter, + s, t, r, + ilevel0, ilevel1, lod_fpart, + texels); } lp_build_else(&if_ctx); { /* Use the magnification filter */ - lp_build_sample_mipmap(bld, mag_filter, mip_filter, - s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - colors_out); + lp_build_sample_mipmap(bld, unit, + mag_filter, PIPE_TEX_MIPFILTER_NONE, + s, t, r, + i32t_zero, NULL, NULL, + texels); } lp_build_endif(&if_ctx); + } - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); + for (chan = 0; chan < 4; ++chan) { + colors_out[chan] = LLVMBuildLoad(builder, texels[chan], ""); + lp_build_name(colors_out[chan], "sampler%u_texel_%c", unit, "xyzw"[chan]); } } +/** + * Do shadow test/comparison. + * \param p the texcoord Z (aka R, aka P) component + * \param texel the texel to compare against (use the X channel) + */ static void lp_build_sample_compare(struct lp_build_sample_context *bld, LLVMValueRef p, @@ -1064,30 +1093,30 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, { struct lp_build_context *texel_bld = &bld->texel_bld; LLVMValueRef res; - unsigned chan; + const unsigned chan = 0; - if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE) + if (bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE) return; - /* TODO: Compare before swizzling, to avoid redundant computations */ - res = NULL; - for(chan = 0; chan < 4; ++chan) { - LLVMValueRef cmp; - cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]); - cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero); - - if(res) - res = lp_build_add(texel_bld, res, cmp); - else - res = cmp; + /* debug code */ + if (0) { + LLVMValueRef indx = lp_build_const_int32(0); + LLVMValueRef coord = LLVMBuildExtractElement(bld->builder, p, indx, ""); + LLVMValueRef tex = LLVMBuildExtractElement(bld->builder, + texel[chan], indx, ""); + lp_build_printf(bld->builder, "shadow compare coord %f to texture %f\n", + coord, tex); } - assert(res); - res = lp_build_mul(texel_bld, res, lp_build_const_vec(texel_bld->type, 0.25)); + /* result = (p FUNC texel) ? 1 : 0 */ + res = lp_build_cmp(texel_bld, bld->static_state->compare_func, + p, texel[chan]); + res = lp_build_select(texel_bld, res, texel_bld->one, texel_bld->zero); /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - for(chan = 0; chan < 3; ++chan) - texel[chan] = res; + texel[0] = + texel[1] = + texel[2] = res; texel[3] = texel_bld->one; } @@ -1131,15 +1160,14 @@ lp_build_sample_soa(LLVMBuilderRef builder, LLVMValueRef explicit_lod, /* optional */ LLVMValueRef texel_out[4]) { + unsigned dims = texture_dims(static_state->target); struct lp_build_sample_context bld; - LLVMValueRef width, width_vec; - LLVMValueRef height, height_vec; - LLVMValueRef depth, depth_vec; - LLVMValueRef row_stride_array, img_stride_array; - LLVMValueRef data_array; + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef s; LLVMValueRef t; LLVMValueRef r; + struct lp_type float_vec_type; if (0) { enum pipe_format fmt = static_state->format; @@ -1154,38 +1182,57 @@ lp_build_sample_soa(LLVMBuilderRef builder, bld.static_state = static_state; bld.dynamic_state = dynamic_state; bld.format_desc = util_format_description(static_state->format); + bld.dims = dims; bld.float_type = lp_type_float(32); bld.int_type = lp_type_int(32); bld.coord_type = type; - bld.uint_coord_type = lp_uint_type(type); bld.int_coord_type = lp_int_type(type); + bld.float_size_type = lp_type_float(32); + bld.float_size_type.length = dims > 1 ? 4 : 1; + bld.int_size_type = lp_int_type(bld.float_size_type); bld.texel_type = type; + float_vec_type = lp_type_float_vec(32); + lp_build_context_init(&bld.float_bld, builder, bld.float_type); + lp_build_context_init(&bld.float_vec_bld, builder, float_vec_type); lp_build_context_init(&bld.int_bld, builder, bld.int_type); lp_build_context_init(&bld.coord_bld, builder, bld.coord_type); - lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type); lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type); + lp_build_context_init(&bld.int_size_bld, builder, bld.int_size_type); + lp_build_context_init(&bld.float_size_bld, builder, bld.float_size_type); lp_build_context_init(&bld.texel_bld, builder, bld.texel_type); /* Get the dynamic state */ - width = dynamic_state->width(dynamic_state, builder, unit); - height = dynamic_state->height(dynamic_state, builder, unit); - depth = dynamic_state->depth(dynamic_state, builder, unit); - row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit); - img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit); - data_array = dynamic_state->data_ptr(dynamic_state, builder, unit); + bld.width = dynamic_state->width(dynamic_state, builder, unit); + bld.height = dynamic_state->height(dynamic_state, builder, unit); + bld.depth = dynamic_state->depth(dynamic_state, builder, unit); + bld.row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit); + bld.img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit); + bld.data_array = dynamic_state->data_ptr(dynamic_state, builder, unit); /* Note that data_array is an array[level] of pointers to texture images */ s = coords[0]; t = coords[1]; r = coords[2]; - /* width, height, depth as uint vectors */ - width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width); - height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height); - depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth); + /* width, height, depth as single int vector */ + if (dims <= 1) { + bld.int_size = bld.width; + } + else { + bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_bld.undef, + bld.width, LLVMConstInt(i32t, 0, 0), ""); + if (dims >= 2) { + bld.int_size = LLVMBuildInsertElement(builder, bld.int_size, + bld.height, LLVMConstInt(i32t, 1, 0), ""); + if (dims >= 3) { + bld.int_size = LLVMBuildInsertElement(builder, bld.int_size, + bld.depth, LLVMConstInt(i32t, 2, 0), ""); + } + } + } if (0) { /* For debug: no-op texture sampling */ @@ -1195,13 +1242,9 @@ lp_build_sample_soa(LLVMBuilderRef builder, lp_is_simple_wrap_mode(static_state->wrap_s) && lp_is_simple_wrap_mode(static_state->wrap_t)) { /* do sampling/filtering with fixed pt arithmetic */ - printf("new sample\n"); lp_build_sample_aos(&bld, unit, s, t, r, ddx, ddy, lod_bias, explicit_lod, - width, height, depth, - width_vec, height_vec, depth_vec, - row_stride_array, img_stride_array, - data_array, texel_out); + texel_out); } else { @@ -1217,13 +1260,8 @@ lp_build_sample_soa(LLVMBuilderRef builder, static_state->wrap_t); } - printf("old sample\n"); lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy, lod_bias, explicit_lod, - width, height, depth, - width_vec, height_vec, depth_vec, - row_stride_array, img_stride_array, - data_array, texel_out); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c index 2e9e8386de..4685a90e41 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c @@ -101,6 +101,83 @@ lp_build_broadcast_scalar(struct lp_build_context *bld, /** + * Combined extract and broadcast (or a mere shuffle when the two types match) + */ +LLVMValueRef +lp_build_extract_broadcast(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef vector, + LLVMValueRef index) +{ + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef res; + + assert(src_type.floating == dst_type.floating); + assert(src_type.width == dst_type.width); + + assert(lp_check_value(src_type, vector)); + assert(LLVMTypeOf(index) == i32t); + + if (src_type.length == 1) { + if (dst_type.length == 1) { + /* + * Trivial scalar -> scalar. + */ + + res = vector; + } + else { + /* + * Broadcast scalar -> vector. + */ + + res = lp_build_broadcast(builder, + lp_build_vec_type(dst_type), + vector); + } + } + else { + if (dst_type.length == src_type.length) { + /* + * Special shuffle of the same size. + */ + + LLVMValueRef shuffle; + shuffle = lp_build_broadcast(builder, + LLVMVectorType(i32t, dst_type.length), + index); + res = LLVMBuildShuffleVector(builder, vector, + LLVMGetUndef(lp_build_vec_type(dst_type)), + shuffle, ""); + } + else { + LLVMValueRef scalar; + scalar = LLVMBuildExtractElement(builder, vector, index, ""); + if (dst_type.length == 1) { + /* + * Trivial extract scalar from vector. + */ + + res = scalar; + } + else { + /* + * General case of different sized vectors. + */ + + res = lp_build_broadcast(builder, + lp_build_vec_type(dst_type), + vector); + } + } + } + + return res; +} + + +/** * Swizzle one channel into all other three channels. */ LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h index f9b6a5e725..fdea8442ae 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h @@ -55,6 +55,14 @@ lp_build_broadcast_scalar(struct lp_build_context *bld, LLVMValueRef scalar); +LLVMValueRef +lp_build_extract_broadcast(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef vector, + LLVMValueRef index); + + /** * Broadcast one channel of a vector composed of arrays of XYZW structures into * all four channel. diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 97318b3456..a4d3b750c3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -36,6 +36,9 @@ #define LP_BLD_TGSI_H #include "gallivm/lp_bld.h" +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" struct tgsi_token; @@ -55,6 +58,75 @@ enum lp_build_tex_modifier { /** + * Describe a channel of a register. + * + * The value can be a: + * - immediate value (i.e. derived from a IMM register) + * - CONST[n].x/y/z/w + * - IN[n].x/y/z/w + * - undetermined (when .file == TGSI_FILE_NULL) + * + * This is one of the analysis results, and is used to described + * the output color in terms of inputs. + */ +struct lp_tgsi_channel_info +{ + unsigned file:4; /* TGSI_FILE_* */ + unsigned swizzle:3; /* PIPE_SWIZZLE_x */ + union { + uint32_t index; + float value; /* for TGSI_FILE_IMMEDIATE */ + } u; +}; + + +/** + * Describe a texture sampler interpolator. + * + * The interpolation is described in terms of regular inputs. + */ +struct lp_tgsi_texture_info +{ + struct lp_tgsi_channel_info coord[4]; + unsigned target:8; /* TGSI_TEXTURE_* */ + unsigned unit:8; /* Sampler unit */ + unsigned modifier:8; /* LP_BLD_TEX_MODIFIER_* */ +}; + + +struct lp_tgsi_info +{ + struct tgsi_shader_info base; + + /* + * Whether any of the texture opcodes access a register file other than + * TGSI_FILE_INPUT. + * + * We could also handle TGSI_FILE_CONST/IMMEDIATE here, but there is little + * benefit. + */ + unsigned indirect_textures:1; + + /* + * Texture opcode description. Aimed at detecting and described direct + * texture opcodes. + */ + unsigned num_texs; + struct lp_tgsi_texture_info tex[PIPE_MAX_SAMPLERS]; + + /* + * Output description. Aimed at detecting and describing simple blit + * shaders. + */ + struct lp_tgsi_channel_info output[PIPE_MAX_SHADER_OUTPUTS][4]; + + /* + * Shortcut pointers into the above (for fragment shaders). + */ + const struct lp_tgsi_channel_info *cbuf[PIPE_MAX_COLOR_BUFS]; +}; + +/** * Sampler code generation interface. * * Although texture sampling is a requirement for TGSI translation, it is @@ -97,6 +169,11 @@ struct lp_build_sampler_aos void +lp_build_tgsi_info(const struct tgsi_token *tokens, + struct lp_tgsi_info *info); + + +void lp_build_tgsi_soa(LLVMBuilderRef builder, const struct tgsi_token *tokens, struct lp_type type, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c new file mode 100644 index 0000000000..ad514463de --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c @@ -0,0 +1,479 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_dump.h" +#include "lp_bld_debug.h" +#include "lp_bld_tgsi.h" + + +/** + * Analysis context. + * + * This is where we keep store the value of each channel of the IMM/TEMP/OUT + * register values, as we walk the shader. + */ +struct analysis_context +{ + struct lp_tgsi_info *info; + + unsigned num_imms; + float imm[32][4]; + + struct lp_tgsi_channel_info temp[32][4]; +}; + + +/** + * Describe the specified channel of the src register. + */ +static void +analyse_src(struct analysis_context *ctx, + struct lp_tgsi_channel_info *chan_info, + const struct tgsi_src_register *src, + unsigned chan) +{ + chan_info->file = TGSI_FILE_NULL; + if (!src->Indirect && !src->Absolute && !src->Negate) { + unsigned swizzle = tgsi_util_get_src_register_swizzle(src, chan); + if (src->File == TGSI_FILE_TEMPORARY) { + if (src->Index < Elements(ctx->temp)) { + *chan_info = ctx->temp[src->Index][swizzle]; + } + } else { + chan_info->file = src->File; + if (src->File == TGSI_FILE_IMMEDIATE) { + assert(src->Index < Elements(ctx->imm)); + if (src->Index < Elements(ctx->imm)) { + chan_info->u.value = ctx->imm[src->Index][swizzle]; + } + } else { + chan_info->u.index = src->Index; + chan_info->swizzle = swizzle; + } + } + } +} + + +/** + * Whether this register channel refers to a specific immediate value. + */ +static boolean +is_immediate(const struct lp_tgsi_channel_info *chan_info, float value) +{ + return chan_info->file == TGSI_FILE_IMMEDIATE && + chan_info->u.value == value; +} + + +static void +analyse_tex(struct analysis_context *ctx, + const struct tgsi_full_instruction *inst, + enum lp_build_tex_modifier modifier) +{ + struct lp_tgsi_info *info = ctx->info; + unsigned chan; + + if (info->num_texs < Elements(info->tex)) { + struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs]; + bool indirect = FALSE; + unsigned readmask = 0; + + tex_info->target = inst->Texture.Texture; + switch (inst->Texture.Texture) { + case TGSI_TEXTURE_1D: + readmask = TGSI_WRITEMASK_X; + break; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + readmask = TGSI_WRITEMASK_XY; + break; + case TGSI_TEXTURE_SHADOW1D: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + readmask = TGSI_WRITEMASK_XYZ; + break; + default: + assert(0); + return; + } + + if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { + /* We don't track explicit derivatives, although we could */ + indirect = TRUE; + tex_info->unit = inst->Src[3].Register.Index; + } else { + if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED || + modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS || + modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { + readmask |= TGSI_WRITEMASK_W; + } + tex_info->unit = inst->Src[1].Register.Index; + } + + for (chan = 0; chan < 4; ++chan) { + struct lp_tgsi_channel_info *chan_info = &tex_info->coord[chan]; + if (readmask & (1 << chan)) { + analyse_src(ctx, chan_info, &inst->Src[0].Register, chan); + if (chan_info->file != TGSI_FILE_INPUT) { + indirect = TRUE; + } + } else { + memset(chan_info, 0, sizeof *chan_info); + } + } + + if (indirect) { + info->indirect_textures = TRUE; + } + + ++info->num_texs; + } else { + info->indirect_textures = TRUE; + } +} + + +/** + * Process an instruction, and update the register values accordingly. + */ +static void +analyse_instruction(struct analysis_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct lp_tgsi_info *info = ctx->info; + struct lp_tgsi_channel_info (*regs)[4]; + unsigned max_regs; + unsigned i; + unsigned index; + unsigned chan; + + for (i = 0; i < inst->Instruction.NumDstRegs; ++i) { + const struct tgsi_dst_register *dst = &inst->Dst[i].Register; + + /* + * Get the lp_tgsi_channel_info array corresponding to the destination + * register file. + */ + + if (dst->File == TGSI_FILE_TEMPORARY) { + regs = ctx->temp; + max_regs = Elements(ctx->temp); + } else if (dst->File == TGSI_FILE_OUTPUT) { + regs = info->output; + max_regs = Elements(info->output); + } else if (dst->File == TGSI_FILE_ADDRESS || + dst->File == TGSI_FILE_PREDICATE) { + continue; + } else { + assert(0); + continue; + } + + /* + * Detect direct TEX instructions + */ + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_TEX: + analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_NONE); + break; + case TGSI_OPCODE_TXD: + analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV); + break; + case TGSI_OPCODE_TXB: + analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS); + break; + case TGSI_OPCODE_TXL: + analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD); + break; + case TGSI_OPCODE_TXP: + analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_PROJECTED); + break; + default: + break; + } + + /* + * Keep track of assignments and writes + */ + + if (dst->Indirect) { + /* + * It could be any register index so clear all register indices. + */ + + for (chan = 0; chan < 4; ++chan) { + if (dst->WriteMask & (1 << chan)) { + for (index = 0; index < max_regs; ++index) { + regs[index][chan].file = TGSI_FILE_NULL; + } + } + } + } else if (dst->Index < max_regs) { + /* + * Update this destination register value. + */ + + struct lp_tgsi_channel_info res[4]; + + memset(res, 0, sizeof res); + + if (!inst->Instruction.Predicate && + !inst->Instruction.Saturate) { + for (chan = 0; chan < 4; ++chan) { + if (dst->WriteMask & (1 << chan)) { + if (inst->Instruction.Opcode == TGSI_OPCODE_MOV) { + analyse_src(ctx, &res[chan], + &inst->Src[0].Register, chan); + } else if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) { + /* + * Propagate values across 1.0 and 0.0 multiplications. + */ + + struct lp_tgsi_channel_info src0; + struct lp_tgsi_channel_info src1; + + analyse_src(ctx, &src0, &inst->Src[0].Register, chan); + analyse_src(ctx, &src1, &inst->Src[1].Register, chan); + + if (is_immediate(&src0, 0.0f)) { + res[chan] = src0; + } else if (is_immediate(&src1, 0.0f)) { + res[chan] = src1; + } else if (is_immediate(&src0, 1.0f)) { + res[chan] = src1; + } else if (is_immediate(&src1, 1.0f)) { + res[chan] = src0; + } + } + } + } + } + + for (chan = 0; chan < 4; ++chan) { + if (dst->WriteMask & (1 << chan)) { + regs[dst->Index][chan] = res[chan]; + } + } + } + } + + /* + * Clear all temporaries information in presence of a control flow opcode. + */ + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_IF: + case TGSI_OPCODE_IFC: + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_ENDIF: + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_BRK: + case TGSI_OPCODE_BREAKC: + case TGSI_OPCODE_CONT: + case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_CALLNZ: + case TGSI_OPCODE_CAL: + case TGSI_OPCODE_BGNSUB: + case TGSI_OPCODE_ENDSUB: + case TGSI_OPCODE_SWITCH: + case TGSI_OPCODE_CASE: + case TGSI_OPCODE_DEFAULT: + case TGSI_OPCODE_ENDSWITCH: + case TGSI_OPCODE_RET: + case TGSI_OPCODE_END: + /* XXX: Are there more cases? */ + memset(&ctx->temp, 0, sizeof ctx->temp); + memset(&info->output, 0, sizeof info->output); + default: + break; + } +} + + +static INLINE void +dump_info(const struct tgsi_token *tokens, + struct lp_tgsi_info *info) +{ + unsigned index; + unsigned chan; + + tgsi_dump(tokens, 0); + + for (index = 0; index < info->num_texs; ++index) { + const struct lp_tgsi_texture_info *tex_info = &info->tex[index]; + debug_printf("TEX[%u] =", index); + for (chan = 0; chan < 4; ++chan) { + const struct lp_tgsi_channel_info *chan_info = + &tex_info->coord[chan]; + if (chan_info->file != TGSI_FILE_NULL) { + debug_printf(" %s[%u].%c", + tgsi_file_names[chan_info->file], + chan_info->u.index, + "xyzw01"[chan_info->swizzle]); + } else { + debug_printf(" _"); + } + } + debug_printf(", SAMP[%u], %s\n", + tex_info->unit, + tgsi_texture_names[tex_info->target]); + } + + for (index = 0; index < PIPE_MAX_SHADER_OUTPUTS; ++index) { + for (chan = 0; chan < 4; ++chan) { + const struct lp_tgsi_channel_info *chan_info = + &info->output[index][chan]; + if (chan_info->file != TGSI_FILE_NULL) { + debug_printf("OUT[%u].%c = ", index, "xyzw"[chan]); + if (chan_info->file == TGSI_FILE_IMMEDIATE) { + debug_printf("%f", chan_info->u.value); + } else { + const char *file_name; + switch (chan_info->file) { + case TGSI_FILE_CONSTANT: + file_name = "CONST"; + break; + case TGSI_FILE_INPUT: + file_name = "IN"; + break; + default: + file_name = "???"; + break; + } + debug_printf("%s[%u].%c", + file_name, + chan_info->u.index, + "xyzw01"[chan_info->swizzle]); + } + debug_printf("\n"); + } + } + } +} + + +/** + * Detect any direct relationship between the output color + */ +void +lp_build_tgsi_info(const struct tgsi_token *tokens, + struct lp_tgsi_info *info) +{ + struct tgsi_parse_context parse; + struct analysis_context ctx; + unsigned index; + unsigned chan; + + memset(info, 0, sizeof *info); + + tgsi_scan_shader(tokens, &info->base); + + memset(&ctx, 0, sizeof ctx); + ctx.info = info; + + tgsi_parse_init(&parse, tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *inst = + &parse.FullToken.FullInstruction; + + if (inst->Instruction.Opcode == TGSI_OPCODE_END || + inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) { + /* We reached the end of main function body. */ + goto finished; + } + + analyse_instruction(&ctx, inst); + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const unsigned size = + parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + assert(size <= 4); + if (ctx.num_imms < Elements(ctx.imm)) { + for (chan = 0; chan < size; ++chan) { + ctx.imm[ctx.num_imms][chan] = + parse.FullToken.FullImmediate.u[chan].Float; + } + ++ctx.num_imms; + } + } + break; + + case TGSI_TOKEN_TYPE_PROPERTY: + break; + + default: + assert(0); + } + } +finished: + + tgsi_parse_free(&parse); + + + /* + * Link the output color values. + */ + + for (index = 0; index < PIPE_MAX_COLOR_BUFS; ++index) { + const struct lp_tgsi_channel_info null_output[4]; + info->cbuf[index] = null_output; + } + + for (index = 0; index < info->base.num_outputs; ++index) { + unsigned semantic_name = info->base.output_semantic_name[index]; + unsigned semantic_index = info->base.output_semantic_index[index]; + if (semantic_name == TGSI_SEMANTIC_COLOR && + semantic_index < PIPE_MAX_COLOR_BUFS) { + info->cbuf[semantic_index] = info->output[index]; + } + } + + if (gallivm_debug & GALLIVM_DEBUG_TGSI) { + dump_info(tokens, info); + } +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 441aebae29..2bc90579a2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -887,21 +887,25 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, } if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); for (i = 0; i < num_coords; i++) { - ddx[i] = emit_fetch( bld, inst, 1, i ); - ddy[i] = emit_fetch( bld, inst, 2, i ); + LLVMValueRef src1 = emit_fetch( bld, inst, 1, i ); + LLVMValueRef src2 = emit_fetch( bld, inst, 2, i ); + ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, ""); + ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, ""); } unit = inst->Src[3].Register.Index; } else { for (i = 0; i < num_coords; i++) { - ddx[i] = lp_build_ddx( &bld->base, coords[i] ); - ddy[i] = lp_build_ddy( &bld->base, coords[i] ); + ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] ); + ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] ); } unit = inst->Src[1].Register.Index; } for (i = num_coords; i < 3; i++) { - ddx[i] = bld->base.undef; - ddy[i] = bld->base.undef; + ddx[i] = LLVMGetUndef(bld->base.elem_type); + ddy[i] = LLVMGetUndef(bld->base.elem_type); } bld->sampler->emit_fetch_texel(bld->sampler, @@ -913,6 +917,43 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, texel); } +static boolean +near_end_of_shader(struct lp_build_tgsi_soa_context *bld, + int pc) +{ + int i; + + for (i = 0; i < 5; i++) { + unsigned opcode; + + if (pc + i >= bld->info->num_instructions) + return TRUE; + + opcode = bld->instructions[pc + i].Instruction.Opcode; + + if (opcode == TGSI_OPCODE_END) + return TRUE; + + if (opcode == TGSI_OPCODE_TEX || + opcode == TGSI_OPCODE_TXP || + opcode == TGSI_OPCODE_TXD || + opcode == TGSI_OPCODE_TXB || + opcode == TGSI_OPCODE_TXL || + opcode == TGSI_OPCODE_TXF || + opcode == TGSI_OPCODE_TXQ || + opcode == TGSI_OPCODE_CAL || + opcode == TGSI_OPCODE_CALLNZ || + opcode == TGSI_OPCODE_IF || + opcode == TGSI_OPCODE_IFC || + opcode == TGSI_OPCODE_BGNLOOP || + opcode == TGSI_OPCODE_SWITCH) + return FALSE; + } + + return TRUE; +} + + /** * Kill fragment if any of the src register values are negative. @@ -920,7 +961,8 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, static void emit_kil( struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst ) + const struct tgsi_full_instruction *inst, + int pc) { const struct tgsi_full_src_register *reg = &inst->Src[0]; LLVMValueRef terms[NUM_CHANNELS]; @@ -959,8 +1001,12 @@ emit_kil( } } - if(mask) + if(mask) { lp_build_mask_update(bld->mask, mask); + + if (!near_end_of_shader(bld, pc)) + lp_build_mask_check(bld->mask); + } } @@ -972,7 +1018,8 @@ emit_kil( */ static void emit_kilp(struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst) + const struct tgsi_full_instruction *inst, + int pc) { LLVMValueRef mask; @@ -987,6 +1034,9 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld, } lp_build_mask_update(bld->mask, mask); + + if (!near_end_of_shader(bld, pc)) + lp_build_mask_check(bld->mask); } static void @@ -1535,12 +1585,12 @@ emit_instruction( case TGSI_OPCODE_KILP: /* predicated kill */ - emit_kilp( bld, inst ); + emit_kilp( bld, inst, (*pc)-1 ); break; case TGSI_OPCODE_KIL: /* conditional kill */ - emit_kil( bld, inst ); + emit_kil( bld, inst, (*pc)-1 ); break; case TGSI_OPCODE_PK2H: diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index b4d8107372..a6eb403962 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -222,7 +222,7 @@ pb_cache_buffer_vtbl = { }; -static INLINE boolean +static INLINE int pb_cache_is_buffer_compat(struct pb_cache_buffer *buf, pb_size size, const struct pb_desc *desc) @@ -230,26 +230,26 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf, void *map; if(buf->base.base.size < size) - return FALSE; + return 0; /* be lenient with size */ if(buf->base.base.size >= 2*size) - return FALSE; + return 0; if(!pb_check_alignment(desc->alignment, buf->base.base.alignment)) - return FALSE; + return 0; if(!pb_check_usage(desc->usage, buf->base.base.usage)) - return FALSE; + return 0; map = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL); if (!map) { - return FALSE; + return -1; } pb_unmap(buf->buffer); - return TRUE; + return 1; } @@ -263,7 +263,8 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, struct pb_cache_buffer *curr_buf; struct list_head *curr, *next; int64_t now; - + int ret = 0; + pipe_mutex_lock(mgr->mutex); buf = NULL; @@ -274,25 +275,30 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, now = os_time_get(); while(curr != &mgr->delayed) { curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); - if(!buf && pb_cache_is_buffer_compat(curr_buf, size, desc)) - buf = curr_buf; + if(!buf && (ret = pb_cache_is_buffer_compat(curr_buf, size, desc) > 0)) + buf = curr_buf; else if(os_time_timeout(curr_buf->start, curr_buf->end, now)) - _pb_cache_buffer_destroy(curr_buf); + _pb_cache_buffer_destroy(curr_buf); else /* This buffer (and all hereafter) are still hot in cache */ break; + if (ret == -1) + break; curr = next; next = curr->next; } /* keep searching in the hot buffers */ - if(!buf) { + if(!buf && ret != -1) { while(curr != &mgr->delayed) { curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); - if(pb_cache_is_buffer_compat(curr_buf, size, desc)) { + ret = pb_cache_is_buffer_compat(curr_buf, size, desc); + if (ret > 0) { buf = curr_buf; break; } + if (ret == -1) + break; /* no need to check the timeout here */ curr = next; next = curr->next; @@ -301,6 +307,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, if(buf) { LIST_DEL(&buf->head); + --mgr->numDelayed; pipe_mutex_unlock(mgr->mutex); /* Increase refcount */ pipe_reference_init(&buf->base.base.reference, 1); diff --git a/src/gallium/auxiliary/rbug/rbug_context.c b/src/gallium/auxiliary/rbug/rbug_context.c index 1832425658..a3fd7e8430 100644 --- a/src/gallium/auxiliary/rbug/rbug_context.c +++ b/src/gallium/auxiliary/rbug/rbug_context.c @@ -480,7 +480,7 @@ struct rbug_proto_context_list * rbug_demarshal_context_list(struct rbug_proto_h if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_CONTEXT_LIST) + if (header->opcode != (int32_t)RBUG_OP_CONTEXT_LIST) return NULL; pos = 0; @@ -506,7 +506,7 @@ struct rbug_proto_context_info * rbug_demarshal_context_info(struct rbug_proto_h if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_CONTEXT_INFO) + if (header->opcode != (int32_t)RBUG_OP_CONTEXT_INFO) return NULL; pos = 0; @@ -533,7 +533,7 @@ struct rbug_proto_context_draw_block * rbug_demarshal_context_draw_block(struct if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_BLOCK) + if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_BLOCK) return NULL; pos = 0; @@ -561,7 +561,7 @@ struct rbug_proto_context_draw_step * rbug_demarshal_context_draw_step(struct rb if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_STEP) + if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_STEP) return NULL; pos = 0; @@ -589,7 +589,7 @@ struct rbug_proto_context_draw_unblock * rbug_demarshal_context_draw_unblock(str if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_UNBLOCK) + if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_UNBLOCK) return NULL; pos = 0; @@ -617,7 +617,7 @@ struct rbug_proto_context_draw_rule * rbug_demarshal_context_draw_rule(struct rb if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_RULE) + if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_RULE) return NULL; pos = 0; @@ -649,7 +649,7 @@ struct rbug_proto_context_flush * rbug_demarshal_context_flush(struct rbug_proto if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_CONTEXT_FLUSH) + if (header->opcode != (int32_t)RBUG_OP_CONTEXT_FLUSH) return NULL; pos = 0; @@ -677,7 +677,7 @@ struct rbug_proto_context_list_reply * rbug_demarshal_context_list_reply(struct if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_CONTEXT_LIST_REPLY) + if (header->opcode != (int32_t)RBUG_OP_CONTEXT_LIST_REPLY) return NULL; pos = 0; @@ -705,7 +705,7 @@ struct rbug_proto_context_info_reply * rbug_demarshal_context_info_reply(struct if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_CONTEXT_INFO_REPLY) + if (header->opcode != (int32_t)RBUG_OP_CONTEXT_INFO_REPLY) return NULL; pos = 0; @@ -739,7 +739,7 @@ struct rbug_proto_context_draw_blocked * rbug_demarshal_context_draw_blocked(str if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_BLOCKED) + if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_BLOCKED) return NULL; pos = 0; diff --git a/src/gallium/auxiliary/rbug/rbug_core.c b/src/gallium/auxiliary/rbug/rbug_core.c index 876ae5a0ce..1d47d13c9f 100644 --- a/src/gallium/auxiliary/rbug/rbug_core.c +++ b/src/gallium/auxiliary/rbug/rbug_core.c @@ -233,7 +233,7 @@ struct rbug_proto_noop * rbug_demarshal_noop(struct rbug_proto_header *header) if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_NOOP) + if (header->opcode != (int32_t)RBUG_OP_NOOP) return NULL; pos = 0; @@ -259,7 +259,7 @@ struct rbug_proto_ping * rbug_demarshal_ping(struct rbug_proto_header *header) if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_PING) + if (header->opcode != (int32_t)RBUG_OP_PING) return NULL; pos = 0; @@ -285,7 +285,7 @@ struct rbug_proto_error * rbug_demarshal_error(struct rbug_proto_header *header) if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_ERROR) + if (header->opcode != (int32_t)RBUG_OP_ERROR) return NULL; pos = 0; @@ -312,7 +312,7 @@ struct rbug_proto_ping_reply * rbug_demarshal_ping_reply(struct rbug_proto_heade if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_PING_REPLY) + if (header->opcode != (int32_t)RBUG_OP_PING_REPLY) return NULL; pos = 0; @@ -339,7 +339,7 @@ struct rbug_proto_error_reply * rbug_demarshal_error_reply(struct rbug_proto_hea if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_ERROR_REPLY) + if (header->opcode != (int32_t)RBUG_OP_ERROR_REPLY) return NULL; pos = 0; diff --git a/src/gallium/auxiliary/rbug/rbug_demarshal.c b/src/gallium/auxiliary/rbug/rbug_demarshal.c index 47390fbcee..06caa45469 100644 --- a/src/gallium/auxiliary/rbug/rbug_demarshal.c +++ b/src/gallium/auxiliary/rbug/rbug_demarshal.c @@ -91,3 +91,67 @@ struct rbug_header * rbug_demarshal(struct rbug_proto_header *header) return NULL; } } + +const char* rbug_proto_get_name(enum rbug_opcode opcode) +{ + switch(opcode) { + case RBUG_OP_NOOP: + return "RBUG_OP_NOOP"; + case RBUG_OP_PING: + return "RBUG_OP_PING"; + case RBUG_OP_ERROR: + return "RBUG_OP_ERROR"; + case RBUG_OP_PING_REPLY: + return "RBUG_OP_PING_REPLY"; + case RBUG_OP_ERROR_REPLY: + return "RBUG_OP_ERROR_REPLY"; + case RBUG_OP_TEXTURE_LIST: + return "RBUG_OP_TEXTURE_LIST"; + case RBUG_OP_TEXTURE_INFO: + return "RBUG_OP_TEXTURE_INFO"; + case RBUG_OP_TEXTURE_WRITE: + return "RBUG_OP_TEXTURE_WRITE"; + case RBUG_OP_TEXTURE_READ: + return "RBUG_OP_TEXTURE_READ"; + case RBUG_OP_TEXTURE_LIST_REPLY: + return "RBUG_OP_TEXTURE_LIST_REPLY"; + case RBUG_OP_TEXTURE_INFO_REPLY: + return "RBUG_OP_TEXTURE_INFO_REPLY"; + case RBUG_OP_TEXTURE_READ_REPLY: + return "RBUG_OP_TEXTURE_READ_REPLY"; + case RBUG_OP_CONTEXT_LIST: + return "RBUG_OP_CONTEXT_LIST"; + case RBUG_OP_CONTEXT_INFO: + return "RBUG_OP_CONTEXT_INFO"; + case RBUG_OP_CONTEXT_DRAW_BLOCK: + return "RBUG_OP_CONTEXT_DRAW_BLOCK"; + case RBUG_OP_CONTEXT_DRAW_STEP: + return "RBUG_OP_CONTEXT_DRAW_STEP"; + case RBUG_OP_CONTEXT_DRAW_UNBLOCK: + return "RBUG_OP_CONTEXT_DRAW_UNBLOCK"; + case RBUG_OP_CONTEXT_DRAW_RULE: + return "RBUG_OP_CONTEXT_DRAW_RULE"; + case RBUG_OP_CONTEXT_FLUSH: + return "RBUG_OP_CONTEXT_FLUSH"; + case RBUG_OP_CONTEXT_LIST_REPLY: + return "RBUG_OP_CONTEXT_LIST_REPLY"; + case RBUG_OP_CONTEXT_INFO_REPLY: + return "RBUG_OP_CONTEXT_INFO_REPLY"; + case RBUG_OP_CONTEXT_DRAW_BLOCKED: + return "RBUG_OP_CONTEXT_DRAW_BLOCKED"; + case RBUG_OP_SHADER_LIST: + return "RBUG_OP_SHADER_LIST"; + case RBUG_OP_SHADER_INFO: + return "RBUG_OP_SHADER_INFO"; + case RBUG_OP_SHADER_DISABLE: + return "RBUG_OP_SHADER_DISABLE"; + case RBUG_OP_SHADER_REPLACE: + return "RBUG_OP_SHADER_REPLACE"; + case RBUG_OP_SHADER_LIST_REPLY: + return "RBUG_OP_SHADER_LIST_REPLY"; + case RBUG_OP_SHADER_INFO_REPLY: + return "RBUG_OP_SHADER_INFO_REPLY"; + default: + return NULL; + } +} diff --git a/src/gallium/auxiliary/rbug/rbug_proto.h b/src/gallium/auxiliary/rbug/rbug_proto.h index 4f3eb75dc4..2fce725bc9 100644 --- a/src/gallium/auxiliary/rbug/rbug_proto.h +++ b/src/gallium/auxiliary/rbug/rbug_proto.h @@ -91,4 +91,9 @@ struct rbug_proto_header */ struct rbug_connection; +/** + * Get printable string for opcode. + */ +const char* rbug_proto_get_name(enum rbug_opcode opcode); + #endif diff --git a/src/gallium/auxiliary/rbug/rbug_shader.c b/src/gallium/auxiliary/rbug/rbug_shader.c index fccd2f55ef..1742941cc1 100644 --- a/src/gallium/auxiliary/rbug/rbug_shader.c +++ b/src/gallium/auxiliary/rbug/rbug_shader.c @@ -305,7 +305,7 @@ struct rbug_proto_shader_list * rbug_demarshal_shader_list(struct rbug_proto_hea if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_SHADER_LIST) + if (header->opcode != (int32_t)RBUG_OP_SHADER_LIST) return NULL; pos = 0; @@ -332,7 +332,7 @@ struct rbug_proto_shader_info * rbug_demarshal_shader_info(struct rbug_proto_hea if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_SHADER_INFO) + if (header->opcode != (int32_t)RBUG_OP_SHADER_INFO) return NULL; pos = 0; @@ -360,7 +360,7 @@ struct rbug_proto_shader_disable * rbug_demarshal_shader_disable(struct rbug_pro if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_SHADER_DISABLE) + if (header->opcode != (int32_t)RBUG_OP_SHADER_DISABLE) return NULL; pos = 0; @@ -389,7 +389,7 @@ struct rbug_proto_shader_replace * rbug_demarshal_shader_replace(struct rbug_pro if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_SHADER_REPLACE) + if (header->opcode != (int32_t)RBUG_OP_SHADER_REPLACE) return NULL; pos = 0; @@ -418,7 +418,7 @@ struct rbug_proto_shader_list_reply * rbug_demarshal_shader_list_reply(struct rb if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_SHADER_LIST_REPLY) + if (header->opcode != (int32_t)RBUG_OP_SHADER_LIST_REPLY) return NULL; pos = 0; @@ -446,7 +446,7 @@ struct rbug_proto_shader_info_reply * rbug_demarshal_shader_info_reply(struct rb if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_SHADER_INFO_REPLY) + if (header->opcode != (int32_t)RBUG_OP_SHADER_INFO_REPLY) return NULL; pos = 0; diff --git a/src/gallium/auxiliary/rbug/rbug_texture.c b/src/gallium/auxiliary/rbug/rbug_texture.c index 5a918fe6bc..2ad577915e 100644 --- a/src/gallium/auxiliary/rbug/rbug_texture.c +++ b/src/gallium/auxiliary/rbug/rbug_texture.c @@ -417,7 +417,7 @@ struct rbug_proto_texture_list * rbug_demarshal_texture_list(struct rbug_proto_h if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_TEXTURE_LIST) + if (header->opcode != (int32_t)RBUG_OP_TEXTURE_LIST) return NULL; pos = 0; @@ -443,7 +443,7 @@ struct rbug_proto_texture_info * rbug_demarshal_texture_info(struct rbug_proto_h if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_TEXTURE_INFO) + if (header->opcode != (int32_t)RBUG_OP_TEXTURE_INFO) return NULL; pos = 0; @@ -470,7 +470,7 @@ struct rbug_proto_texture_write * rbug_demarshal_texture_write(struct rbug_proto if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_TEXTURE_WRITE) + if (header->opcode != (int32_t)RBUG_OP_TEXTURE_WRITE) return NULL; pos = 0; @@ -506,7 +506,7 @@ struct rbug_proto_texture_read * rbug_demarshal_texture_read(struct rbug_proto_h if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_TEXTURE_READ) + if (header->opcode != (int32_t)RBUG_OP_TEXTURE_READ) return NULL; pos = 0; @@ -540,7 +540,7 @@ struct rbug_proto_texture_list_reply * rbug_demarshal_texture_list_reply(struct if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_TEXTURE_LIST_REPLY) + if (header->opcode != (int32_t)RBUG_OP_TEXTURE_LIST_REPLY) return NULL; pos = 0; @@ -568,7 +568,7 @@ struct rbug_proto_texture_info_reply * rbug_demarshal_texture_info_reply(struct if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_TEXTURE_INFO_REPLY) + if (header->opcode != (int32_t)RBUG_OP_TEXTURE_INFO_REPLY) return NULL; pos = 0; @@ -606,7 +606,7 @@ struct rbug_proto_texture_read_reply * rbug_demarshal_texture_read_reply(struct if (!header) return NULL; - if (header->opcode != (int16_t)RBUG_OP_TEXTURE_READ_REPLY) + if (header->opcode != (int32_t)RBUG_OP_TEXTURE_READ_REPLY) return NULL; pos = 0; diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index 65d5ce795b..fbde1d191a 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -58,7 +58,6 @@ #include <unistd.h> #include <sys/mman.h> -#include "os/os_thread.h" #include "util/u_mm.h" #define EXEC_HEAP_SIZE (10*1024*1024) diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h index 036c1ee48a..34bfa527db 100644 --- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h @@ -23,26 +23,13 @@ #include "cell/ppu/cell_public.h" #endif + static INLINE struct pipe_screen * -sw_screen_create(struct sw_winsys *winsys) +sw_screen_create_named(struct sw_winsys *winsys, const char *driver) { - const char *default_driver; - const char *driver; struct pipe_screen *screen = NULL; #if defined(GALLIUM_CELL) - default_driver = "cell"; -#elif defined(GALLIUM_LLVMPIPE) - default_driver = "llvmpipe"; -#elif defined(GALLIUM_SOFTPIPE) - default_driver = "softpipe"; -#else - default_driver = ""; -#endif - - driver = debug_get_option("GALLIUM_DRIVER", default_driver); - -#if defined(GALLIUM_CELL) if (screen == NULL && strcmp(driver, "cell") == 0) screen = cell_create_screen(winsys); #endif @@ -60,4 +47,26 @@ sw_screen_create(struct sw_winsys *winsys) return screen; } + +static INLINE struct pipe_screen * +sw_screen_create(struct sw_winsys *winsys) +{ + const char *default_driver; + const char *driver; + +#if defined(GALLIUM_CELL) + default_driver = "cell"; +#elif defined(GALLIUM_LLVMPIPE) + default_driver = "llvmpipe"; +#elif defined(GALLIUM_SOFTPIPE) + default_driver = "softpipe"; +#else + default_driver = ""; +#endif + + driver = debug_get_option("GALLIUM_DRIVER", default_driver); + return sw_screen_create_named(winsys, driver); +} + + #endif diff --git a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h index 0b4e740403..e4effa713e 100644 --- a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h @@ -13,22 +13,28 @@ static INLINE struct pipe_screen * sw_screen_wrap(struct pipe_screen *screen) { struct sw_winsys *sws; - struct pipe_screen *sw_screen; + struct pipe_screen *sw_screen = NULL; + const char *driver; - sws = wrapper_sw_winsys_warp_pipe_screen(screen); + driver = debug_get_option("GALLIUM_DRIVER", "native"); + if (strcmp(driver, "native") == 0) + return screen; + + sws = wrapper_sw_winsys_wrap_pipe_screen(screen); if (!sws) goto err; - sw_screen = sw_screen_create(sws); - if (sw_screen == screen) + sw_screen = sw_screen_create_named(sws, driver); + + if (!sw_screen) goto err_winsys; return sw_screen; err_winsys: - sws->destroy(sws); + return wrapper_sw_winsys_dewrap_pipe_screen(sws); err: - return screen; + return screen; } #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index f71ffb7030..77bde86684 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -90,7 +90,8 @@ static const char *processor_type_names[] = "GEOM" }; -static const char *file_names[TGSI_FILE_COUNT] = +const char * +tgsi_file_names[TGSI_FILE_COUNT] = { "NULL", "CONST", @@ -125,7 +126,8 @@ static const char *semantic_names[] = "FACE", "EDGEFLAG", "PRIM_ID", - "INSTANCEID" + "INSTANCEID", + "STENCIL" }; static const char *immediate_type_names[] = @@ -135,7 +137,8 @@ static const char *immediate_type_names[] = "INT32" }; -static const char *swizzle_names[] = +const char * +tgsi_swizzle_names[] = { "x", "y", @@ -143,7 +146,8 @@ static const char *swizzle_names[] = "w" }; -static const char *texture_names[] = +const char * +tgsi_texture_names[] = { "UNKNOWN", "1D", @@ -201,15 +205,15 @@ _dump_register_src( struct dump_ctx *ctx, const struct tgsi_full_src_register *src ) { - ENM(src->Register.File, file_names); + ENM(src->Register.File, tgsi_file_names); if (src->Register.Dimension) { if (src->Dimension.Indirect) { CHR( '[' ); - ENM( src->DimIndirect.File, file_names ); + ENM( src->DimIndirect.File, tgsi_file_names ); CHR( '[' ); SID( src->DimIndirect.Index ); TXT( "]." ); - ENM( src->DimIndirect.SwizzleX, swizzle_names ); + ENM( src->DimIndirect.SwizzleX, tgsi_swizzle_names ); if (src->Dimension.Index != 0) { if (src->Dimension.Index > 0) CHR( '+' ); @@ -224,11 +228,11 @@ _dump_register_src( } if (src->Register.Indirect) { CHR( '[' ); - ENM( src->Indirect.File, file_names ); + ENM( src->Indirect.File, tgsi_file_names ); CHR( '[' ); SID( src->Indirect.Index ); TXT( "]." ); - ENM( src->Indirect.SwizzleX, swizzle_names ); + ENM( src->Indirect.SwizzleX, tgsi_swizzle_names ); if (src->Register.Index != 0) { if (src->Register.Index > 0) CHR( '+' ); @@ -248,15 +252,15 @@ _dump_register_dst( struct dump_ctx *ctx, const struct tgsi_full_dst_register *dst ) { - ENM(dst->Register.File, file_names); + ENM(dst->Register.File, tgsi_file_names); if (dst->Register.Dimension) { if (dst->Dimension.Indirect) { CHR( '[' ); - ENM( dst->DimIndirect.File, file_names ); + ENM( dst->DimIndirect.File, tgsi_file_names ); CHR( '[' ); SID( dst->DimIndirect.Index ); TXT( "]." ); - ENM( dst->DimIndirect.SwizzleX, swizzle_names ); + ENM( dst->DimIndirect.SwizzleX, tgsi_swizzle_names ); if (dst->Dimension.Index != 0) { if (dst->Dimension.Index > 0) CHR( '+' ); @@ -271,11 +275,11 @@ _dump_register_dst( } if (dst->Register.Indirect) { CHR( '[' ); - ENM( dst->Indirect.File, file_names ); + ENM( dst->Indirect.File, tgsi_file_names ); CHR( '[' ); SID( dst->Indirect.Index ); TXT( "]." ); - ENM( dst->Indirect.SwizzleX, swizzle_names ); + ENM( dst->Indirect.SwizzleX, tgsi_swizzle_names ); if (dst->Register.Index != 0) { if (dst->Register.Index > 0) CHR( '+' ); @@ -351,7 +355,7 @@ iter_declaration( TXT( "DCL " ); - ENM(decl->Declaration.File, file_names); + ENM(decl->Declaration.File, tgsi_file_names); /* all geometry shader inputs are two dimensional */ if (decl->Declaration.File == TGSI_FILE_INPUT && @@ -585,10 +589,10 @@ iter_instruction( inst->Predicate.SwizzleZ != TGSI_SWIZZLE_Z || inst->Predicate.SwizzleW != TGSI_SWIZZLE_W) { CHR( '.' ); - ENM( inst->Predicate.SwizzleX, swizzle_names ); - ENM( inst->Predicate.SwizzleY, swizzle_names ); - ENM( inst->Predicate.SwizzleZ, swizzle_names ); - ENM( inst->Predicate.SwizzleW, swizzle_names ); + ENM( inst->Predicate.SwizzleX, tgsi_swizzle_names ); + ENM( inst->Predicate.SwizzleY, tgsi_swizzle_names ); + ENM( inst->Predicate.SwizzleZ, tgsi_swizzle_names ); + ENM( inst->Predicate.SwizzleW, tgsi_swizzle_names ); } TXT( ") " ); @@ -641,10 +645,10 @@ iter_instruction( src->Register.SwizzleZ != TGSI_SWIZZLE_Z || src->Register.SwizzleW != TGSI_SWIZZLE_W) { CHR( '.' ); - ENM( src->Register.SwizzleX, swizzle_names ); - ENM( src->Register.SwizzleY, swizzle_names ); - ENM( src->Register.SwizzleZ, swizzle_names ); - ENM( src->Register.SwizzleW, swizzle_names ); + ENM( src->Register.SwizzleX, tgsi_swizzle_names ); + ENM( src->Register.SwizzleY, tgsi_swizzle_names ); + ENM( src->Register.SwizzleZ, tgsi_swizzle_names ); + ENM( src->Register.SwizzleW, tgsi_swizzle_names ); } if (src->Register.Absolute) @@ -655,7 +659,7 @@ iter_instruction( if (inst->Instruction.Texture) { TXT( ", " ); - ENM( inst->Texture.Texture, texture_names ); + ENM( inst->Texture.Texture, tgsi_texture_names ); } switch (inst->Instruction.Opcode) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h index dd78b36100..fc0429ad8d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.h +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h @@ -35,6 +35,15 @@ extern "C" { #endif +extern const char * +tgsi_file_names[TGSI_FILE_COUNT]; + +extern const char * +tgsi_swizzle_names[]; + +extern const char * +tgsi_texture_names[]; + void tgsi_dump_str( const struct tgsi_token *tokens, diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 0757f05dfa..3a71540506 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -605,8 +605,10 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { if ((inst->Src[i].Register.File == inst->Dst[0].Register.File) && - (inst->Src[i].Register.Index == - inst->Dst[0].Register.Index)) { + ((inst->Src[i].Register.Index == + inst->Dst[0].Register.Index) || + inst->Src[i].Register.Indirect || + inst->Dst[0].Register.Indirect)) { /* loop over dest channels */ uint channelsWritten = 0x0; FOR_EACH_ENABLED_CHANNEL(*inst, chan) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index e472947507..b3123ed016 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -163,6 +163,10 @@ OP12(USGE) OP12(USHR) OP12(USLT) OP12(USNE) +OP01(SWITCH) +OP01(CASE) +OP00(DEFAULT) +OP00(ENDSWITCH) #undef OP00 diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 90198a4f60..6585da3e83 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -147,6 +147,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name; info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index; info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate; + info->input_centroid[reg] = (ubyte)fulldecl->Declaration.Centroid; info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Declaration.CylindricalWrap; info->num_inputs++; } @@ -157,9 +158,11 @@ tgsi_scan_shader(const struct tgsi_token *tokens, /* extra info for special outputs */ if (procType == TGSI_PROCESSOR_FRAGMENT && - fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) { - info->writes_z = TRUE; - } + fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) + info->writes_z = TRUE; + if (procType == TGSI_PROCESSOR_FRAGMENT && + fulldecl->Semantic.Name == TGSI_SEMANTIC_STENCIL) + info->writes_stencil = TRUE; if (procType == TGSI_PROCESSOR_VERTEX && fulldecl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG) { info->writes_edgeflag = TRUE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index f8aa90cf06..104097fbc0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -45,6 +45,7 @@ struct tgsi_shader_info ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS]; + ubyte input_centroid[PIPE_MAX_SHADER_INPUTS]; ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS]; ubyte input_cylindrical_wrap[PIPE_MAX_SHADER_INPUTS]; ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ @@ -60,6 +61,7 @@ struct tgsi_shader_info uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ boolean writes_z; /**< does fragment shader write Z value? */ + boolean writes_stencil; /**< does fragment shader write stencil value? */ boolean writes_edgeflag; /**< vertex shader outputs edgeflag */ boolean uses_kill; /**< KIL or KILP instruction used? */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 13e2e8eb99..086d983a73 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2830,31 +2830,52 @@ static void soa_to_aos( struct x86_function *func, * Check if the instructions dst register is the same as any src * register and warn if there's a posible SOA dependency. */ -static void +static boolean check_soa_dependencies(const struct tgsi_full_instruction *inst) { - switch (inst->Instruction.Opcode) { + uint opcode = inst->Instruction.Opcode; + + /* XXX: we only handle src/dst aliasing in a few opcodes currently. + * Need to use an additional temporay to hold the result in the + * cases where the code is too opaque to fix. + */ + + switch (opcode) { case TGSI_OPCODE_ADD: case TGSI_OPCODE_MOV: case TGSI_OPCODE_MUL: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_EXP: + case TGSI_OPCODE_LOG: + case TGSI_OPCODE_DP3: + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DP2A: + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_POW: case TGSI_OPCODE_XPD: + case TGSI_OPCODE_DPH: + case TGSI_OPCODE_COS: + case TGSI_OPCODE_SIN: + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXP: + case TGSI_OPCODE_NRM: + case TGSI_OPCODE_NRM4: + case TGSI_OPCODE_DP2: /* OK - these opcodes correctly handle SOA dependencies */ - break; + return TRUE; default: - if (tgsi_check_soa_dependencies(inst)) { - uint opcode = inst->Instruction.Opcode; + if (!tgsi_check_soa_dependencies(inst)) + return TRUE; - /* XXX: we only handle src/dst aliasing in a few opcodes - * currently. Need to use an additional temporay to hold - * the result in the cases where the code is too opaque to - * fix. - */ - if (opcode != TGSI_OPCODE_MOV) { - debug_printf("Warning: src/dst aliasing in instruction" - " is not handled:\n"); - tgsi_dump_instruction(inst, 1); - } - } + debug_printf("Warning: src/dst aliasing in instruction" + " is not handled:\n"); + debug_printf("Warning: "); + tgsi_dump_instruction(inst, 1); + + return FALSE; } } @@ -2954,7 +2975,8 @@ tgsi_emit_sse2( tgsi_get_processor_name(proc)); } - check_soa_dependencies(&parse.FullToken.FullInstruction); + if (ok) + ok = check_soa_dependencies(&parse.FullToken.FullInstruction); break; case TGSI_TOKEN_TYPE_IMMEDIATE: diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 3cf6893a9b..7d13a17bdb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -96,7 +96,8 @@ struct ureg_program unsigned semantic_name; unsigned semantic_index; unsigned interp; - unsigned cylindrical_wrap; + unsigned char cylindrical_wrap; + unsigned char centroid; } fs_input[UREG_MAX_INPUT]; unsigned nr_fs_inputs; @@ -286,11 +287,12 @@ ureg_property_fs_coord_pixel_center(struct ureg_program *ureg, struct ureg_src -ureg_DECL_fs_input_cyl(struct ureg_program *ureg, +ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg, unsigned semantic_name, unsigned semantic_index, unsigned interp_mode, - unsigned cylindrical_wrap) + unsigned cylindrical_wrap, + unsigned centroid) { unsigned i; @@ -306,6 +308,7 @@ ureg_DECL_fs_input_cyl(struct ureg_program *ureg, ureg->fs_input[i].semantic_index = semantic_index; ureg->fs_input[i].interp = interp_mode; ureg->fs_input[i].cylindrical_wrap = cylindrical_wrap; + ureg->fs_input[i].centroid = centroid; ureg->nr_fs_inputs++; } else { set_bad(ureg); @@ -1126,7 +1129,8 @@ emit_decl_fs(struct ureg_program *ureg, unsigned semantic_name, unsigned semantic_index, unsigned interpolate, - unsigned cylindrical_wrap) + unsigned cylindrical_wrap, + unsigned centroid) { union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); @@ -1138,6 +1142,7 @@ emit_decl_fs(struct ureg_program *ureg, out[0].decl.Interpolate = interpolate; out[0].decl.Semantic = 1; out[0].decl.CylindricalWrap = cylindrical_wrap; + out[0].decl.Centroid = centroid; out[1].value = 0; out[1].decl_range.First = index; @@ -1287,7 +1292,8 @@ static void emit_decls( struct ureg_program *ureg ) ureg->fs_input[i].semantic_name, ureg->fs_input[i].semantic_index, ureg->fs_input[i].interp, - ureg->fs_input[i].cylindrical_wrap); + ureg->fs_input[i].cylindrical_wrap, + ureg->fs_input[i].centroid); } } else { for (i = 0; i < ureg->nr_gs_inputs; i++) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 07fb01ab7b..acc463200a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -158,11 +158,27 @@ ureg_property_fs_coord_pixel_center(struct ureg_program *ureg, */ struct ureg_src -ureg_DECL_fs_input_cyl(struct ureg_program *, +ureg_DECL_fs_input_cyl_centroid(struct ureg_program *, unsigned semantic_name, unsigned semantic_index, unsigned interp_mode, - unsigned cylindrical_wrap); + unsigned cylindrical_wrap, + unsigned centroid); + +static INLINE struct ureg_src +ureg_DECL_fs_input_cyl(struct ureg_program *ureg, + unsigned semantic_name, + unsigned semantic_index, + unsigned interp_mode, + unsigned cylindrical_wrap) +{ + return ureg_DECL_fs_input_cyl_centroid(ureg, + semantic_name, + semantic_index, + interp_mode, + cylindrical_wrap, + 0); +} static INLINE struct ureg_src ureg_DECL_fs_input(struct ureg_program *ureg, @@ -170,11 +186,11 @@ ureg_DECL_fs_input(struct ureg_program *ureg, unsigned semantic_index, unsigned interp_mode) { - return ureg_DECL_fs_input_cyl(ureg, + return ureg_DECL_fs_input_cyl_centroid(ureg, semantic_name, semantic_index, interp_mode, - 0); + 0, 0); } struct ureg_src diff --git a/src/gallium/auxiliary/util/u_atomic.h b/src/gallium/auxiliary/util/u_atomic.h index a156823390..8434491a42 100644 --- a/src/gallium/auxiliary/util/u_atomic.h +++ b/src/gallium/auxiliary/util/u_atomic.h @@ -29,6 +29,8 @@ #define PIPE_ATOMIC_ASM_MSVC_X86 #elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)) #define PIPE_ATOMIC_ASM_GCC_X86 +#elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86_64)) +#define PIPE_ATOMIC_ASM_GCC_X86_64 #elif defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 401) #define PIPE_ATOMIC_GCC_INTRINSIC #else @@ -36,6 +38,51 @@ #endif +#if defined(PIPE_ATOMIC_ASM_GCC_X86_64) +#define PIPE_ATOMIC "GCC x86_64 assembly" + +#ifdef __cplusplus +extern "C" { +#endif + +#define p_atomic_set(_v, _i) (*(_v) = (_i)) +#define p_atomic_read(_v) (*(_v)) + +static INLINE boolean +p_atomic_dec_zero(int32_t *v) +{ + unsigned char c; + + __asm__ __volatile__("lock; decl %0; sete %1":"+m"(*v), "=qm"(c) + ::"memory"); + + return c != 0; +} + +static INLINE void +p_atomic_inc(int32_t *v) +{ + __asm__ __volatile__("lock; incl %0":"+m"(*v)); +} + +static INLINE void +p_atomic_dec(int32_t *v) +{ + __asm__ __volatile__("lock; decl %0":"+m"(*v)); +} + +static INLINE int32_t +p_atomic_cmpxchg(int32_t *v, int32_t old, int32_t _new) +{ + return __sync_val_compare_and_swap(v, old, _new); +} + +#ifdef __cplusplus +} +#endif + +#endif /* PIPE_ATOMIC_ASM_GCC_X86_64 */ + #if defined(PIPE_ATOMIC_ASM_GCC_X86) diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index f93ef26ae7..a163f93cb8 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -268,7 +268,7 @@ void util_blitter_destroy(struct blitter_context *blitter) pipe->delete_fs_state(pipe, ctx->fs_texfetch_depth[i]); } - for (i = 0; i <= PIPE_MAX_COLOR_BUFS && ctx->fs_col[i]; i++) + for (i = 0; i <= PIPE_MAX_COLOR_BUFS; i++) if (ctx->fs_col[i]) pipe->delete_fs_state(pipe, ctx->fs_col[i]); @@ -964,16 +964,18 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, blitter_restore_CSOs(ctx); } -/* Clear a region of a depth stencil surface. */ -void util_blitter_flush_depth_stencil(struct blitter_context *blitter, - struct pipe_surface *dstsurf) +/* draw a rectangle across a region using a custom dsa stage - for r600g */ +void util_blitter_custom_depth_stencil(struct blitter_context *blitter, + struct pipe_surface *zsurf, + struct pipe_surface *cbsurf, + void *dsa_stage, float depth) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_context *pipe = ctx->base.pipe; struct pipe_framebuffer_state fb_state; - assert(dstsurf->texture); - if (!dstsurf->texture) + assert(zsurf->texture); + if (!zsurf->texture) return; /* check the saved state */ @@ -981,8 +983,8 @@ void util_blitter_flush_depth_stencil(struct blitter_context *blitter, assert(blitter->saved_fb_state.nr_cbufs != ~0); /* bind CSOs */ - pipe->bind_blend_state(pipe, ctx->blend_keep_color); - pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil); + pipe->bind_blend_state(pipe, ctx->blend_write_color); + pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage); pipe->bind_rasterizer_state(pipe, ctx->rs_state); pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0)); @@ -990,15 +992,30 @@ void util_blitter_flush_depth_stencil(struct blitter_context *blitter, pipe->bind_vertex_elements_state(pipe, ctx->velem_state); /* set a framebuffer state */ - fb_state.width = dstsurf->width; - fb_state.height = dstsurf->height; - fb_state.nr_cbufs = 0; - fb_state.cbufs[0] = 0; - fb_state.zsbuf = dstsurf; + fb_state.width = zsurf->width; + fb_state.height = zsurf->height; + fb_state.nr_cbufs = 1; + if (cbsurf) { + fb_state.cbufs[0] = cbsurf; + fb_state.nr_cbufs = 1; + } else { + fb_state.cbufs[0] = NULL; + fb_state.nr_cbufs = 0; + } + fb_state.zsbuf = zsurf; pipe->set_framebuffer_state(pipe, &fb_state); - blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); - blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height, 0, + blitter_set_dst_dimensions(ctx, zsurf->width, zsurf->height); + blitter->draw_rectangle(blitter, 0, 0, zsurf->width, zsurf->height, depth, UTIL_BLITTER_ATTRIB_NONE, NULL); blitter_restore_CSOs(ctx); } + +/* flush a region of a depth stencil surface for r300g */ +void util_blitter_flush_depth_stencil(struct blitter_context *blitter, + struct pipe_surface *dstsurf) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + util_blitter_custom_depth_stencil(blitter, dstsurf, NULL, + ctx->dsa_flush_depth_stencil, 0.0f); +} diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index e33d2e283f..f9f96f25c7 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -203,6 +203,12 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, void util_blitter_flush_depth_stencil(struct blitter_context *blitter, struct pipe_surface *dstsurf); + +void util_blitter_custom_depth_stencil(struct blitter_context *blitter, + struct pipe_surface *zsurf, + struct pipe_surface *cbsurf, + void *dsa_stage, float depth); + /* The functions below should be used to save currently bound constant state * objects inside a driver. The objects are automatically restored at the end * of the util_blitter_{clear, copy_region, fill_region} functions and then diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 016e73c4a1..1fbd83841c 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -109,9 +109,12 @@ PIPE_FORMAT_Z32_UNORM , plain, 1, 1, un32, , , , x___, PIPE_FORMAT_Z32_FLOAT , plain, 1, 1, f32 , , , , x___, zs PIPE_FORMAT_Z24_UNORM_S8_USCALED , plain, 1, 1, un24, u8 , , , xy__, zs PIPE_FORMAT_S8_USCALED_Z24_UNORM , plain, 1, 1, u8 , un24, , , yx__, zs +PIPE_FORMAT_X24S8_USCALED , plain, 1, 1, x24, u8 , , , _y__, zs +PIPE_FORMAT_S8X24_USCALED , plain, 1, 1, u8 , x24 , , , _x__, zs PIPE_FORMAT_Z24X8_UNORM , plain, 1, 1, un24, x8 , , , x___, zs PIPE_FORMAT_X8Z24_UNORM , plain, 1, 1, x8 , un24, , , y___, zs PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED , plain, 1, 1, f32, u8 , x24 , , xy__, zs +PIPE_FORMAT_X32_S8X24_USCALED , plain, 1, 1, x32, u8 , x24 , , _y__, zs # YUV formats # http://www.fourcc.org/yuv.php#UYVY diff --git a/src/gallium/auxiliary/util/u_format_zs.c b/src/gallium/auxiliary/util/u_format_zs.c index 792d69c214..80081e22f7 100644 --- a/src/gallium/auxiliary/util/u_format_zs.c +++ b/src/gallium/auxiliary/util/u_format_zs.c @@ -918,3 +918,56 @@ util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned d } } + +void +util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_z24_unorm_s8_uscaled_unpack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_z24_unorm_s8_uscaled_pack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_s8_uscaled_z24_unorm_unpack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_s8_uscaled_z24_unorm_pack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); + +} + +void +util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} diff --git a/src/gallium/auxiliary/util/u_format_zs.h b/src/gallium/auxiliary/util/u_format_zs.h index 650db4b95f..1604cc3eee 100644 --- a/src/gallium/auxiliary/util/u_format_zs.h +++ b/src/gallium/auxiliary/util/u_format_zs.h @@ -192,5 +192,21 @@ util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned void util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); +void +util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); +void +util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_sride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); #endif /* U_FORMAT_ZS_H_ */ diff --git a/src/gallium/auxiliary/util/u_index_modify.c b/src/gallium/auxiliary/util/u_index_modify.c new file mode 100644 index 0000000000..65b079ed53 --- /dev/null +++ b/src/gallium/auxiliary/util/u_index_modify.c @@ -0,0 +1,127 @@ +/* + * Copyright 2010 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "pipe/p_context.h" +#include "util/u_index_modify.h" +#include "util/u_inlines.h" + +void util_shorten_ubyte_elts(struct pipe_context *context, + struct pipe_resource **elts, + int index_bias, + unsigned start, + unsigned count) +{ + struct pipe_screen* screen = context->screen; + struct pipe_resource* new_elts; + unsigned char *in_map; + unsigned short *out_map; + struct pipe_transfer *src_transfer, *dst_transfer; + unsigned i; + + new_elts = pipe_buffer_create(screen, + PIPE_BIND_INDEX_BUFFER, + 2 * count); + + in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer); + out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer); + + in_map += start; + + for (i = 0; i < count; i++) { + *out_map = (unsigned short)(*in_map + index_bias); + in_map++; + out_map++; + } + + pipe_buffer_unmap(context, *elts, src_transfer); + pipe_buffer_unmap(context, new_elts, dst_transfer); + + *elts = new_elts; +} + +void util_rebuild_ushort_elts(struct pipe_context *context, + struct pipe_resource **elts, + int index_bias, + unsigned start, unsigned count) +{ + struct pipe_transfer *in_transfer = NULL; + struct pipe_transfer *out_transfer = NULL; + struct pipe_resource *new_elts; + unsigned short *in_map; + unsigned short *out_map; + unsigned i; + + new_elts = pipe_buffer_create(context->screen, + PIPE_BIND_INDEX_BUFFER, + 2 * count); + + in_map = pipe_buffer_map(context, *elts, + PIPE_TRANSFER_READ, &in_transfer); + out_map = pipe_buffer_map(context, new_elts, + PIPE_TRANSFER_WRITE, &out_transfer); + + in_map += start; + for (i = 0; i < count; i++) { + *out_map = (unsigned short)(*in_map + index_bias); + in_map++; + out_map++; + } + + pipe_buffer_unmap(context, *elts, in_transfer); + pipe_buffer_unmap(context, new_elts, out_transfer); + + *elts = new_elts; +} + +void util_rebuild_uint_elts(struct pipe_context *context, + struct pipe_resource **elts, + int index_bias, + unsigned start, unsigned count) +{ + struct pipe_transfer *in_transfer = NULL; + struct pipe_transfer *out_transfer = NULL; + struct pipe_resource *new_elts; + unsigned int *in_map; + unsigned int *out_map; + unsigned i; + + new_elts = pipe_buffer_create(context->screen, + PIPE_BIND_INDEX_BUFFER, + 2 * count); + + in_map = pipe_buffer_map(context, *elts, + PIPE_TRANSFER_READ, &in_transfer); + out_map = pipe_buffer_map(context, new_elts, + PIPE_TRANSFER_WRITE, &out_transfer); + + in_map += start; + for (i = 0; i < count; i++) { + *out_map = (unsigned int)(*in_map + index_bias); + in_map++; + out_map++; + } + + pipe_buffer_unmap(context, *elts, in_transfer); + pipe_buffer_unmap(context, new_elts, out_transfer); + + *elts = new_elts; +} diff --git a/src/gallium/winsys/r600/drm/radeon_draw.c b/src/gallium/auxiliary/util/u_index_modify.h index a126901495..01a6cae94f 100644 --- a/src/gallium/winsys/r600/drm/radeon_draw.c +++ b/src/gallium/auxiliary/util/u_index_modify.h @@ -1,5 +1,5 @@ /* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> + * Copyright 2010 Marek Olšák <maraeo@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,40 +18,24 @@ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include "radeon_priv.h" + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* - * draw functions - */ -int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon) -{ - draw->radeon = radeon; - draw->state = calloc(radeon->max_states, sizeof(void*)); - if (draw->state == NULL) - return -ENOMEM; - return 0; -} +#ifndef UTIL_INDEX_MODIFY_H +#define UTIL_INDEX_MODIFY_H + +void util_shorten_ubyte_elts(struct pipe_context *context, + struct pipe_resource **elts, + int index_bias, + unsigned start, + unsigned count); -void radeon_draw_bind(struct radeon_draw *draw, struct radeon_state *state) -{ - if (state == NULL) - return; - draw->state[state->state_id] = state; -} +void util_rebuild_ushort_elts(struct pipe_context *context, + struct pipe_resource **elts, + int index_bias, + unsigned start, unsigned count); -void radeon_draw_unbind(struct radeon_draw *draw, struct radeon_state *state) -{ - if (state == NULL) - return; - if (draw->state[state->state_id] == state) { - draw->state[state->state_id] = NULL; - } -} +void util_rebuild_uint_elts(struct pipe_context *context, + struct pipe_resource **elts, + int index_bias, + unsigned start, unsigned count); +#endif diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 69a7681494..37294b7203 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -118,6 +118,11 @@ __inline double __cdecl atan2(double val) #endif +#ifndef M_SQRT2 +#define M_SQRT2 1.41421356237309504880 +#endif + + #if defined(_MSC_VER) #if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index aae8b8bdf1..5378f2d782 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -394,7 +394,7 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color * return; case PIPE_FORMAT_B4G4R4A4_UNORM: { - uc->ub = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); + uc->us = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); } return; case PIPE_FORMAT_A8_UNORM: @@ -434,8 +434,8 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color * /* Integer versions of util_pack_z and util_pack_z_stencil - useful for * constructing clear masks. */ -static INLINE uint -util_pack_uint_z(enum pipe_format format, unsigned z) +static INLINE uint32_t +util_pack_mask_z(enum pipe_format format, uint32_t z) { switch (format) { case PIPE_FORMAT_Z16_UNORM: @@ -452,29 +452,32 @@ util_pack_uint_z(enum pipe_format format, unsigned z) case PIPE_FORMAT_S8_USCALED: return 0; default: - debug_print_format("gallium: unhandled format in util_pack_z()", format); + debug_print_format("gallium: unhandled format in util_pack_mask_z()", format); assert(0); return 0; } } -static INLINE uint -util_pack_uint_z_stencil(enum pipe_format format, double z, uint s) +static INLINE uint32_t +util_pack_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s) { - unsigned packed = util_pack_uint_z(format, z); - - s &= 0xff; + uint32_t packed = util_pack_mask_z(format, z); switch (format) { case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return packed | (s << 24); + packed |= (uint32_t)s << 24; + break; case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - return packed | s; + packed |= s; + break; case PIPE_FORMAT_S8_USCALED: - return packed | s; + packed |= s; + break; default: - return packed; + break; } + + return packed; } @@ -482,9 +485,11 @@ util_pack_uint_z_stencil(enum pipe_format format, double z, uint s) /** * Note: it's assumed that z is in [0,1] */ -static INLINE uint +static INLINE uint32_t util_pack_z(enum pipe_format format, double z) { + union fi fui; + if (z == 0.0) return 0; @@ -492,24 +497,25 @@ util_pack_z(enum pipe_format format, double z) case PIPE_FORMAT_Z16_UNORM: if (z == 1.0) return 0xffff; - return (uint) (z * 0xffff); + return (uint32_t) (z * 0xffff); case PIPE_FORMAT_Z32_UNORM: /* special-case to avoid overflow */ if (z == 1.0) return 0xffffffff; - return (uint) (z * 0xffffffff); + return (uint32_t) (z * 0xffffffff); case PIPE_FORMAT_Z32_FLOAT: - return (uint)z; + fui.f = (float)z; + return fui.ui; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: case PIPE_FORMAT_Z24X8_UNORM: if (z == 1.0) return 0xffffff; - return (uint) (z * 0xffffff); + return (uint32_t) (z * 0xffffff); case PIPE_FORMAT_S8_USCALED_Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: if (z == 1.0) return 0xffffff00; - return ((uint) (z * 0xffffff)) << 8; + return ((uint32_t) (z * 0xffffff)) << 8; case PIPE_FORMAT_S8_USCALED: /* this case can get it via util_pack_z_stencil() */ return 0; @@ -525,14 +531,14 @@ util_pack_z(enum pipe_format format, double z) * Pack Z and/or stencil values into a 32-bit value described by format. * Note: it's assumed that z is in [0,1] and s in [0,255] */ -static INLINE uint -util_pack_z_stencil(enum pipe_format format, double z, uint s) +static INLINE uint32_t +util_pack_z_stencil(enum pipe_format format, double z, uint8_t s) { - unsigned packed = util_pack_z(format, z); + uint32_t packed = util_pack_z(format, z); switch (format) { case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - packed |= s << 24; + packed |= (uint32_t)s << 24; break; case PIPE_FORMAT_S8_USCALED_Z24_UNORM: packed |= s; diff --git a/src/gallium/auxiliary/util/u_simple_list.h b/src/gallium/auxiliary/util/u_simple_list.h index f5f43b0faa..fe59771371 100644 --- a/src/gallium/auxiliary/util/u_simple_list.h +++ b/src/gallium/auxiliary/util/u_simple_list.h @@ -46,6 +46,8 @@ do { \ (elem)->next->prev = (elem)->prev; \ (elem)->prev->next = (elem)->next; \ + (elem)->next = elem; \ + (elem)->prev = elem; \ } while (0) /** diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h index 87959ab0aa..1df6c87267 100644 --- a/src/gallium/auxiliary/util/u_sse.h +++ b/src/gallium/auxiliary/util/u_sse.h @@ -71,6 +71,96 @@ _mm_castps_si128(__m128 a) #endif /* defined(_MSC_VER) && _MSC_VER < 1500 */ +union m128i { + __m128i m; + ubyte ub[16]; + ushort us[8]; + uint ui[4]; +}; + +static INLINE void u_print_epi8(const char *name, __m128i r) +{ + union { __m128i m; ubyte ub[16]; } u; + u.m = r; + + debug_printf("%s: " + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x\n", + name, + u.ub[0], u.ub[1], u.ub[2], u.ub[3], + u.ub[4], u.ub[5], u.ub[6], u.ub[7], + u.ub[8], u.ub[9], u.ub[10], u.ub[11], + u.ub[12], u.ub[13], u.ub[14], u.ub[15]); +} + +static INLINE void u_print_epi16(const char *name, __m128i r) +{ + union { __m128i m; ushort us[8]; } u; + u.m = r; + + debug_printf("%s: " + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x\n", + name, + u.us[0], u.us[1], u.us[2], u.us[3], + u.us[4], u.us[5], u.us[6], u.us[7]); +} + +static INLINE void u_print_epi32(const char *name, __m128i r) +{ + union { __m128i m; uint ui[4]; } u; + u.m = r; + + debug_printf("%s: " + "%08x/" + "%08x/" + "%08x/" + "%08x\n", + name, + u.ui[0], u.ui[1], u.ui[2], u.ui[3]); +} + +static INLINE void u_print_ps(const char *name, __m128 r) +{ + union { __m128 m; float f[4]; } u; + u.m = r; + + debug_printf("%s: " + "%f/" + "%f/" + "%f/" + "%f\n", + name, + u.f[0], u.f[1], u.f[2], u.f[3]); +} + + +#define U_DUMP_EPI32(a) u_print_epi32(#a, a) +#define U_DUMP_EPI16(a) u_print_epi16(#a, a) +#define U_DUMP_EPI8(a) u_print_epi8(#a, a) +#define U_DUMP_PS(a) u_print_ps(#a, a) + + #if defined(PIPE_ARCH_SSSE3) @@ -78,8 +168,6 @@ _mm_castps_si128(__m128 a) #else /* !PIPE_ARCH_SSSE3 */ -#include <emmintrin.h> - /** * Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases * where -mssse3 is not supported/enabled. @@ -100,6 +188,68 @@ _mm_shuffle_epi8(__m128i a, __m128i mask) #endif /* !PIPE_ARCH_SSSE3 */ -#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ + + +/* Provide an SSE2 implementation of _mm_mullo_epi32() in terms of + * _mm_mul_epu32(). + * + * I suspect this works fine for us because one of our operands is + * always positive, but not sure that this can be used for general + * signed integer multiplication. + * + * This seems close enough to the speed of SSE4 and the real + * _mm_mullo_epi32() intrinsic as to not justify adding an sse4 + * dependency at this point. + */ +static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b) +{ + __m128i a4 = _mm_srli_epi64(a, 32); /* shift by one dword */ + __m128i b4 = _mm_srli_epi64(b, 32); /* shift by one dword */ + __m128i ba = _mm_mul_epu32(b, a); /* multply dwords 0, 2 */ + __m128i b4a4 = _mm_mul_epu32(b4, a4); /* multiply dwords 1, 3 */ + + /* Interleave the results, either with shuffles or (slightly + * faster) direct bit operations: + */ +#if 0 + __m128i ba8 = _mm_shuffle_epi32(ba, 8); + __m128i b4a48 = _mm_shuffle_epi32(b4a4, 8); + __m128i result = _mm_unpacklo_epi32(ba8, b4a48); +#else + __m128i mask = _mm_setr_epi32(~0,0,~0,0); + __m128i ba_mask = _mm_and_si128(ba, mask); + __m128i b4a4_mask_shift = _mm_slli_epi64(b4a4, 32); + __m128i result = _mm_or_si128(ba_mask, b4a4_mask_shift); +#endif + + return result; +} + + +static INLINE void +transpose4_epi32(const __m128i * restrict a, + const __m128i * restrict b, + const __m128i * restrict c, + const __m128i * restrict d, + __m128i * restrict o, + __m128i * restrict p, + __m128i * restrict q, + __m128i * restrict r) +{ + __m128i t0 = _mm_unpacklo_epi32(*a, *b); + __m128i t1 = _mm_unpacklo_epi32(*c, *d); + __m128i t2 = _mm_unpackhi_epi32(*a, *b); + __m128i t3 = _mm_unpackhi_epi32(*c, *d); + + *o = _mm_unpacklo_epi64(t0, t1); + *p = _mm_unpackhi_epi64(t0, t1); + *q = _mm_unpacklo_epi64(t2, t3); + *r = _mm_unpackhi_epi64(t2, t3); +} + +#define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i)) + + +#endif /* PIPE_ARCH_SSE */ #endif /* U_SSE_H_ */ diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index af99163b2e..f78b6838a7 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -332,7 +332,7 @@ util_clear_depth_stencil(struct pipe_context *pipe, uint32_t *row = (uint32_t *)dst_map; for (j = 0; j < width; j++) { uint32_t tmp = *row & dst_mask; - *row++ = tmp & (zstencil & ~dst_mask); + *row++ = tmp | (zstencil & ~dst_mask); } dst_map += dst_stride; } diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index f7aa1403d0..44cadbfcdd 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -217,6 +217,81 @@ z24s8_get_tile_rgba(const unsigned *src, } } +/*** PIPE_FORMAT_S8X24_USCALED ***/ + +/** + * Return S component as four uint32_t in [0..255]. Z part ignored. + */ +static void +s8x24_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float)((*src++ >> 24) & 0xff); + } + + p += dst_stride; + } +} + +/*** PIPE_FORMAT_X24S8_USCALED ***/ + +/** + * Return S component as four uint32_t in [0..255]. Z part ignored. + */ +static void +x24s8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float)(*src++ & 0xff); + } + p += dst_stride; + } +} + + +/** + * Return S component as four uint32_t in [0..255]. Z part ignored. + */ +static void +s8_get_tile_rgba(const unsigned char *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float)(*src++ & 0xff); + } + p += dst_stride; + } +} /*** PIPE_FORMAT_Z32_FLOAT ***/ @@ -261,10 +336,19 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_Z24X8_UNORM: s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_S8_USCALED: + s8_get_tile_rgba((unsigned char *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_X24S8_USCALED: + s8x24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_S8_USCALED_Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_S8X24_USCALED: + x24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_Z32_FLOAT: z32f_get_tile_rgba((float *) src, w, h, dst, dst_stride); break; diff --git a/src/gallium/docs/source/cso/rasterizer.rst b/src/gallium/docs/source/cso/rasterizer.rst index ee3419ccfc..d547055096 100644 --- a/src/gallium/docs/source/cso/rasterizer.rst +++ b/src/gallium/docs/source/cso/rasterizer.rst @@ -124,17 +124,24 @@ Points sprite_coord_enable ^^^^^^^^^^^^^^^^^^^ -Specifies if a texture unit has its texture coordinates replaced or not. This -is a packed bitfield containing the enable for all texcoords -- if all bits -are zero, point sprites are effectively disabled. +Controls automatic texture coordinate generation for rendering sprite points. + +When bit k in the sprite_coord_enable bitfield is set, then generic +input k to the fragment shader will get an automatically computed +texture coordinate. + +The texture coordinate will be of the form (s, t, 0, 1) where s varies +from 0 to 1 from left to right while t varies from 0 to 1 according to +the state of 'sprite_coord_mode' (see below). If any bit is set, then point_smooth MUST be disabled (there are no round sprites) and point_quad_rasterization MUST be true (sprites are always rasterized as quads). Any mismatch between these states should be considered a bug in the state-tracker. -If enabled, the four vertices of the resulting quad will be assigned -texture coordinates, according to sprite_coord_mode. +This feature is implemented in the :ref:`Draw` module but may also be +implemented natively by GPUs or implemented with a geometry shader. + sprite_coord_mode ^^^^^^^^^^^^^^^^^ @@ -145,6 +152,7 @@ have coordinates (0,0,0,1). For PIPE_SPRITE_COORD_UPPER_LEFT, the upper-left vertex will have coordinates (0,0,0,1). This state is used by :ref:`Draw` to generate texcoords. + point_quad_rasterization ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/gallium/docs/source/pipeline.txt b/src/gallium/docs/source/pipeline.txt new file mode 100644 index 0000000000..fd1fbe9c76 --- /dev/null +++ b/src/gallium/docs/source/pipeline.txt @@ -0,0 +1,128 @@ +XXX this could be converted/formatted for Sphinx someday. +XXX do not use tabs in this file. + + + + position ] + primary/secondary colors ] + generics (normals, ] + texcoords, fog) ] User vertices / arrays + point size ] + edge flag ] + primitive ID } System-generated values + vertex ID } + | | | + V V V + +-------------------+ + | Vertex shader | + +-------------------+ + | | | + V V V + position + clip distance + generics + front/back & primary/secondary colors + point size + edge flag + primitive ID + | | | + V V V + +------------------------+ + | Geometry shader | + | (consume vertex ID) | + | (may change prim type) | + +------------------------+ + | | | + V V V + [...] + fb layer + | | | + V V V + +--------------------------+ + | Clipper | + | (consume clip distances) | + +--------------------------+ + | | | + V V V + +-------------------+ + | Polygon Culling | + +-------------------+ + | | | + V V V + +-----------------------+ + | Choose front or | + | back face color | + | (consume other color) | + +-----------------------+ + | | | + V V V + [...] + primary/secondary colors only + | | | + V V V + +-------------------+ + | Polygon Offset | + +-------------------+ + | | | + V V V + +----------------------+ + | Unfilled polygons | + | (consume edge flags) | + | (change prim type) | + +----------------------+ + | | | + V V V + position + generics + primary/secondary colors + point size + primitive ID + fb layer + | | | + V V V + +---------------------------------+ + | Optional Draw module helpers | + | * Polygon Stipple | + | * Line Stipple | + | * Line AA/smooth (as tris) | + | * Wide lines (as tris) | + | * Wide points/sprites (as tris) | + | * Point AA/smooth (as tris) | + | (NOTE: these stages may emit | + | new/extra generic attributes | + | such as texcoords) | + +---------------------------------+ + | | | + V V V + position ] + generics (+ new/extra ones) ] + primary/secondary colors ] Software rast vertices + point size ] + primitive ID ] + fb layer ] + | | | + V V V + +---------------------+ + | Triangle/Line/Point | + | Rasterization | + +---------------------+ + | | | + V V V + generic attribs + primary/secondary colors + primitive ID + fragment win coord pos } System-generated values + front/back face flag } + | | | + V V V + +-------------------+ + | Fragment shader | + +-------------------+ + | | | + V V V + zero or more colors + zero or one Z value + + +NOTE: The instance ID is not shown. It can be imagined to be a global variable +accessible to all shader stages. diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index e588c5b7bd..d99ed7c6d6 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -678,8 +678,6 @@ This instruction replicates its result. pc = pop() - Potential restrictions: - * Only occurs at end of function. .. opcode:: SSG - Set Sign @@ -726,7 +724,7 @@ This instruction replicates its result. dst.z = 0 - dst.y = 1 + dst.w = 1 .. opcode:: TXB - Texture Lookup With Bias @@ -1417,6 +1415,12 @@ Edge flags are used to control which lines or points are actually drawn when the polygon mode converts triangles/quads/polygons into points or lines. +TGSI_SEMANTIC_STENCIL +"""""""""""""""""""""" + +For fragment shaders, this semantic label indicates than an output +is a writable stencil reference value. Only the Y component is writable. +This allows the fragment shader to change the fragments stencilref value. Properties @@ -1495,6 +1499,8 @@ well. | Z | XXX TBD | (z, z, z, 1) | (0, z, 0, 1) | | | | [#depth-tex-mode]_ | | +--------------------+--------------+--------------------+--------------+ +| S | (s, s, s, s) | unknown | unknown | ++--------------------+--------------+--------------------+--------------+ .. [#envmap-bumpmap] http://www.opengl.org/registry/specs/ATI/envmap_bumpmap.txt .. [#depth-tex-mode] the default is (z, z, z, 1) but may also be (0, 0, 0, z) diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 7bb7893d93..bd059d5716 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -30,7 +30,6 @@ #include "i915_context.h" #include "i915_batch.h" #include "i915_debug.h" -#include "i915_reg.h" #include "i915_resource.h" #include "pipe/p_context.h" diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c index bd8b9174a8..36c04a3165 100644 --- a/src/gallium/drivers/i965/intel_decode.c +++ b/src/gallium/drivers/i965/intel_decode.c @@ -40,6 +40,7 @@ #include <stdint.h> #include <string.h> +#include "util/u_memory.h" #include "util/u_string.h" #include "intel_decode.h" @@ -116,8 +117,7 @@ decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures) }; - for (opcode = 0; opcode < sizeof(opcodes_mi) / sizeof(opcodes_mi[0]); - opcode++) { + for (opcode = 0; opcode < Elements(opcodes_mi); opcode++) { if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) { unsigned int len = 1, i; @@ -275,8 +275,7 @@ decode_2d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) return len; } - for (opcode = 0; opcode < sizeof(opcodes_2d) / sizeof(opcodes_2d[0]); - opcode++) { + for (opcode = 0; opcode < Elements(opcodes_2d); opcode++) { if ((data[0] & 0x1fc00000) >> 22 == opcodes_2d[opcode].opcode) { unsigned int i; @@ -1037,9 +1036,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, return len; } - for (opcode = 0; opcode < sizeof(opcodes_3d_1d) / sizeof(opcodes_3d_1d[0]); - opcode++) - { + for (opcode = 0; opcode < Elements(opcodes_3d_1d); opcode++) { if (opcodes_3d_1d[opcode].i830_only && !i830) continue; @@ -1291,8 +1288,7 @@ decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) return decode_3d_1c(data, count, hw_offset, failures); } - for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); - opcode++) { + for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) { unsigned int len = 1, i; @@ -1637,8 +1633,7 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures return len; } - for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); - opcode++) { + for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) { unsigned int i; len = 1; @@ -1705,8 +1700,7 @@ decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failure return decode_3d_1c(data, count, hw_offset, failures); } - for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); - opcode++) { + for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) { unsigned int len = 1, i; diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 379f14b43d..726a19db2b 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -64,12 +64,12 @@ PROGS := lp_test_format \ # Need this for the lp_test_*.o files CLEAN_EXTRA = *.o +include ../../Makefile.template + lp_test_sincos.o : sse_mathfun.h PROGS_DEPS := ../../auxiliary/libgallium.a -include ../../Makefile.template - lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_pack.py ../../auxiliary/util/u_format.csv python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@ diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 8b5539d2c5..ec30d4d708 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -131,6 +131,44 @@ replacing the native ICD driver, but it's quite an advanced usage, so if you need to ask, don't even try it. +Profiling +========= + +To profile llvmpipe you should pass the options + + scons debug=no profile=yes <same-as-before> + +This will ensure that frame pointers are used both in C and JIT functions, and +that no tail call optimizations are done by gcc. + + +To better profile JIT code you'll need to build LLVM with oprofile integration. + + source_dir=$PWD/llvm-2.6 + build_dir=$source_dir/build/profile + install_dir=$source_dir-profile + + mkdir -p "$build_dir" + cd "$build_dir" && \ + $source_dir/configure \ + --prefix=$install_dir \ + --enable-optimized \ + --disable-profiling \ + --enable-targets=host-only \ + --with-oprofile + + make -C "$build_dir" + make -C "$build_dir" install + + find "$install_dir/lib" -iname '*.a' -print0 | xargs -0 strip --strip-debug + +The you should define + + export LLVM=/path/to/llvm-2.6-profile + +and rebuild. + + Unit testing ============ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index f893878daa..d63bdd72a7 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -27,13 +27,7 @@ env.Depends('lp_tile_soa.c', [ ]) -# Only enable SSSE3 for lp_tile_soa_sse3.c -ssse3_env = env.Clone() -if env['gcc'] \ - and distutils.version.LooseVersion(env['CCVERSION']) >= distutils.version.LooseVersion('4.3') \ - and env['machine'] in ('x86', 'x86_64') : - ssse3_env.Append(CCFLAGS = ['-mssse3']) -lp_tile_soa_os = ssse3_env.SharedObject('lp_tile_soa.c') +lp_tile_soa_os = env.SharedObject('lp_tile_soa.c') llvmpipe = env.ConvenienceLibrary( diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c index e28efe778f..e50643790c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c @@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, - LLVMValueRef ref) + LLVMValueRef ref, + boolean do_branch) { struct lp_build_context bld; LLVMValueRef test; @@ -60,4 +61,7 @@ lp_build_alpha_test(LLVMBuilderRef builder, lp_build_name(test, "alpha_mask"); lp_build_mask_update(mask, test); + + if (do_branch) + lp_build_mask_check(mask); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h index 44603b418c..27ca8aad4d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h @@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, - LLVMValueRef ref); + LLVMValueRef ref, + boolean do_branch); #endif /* !LP_BLD_ALPHA_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index b5924cbb7d..d1c9b88f9b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -320,9 +320,6 @@ lp_build_blend_aos(LLVMBuilderRef builder, if(!blend->rt[rt].blend_enable) return src; - /* It makes no sense to blend unless values are normalized */ - assert(type.norm); - /* Setup build context */ memset(&bld, 0, sizeof bld); lp_build_context_init(&bld.base, builder, type); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c index b9c7a6ceed..30d261e979 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -195,6 +195,13 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, } +static boolean +lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor) +{ + return dst_factor == (src_factor ^ 0x10); +} + + /** * Generate blend code in SOA mode. * \param rt render target index (to index the blend / colormask state) @@ -243,8 +250,41 @@ lp_build_blend_soa(LLVMBuilderRef builder, unsigned func = i < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func; boolean func_commutative = lp_build_blend_func_commutative(func); - /* It makes no sense to blend unless values are normalized */ - assert(type.norm); + if (func == PIPE_BLEND_ADD && + lp_build_blend_factor_complementary(src_factor, dst_factor) && 0) { + /* + * Special case linear interpolation, (i.e., complementary factors). + */ + + LLVMValueRef weight; + if (src_factor < dst_factor) { + weight = lp_build_blend_soa_factor(&bld, src_factor, i); + res[i] = lp_build_lerp(&bld.base, weight, dst[i], src[i]); + } else { + weight = lp_build_blend_soa_factor(&bld, dst_factor, i); + res[i] = lp_build_lerp(&bld.base, weight, src[i], dst[i]); + } + continue; + } + + if ((func == PIPE_BLEND_ADD || + func == PIPE_BLEND_SUBTRACT || + func == PIPE_BLEND_REVERSE_SUBTRACT) && + src_factor == dst_factor && + type.floating) { + /* + * Special common factor. + * + * XXX: Only for floating points for now, since saturation will + * cause different results. + */ + + LLVMValueRef factor; + factor = lp_build_blend_soa_factor(&bld, src_factor, i); + res[i] = lp_build_blend_func(&bld.base, func, src[i], dst[i]); + res[i] = lp_build_mul(&bld.base, res[i], factor); + continue; + } /* * Compute src/dst factors. diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 7561899a74..7eb76d4fb3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -53,15 +53,8 @@ * ... ... ... ... ... ... ... ... ... * * - * Stencil test: - * Two-sided stencil test is supported but probably not as efficient as - * it could be. Currently, we use if/then/else constructs to do the - * operations for front vs. back-facing polygons. We could probably do - * both the front and back arithmetic then use a Select() instruction to - * choose the result depending on polyon orientation. We'd have to - * measure performance both ways and see which is better. - * * @author Jose Fonseca <jfonseca@vmware.com> + * @author Brian Paul <jfonseca@vmware.com> */ #include "pipe/p_state.h" @@ -71,6 +64,7 @@ #include "gallivm/lp_bld_arit.h" #include "gallivm/lp_bld_bitarit.h" #include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_conv.h" #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_intr.h" @@ -128,57 +122,32 @@ lp_build_stencil_test_single(struct lp_build_context *bld, /** * Do the one or two-sided stencil test comparison. * \sa lp_build_stencil_test_single - * \param face an integer indicating front (+) or back (-) facing polygon. - * If NULL, assume front-facing. + * \param front_facing an integer vector mask, indicating front (~0) or back + * (0) facing polygon. If NULL, assume front-facing. */ static LLVMValueRef lp_build_stencil_test(struct lp_build_context *bld, const struct pipe_stencil_state stencil[2], LLVMValueRef stencilRefs[2], LLVMValueRef stencilVals, - LLVMValueRef face) + LLVMValueRef front_facing) { LLVMValueRef res; assert(stencil[0].enabled); - if (stencil[1].enabled && face) { - /* do two-sided test */ - struct lp_build_flow_context *flow_ctx; - struct lp_build_if_state if_ctx; - LLVMValueRef front_facing; - LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); - LLVMValueRef result = bld->undef; - - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); + /* do front face test */ + res = lp_build_stencil_test_single(bld, &stencil[0], + stencilRefs[0], stencilVals); - lp_build_flow_scope_declare(flow_ctx, &result); + if (stencil[1].enabled && front_facing) { + /* do back face test */ + LLVMValueRef back_res; - /* front_facing = face > 0.0 */ - front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, ""); - - lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing); - { - result = lp_build_stencil_test_single(bld, &stencil[0], - stencilRefs[0], stencilVals); - } - lp_build_else(&if_ctx); - { - result = lp_build_stencil_test_single(bld, &stencil[1], - stencilRefs[1], stencilVals); - } - lp_build_endif(&if_ctx); + back_res = lp_build_stencil_test_single(bld, &stencil[1], + stencilRefs[1], stencilVals); - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); - - res = result; - } - else { - /* do single-side test */ - res = lp_build_stencil_test_single(bld, &stencil[0], - stencilRefs[0], stencilVals); + res = lp_build_select(bld, front_facing, res, back_res); } return res; @@ -195,14 +164,12 @@ lp_build_stencil_op_single(struct lp_build_context *bld, const struct pipe_stencil_state *stencil, enum stencil_op op, LLVMValueRef stencilRef, - LLVMValueRef stencilVals, - LLVMValueRef mask) + LLVMValueRef stencilVals) { - const unsigned stencilMax = 255; /* XXX fix */ struct lp_type type = bld->type; LLVMValueRef res; - LLVMValueRef max = lp_build_const_int_vec(type, stencilMax); + LLVMValueRef max = lp_build_const_int_vec(type, 0xff); unsigned stencil_op; assert(type.sign); @@ -255,19 +222,7 @@ lp_build_stencil_op_single(struct lp_build_context *bld, break; default: assert(0 && "bad stencil op mode"); - res = NULL; - } - - if (stencil->writemask != stencilMax) { - /* mask &= stencil->writemask */ - LLVMValueRef writemask = lp_build_const_int_vec(type, stencil->writemask); - mask = LLVMBuildAnd(bld->builder, mask, writemask, ""); - /* res = (res & mask) | (stencilVals & ~mask) */ - res = lp_build_select_bitwise(bld, writemask, res, stencilVals); - } - else { - /* res = mask ? res : stencilVals */ - res = lp_build_select(bld, mask, res, stencilVals); + res = bld->undef; } return res; @@ -284,49 +239,40 @@ lp_build_stencil_op(struct lp_build_context *bld, LLVMValueRef stencilRefs[2], LLVMValueRef stencilVals, LLVMValueRef mask, - LLVMValueRef face) + LLVMValueRef front_facing) { - assert(stencil[0].enabled); - - if (stencil[1].enabled && face) { - /* do two-sided op */ - struct lp_build_flow_context *flow_ctx; - struct lp_build_if_state if_ctx; - LLVMValueRef front_facing; - LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); - LLVMValueRef result = bld->undef; + LLVMValueRef res; - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); + assert(stencil[0].enabled); - lp_build_flow_scope_declare(flow_ctx, &result); + /* do front face op */ + res = lp_build_stencil_op_single(bld, &stencil[0], op, + stencilRefs[0], stencilVals); - /* front_facing = face > 0.0 */ - front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, ""); + if (stencil[1].enabled && front_facing) { + /* do back face op */ + LLVMValueRef back_res; - lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing); - { - result = lp_build_stencil_op_single(bld, &stencil[0], op, - stencilRefs[0], stencilVals, mask); - } - lp_build_else(&if_ctx); - { - result = lp_build_stencil_op_single(bld, &stencil[1], op, - stencilRefs[1], stencilVals, mask); - } - lp_build_endif(&if_ctx); + back_res = lp_build_stencil_op_single(bld, &stencil[1], op, + stencilRefs[1], stencilVals); - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); + res = lp_build_select(bld, front_facing, res, back_res); + } - return result; + if (stencil->writemask != 0xff) { + /* mask &= stencil->writemask */ + LLVMValueRef writemask = lp_build_const_int_vec(bld->type, stencil->writemask); + mask = LLVMBuildAnd(bld->builder, mask, writemask, ""); + /* res = (res & mask) | (stencilVals & ~mask) */ + res = lp_build_select_bitwise(bld, writemask, res, stencilVals); } else { - /* do single-sided op */ - return lp_build_stencil_op_single(bld, &stencil[0], op, - stencilRefs[0], stencilVals, mask); + /* res = mask ? res : stencilVals */ + res = lp_build_select(bld, mask, res, stencilVals); } + + return res; } @@ -358,8 +304,13 @@ lp_depth_type(const struct util_format_description *format_desc, } else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { assert(format_desc->block.bits <= 32); - if(format_desc->channel[swizzle].normalized) - type.norm = TRUE; + assert(format_desc->channel[swizzle].normalized); + if (format_desc->channel[swizzle].size < format_desc->block.bits) { + /* Prefer signed integers when possible, as SSE has less support + * for unsigned comparison; + */ + type.sign = TRUE; + } } else assert(0); @@ -381,7 +332,7 @@ lp_depth_type(const struct util_format_description *format_desc, */ static boolean get_z_shift_and_mask(const struct util_format_description *format_desc, - unsigned *shift, unsigned *mask) + unsigned *shift, unsigned *width, unsigned *mask) { const unsigned total_bits = format_desc->block.bits; unsigned z_swizzle; @@ -397,12 +348,14 @@ get_z_shift_and_mask(const struct util_format_description *format_desc, if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) return FALSE; + *width = format_desc->channel[z_swizzle].size; + padding_right = 0; for (chan = 0; chan < z_swizzle; ++chan) padding_right += format_desc->channel[chan].size; padding_left = - total_bits - (padding_right + format_desc->channel[z_swizzle].size); + total_bits - (padding_right + *width); if (padding_left || padding_right) { unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1; @@ -413,7 +366,7 @@ get_z_shift_and_mask(const struct util_format_description *format_desc, *mask = 0xffffffff; } - *shift = padding_left; + *shift = padding_right; return TRUE; } @@ -457,7 +410,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc, * \param maskvalue is the depth test mask. * \param counter is a pointer of the uint32 counter. */ -static void +void lp_build_occlusion_count(LLVMBuilderRef builder, struct lp_type type, LLVMValueRef maskvalue, @@ -494,31 +447,57 @@ lp_build_occlusion_count(LLVMBuilderRef builder, * \param format_desc description of the depth/stencil surface * \param mask the alive/dead pixel mask for the quad (vector) * \param stencil_refs the front/back stencil ref values (scalar) - * \param z_src the incoming depth/stencil values (a 2x2 quad) + * \param z_src the incoming depth/stencil values (a 2x2 quad, float32) * \param zs_dst_ptr pointer to depth/stencil values in framebuffer - * \param facing contains float value indicating front/back facing polygon + * \param facing contains boolean value indicating front/back facing polygon */ void lp_build_depth_stencil_test(LLVMBuilderRef builder, const struct pipe_depth_state *depth, const struct pipe_stencil_state stencil[2], - struct lp_type type, + struct lp_type z_src_type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef stencil_refs[2], LLVMValueRef z_src, LLVMValueRef zs_dst_ptr, LLVMValueRef face, - LLVMValueRef counter) + LLVMValueRef *zs_value, + boolean do_branch) { - struct lp_build_context bld; - struct lp_build_context sbld; + struct lp_type z_type; + struct lp_build_context z_bld; + struct lp_build_context s_bld; struct lp_type s_type; + unsigned z_shift = 0, z_width = 0, z_mask = 0; LLVMValueRef zs_dst, z_dst = NULL; LLVMValueRef stencil_vals = NULL; LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; LLVMValueRef z_pass = NULL, s_pass_mask = NULL; - LLVMValueRef orig_mask = mask->value; + LLVMValueRef orig_mask = lp_build_mask_value(mask); + LLVMValueRef front_facing = NULL; + + + /* + * Depths are expected to be between 0 and 1, even if they are stored in + * floats. Setting these bits here will ensure that the lp_build_conv() call + * below won't try to unnecessarily clamp the incoming values. + */ + if(z_src_type.floating) { + z_src_type.sign = FALSE; + z_src_type.norm = TRUE; + } + else { + assert(!z_src_type.sign); + assert(z_src_type.norm); + } + + /* Pick the depth type. */ + z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); + + /* FIXME: Cope with a depth test type with a different bit width. */ + assert(z_type.width == z_src_type.width); + assert(z_type.length == z_src_type.length); /* Sanity checking */ { @@ -540,8 +519,8 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } assert(z_swizzle < 4); - assert(format_desc->block.bits == type.width); - if (type.floating) { + assert(format_desc->block.bits == z_type.width); + if (z_type.floating) { assert(z_swizzle == 0); assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT); @@ -552,54 +531,56 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); assert(format_desc->channel[z_swizzle].normalized); - assert(!type.fixed); - assert(!type.sign); - assert(type.norm); + assert(!z_type.fixed); } } /* Setup build context for Z vals */ - lp_build_context_init(&bld, builder, type); + lp_build_context_init(&z_bld, builder, z_type); /* Setup build context for stencil vals */ - s_type = lp_type_int_vec(type.width); - lp_build_context_init(&sbld, builder, s_type); + s_type = lp_type_int_vec(z_type.width); + lp_build_context_init(&s_bld, builder, s_type); /* Load current z/stencil value from z/stencil buffer */ + zs_dst_ptr = LLVMBuildBitCast(builder, + zs_dst_ptr, + LLVMPointerType(z_bld.vec_type, 0), ""); zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, ""); - lp_build_name(zs_dst, "zsbufval"); + lp_build_name(zs_dst, "zs_dst"); /* Compute and apply the Z/stencil bitmasks and shifts. */ { - unsigned z_shift, z_mask; unsigned s_shift, s_mask; - if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) { - if (z_shift) { - LLVMValueRef shift = lp_build_const_int_vec(type, z_shift); - z_src = LLVMBuildLShr(builder, z_src, shift, ""); - } - + if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) { if (z_mask != 0xffffffff) { - LLVMValueRef mask = lp_build_const_int_vec(type, z_mask); - z_src = LLVMBuildAnd(builder, z_src, mask, ""); - z_dst = LLVMBuildAnd(builder, zs_dst, mask, ""); - z_bitmask = mask; /* used below */ + z_bitmask = lp_build_const_int_vec(z_type, z_mask); } - else { + + /* + * Align the framebuffer Z 's LSB to the right. + */ + if (z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); + z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst"); + } else if (z_bitmask) { + /* TODO: Instead of loading a mask from memory and ANDing, it's + * probably faster to just shake the bits with two shifts. */ + z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst"); + } else { z_dst = zs_dst; + lp_build_name(z_dst, "z_dst"); } - - lp_build_name(z_dst, "zsbuf.z"); } if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { if (s_shift) { - LLVMValueRef shift = lp_build_const_int_vec(type, s_shift); + LLVMValueRef shift = lp_build_const_int_vec(s_type, s_shift); stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, ""); stencil_shift = shift; /* used below */ } @@ -608,35 +589,85 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } if (s_mask != 0xffffffff) { - LLVMValueRef mask = lp_build_const_int_vec(type, s_mask); + LLVMValueRef mask = lp_build_const_int_vec(s_type, s_mask); stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); } - lp_build_name(stencil_vals, "stencil"); + lp_build_name(stencil_vals, "s_dst"); } } - if (stencil[0].enabled) { + + if (face) { + LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0); + + /* front_facing = face != 0 ? ~0 : 0 */ + front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, ""); + front_facing = LLVMBuildSExt(builder, front_facing, + LLVMIntType(s_bld.type.length*s_bld.type.width), + ""); + front_facing = LLVMBuildBitCast(builder, front_facing, + s_bld.int_vec_type, ""); + } + /* convert scalar stencil refs into vectors */ - stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]); - stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]); + stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]); + stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]); - s_pass_mask = lp_build_stencil_test(&sbld, stencil, - stencil_refs, stencil_vals, face); + s_pass_mask = lp_build_stencil_test(&s_bld, stencil, + stencil_refs, stencil_vals, + front_facing); /* apply stencil-fail operator */ { - LLVMValueRef s_fail_mask = lp_build_andnot(&bld, orig_mask, s_pass_mask); - stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP, + LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP, stencil_refs, stencil_vals, - s_fail_mask, face); + s_fail_mask, front_facing); } } if (depth->enabled) { + /* + * Convert fragment Z to the desired type, aligning the LSB to the right. + */ + + assert(z_type.width == z_src_type.width); + assert(z_type.length == z_src_type.length); + assert(lp_check_value(z_src_type, z_src)); + if (z_src_type.floating) { + /* + * Convert from floating point values + */ + + if (!z_type.floating) { + z_src = lp_build_clamped_float_to_unsigned_norm(builder, + z_src_type, + z_width, + z_src); + } + } else { + /* + * Convert from unsigned normalized values. + */ + + assert(!z_src_type.sign); + assert(!z_src_type.fixed); + assert(z_src_type.norm); + assert(!z_type.floating); + if (z_src_type.width > z_width) { + LLVMValueRef shift = lp_build_const_int_vec(z_src_type, + z_src_type.width - z_width); + z_src = LLVMBuildLShr(builder, z_src, shift, ""); + } + } + assert(lp_check_value(z_type, z_src)); + + lp_build_name(z_src, "z_src"); + /* compare src Z to dst Z, returning 'pass' mask */ - z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst); + z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst); if (!stencil[0].enabled) { /* We can potentially skip all remaining operations here, but only @@ -644,28 +675,28 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, * buffer values. Don't need to update Z buffer values. */ lp_build_mask_update(mask, z_pass); + + if (do_branch) { + lp_build_mask_check(mask); + do_branch = FALSE; + } } if (depth->writemask) { - LLVMValueRef zselectmask = mask->value; + LLVMValueRef zselectmask; /* mask off bits that failed Z test */ - zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, ""); + zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, ""); /* mask off bits that failed stencil test */ if (s_pass_mask) { zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, ""); } - /* if combined Z/stencil format, mask off the stencil bits */ - if (z_bitmask) { - zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, ""); - } - /* Mix the old and new Z buffer values. - * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i]) + * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i] */ - z_dst = lp_build_select_bitwise(&bld, zselectmask, z_src, z_dst); + z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst); } if (stencil[0].enabled) { @@ -673,33 +704,35 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef z_fail_mask, z_pass_mask; /* apply Z-fail operator */ - z_fail_mask = lp_build_andnot(&bld, orig_mask, z_pass); - stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP, + z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP, stencil_refs, stencil_vals, - z_fail_mask, face); + z_fail_mask, front_facing); /* apply Z-pass operator */ - z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, ""); - stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, + z_pass_mask = LLVMBuildAnd(z_bld.builder, orig_mask, z_pass, ""); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, - z_pass_mask, face); + z_pass_mask, front_facing); } } else { /* No depth test: apply Z-pass operator to stencil buffer values which * passed the stencil test. */ - s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, ""); - stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, + s_pass_mask = LLVMBuildAnd(s_bld.builder, orig_mask, s_pass_mask, ""); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, - s_pass_mask, face); + s_pass_mask, front_facing); } - /* The Z bits are already in the right place but we may need to shift the - * stencil bits before ORing Z with Stencil to make the final pixel value. - */ + /* Put Z and ztencil bits in the right place */ + if (z_dst && z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); + z_dst = LLVMBuildShl(builder, z_dst, shift, ""); + } if (stencil_vals && stencil_shift) - stencil_vals = LLVMBuildShl(bld.builder, stencil_vals, + stencil_vals = LLVMBuildShl(s_bld.builder, stencil_vals, stencil_shift, ""); /* Finally, merge/store the z/stencil values */ @@ -707,13 +740,13 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, (stencil[0].enabled && stencil[0].writemask)) { if (z_dst && stencil_vals) - zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, ""); + zs_dst = LLVMBuildOr(z_bld.builder, z_dst, stencil_vals, ""); else if (z_dst) zs_dst = z_dst; else zs_dst = stencil_vals; - LLVMBuildStore(builder, zs_dst, zs_dst_ptr); + *zs_value = zs_dst; } if (s_pass_mask) @@ -722,6 +755,47 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, if (depth->enabled && stencil[0].enabled) lp_build_mask_update(mask, z_pass); - if (counter) - lp_build_occlusion_count(builder, type, mask->value, counter); + if (do_branch) + lp_build_mask_check(mask); + +} + + +void +lp_build_depth_write(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value) +{ + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, + LLVMPointerType(LLVMTypeOf(zs_value), 0), ""); + + LLVMBuildStore(builder, zs_value, zs_dst_ptr); +} + + +void +lp_build_deferred_depth_write(LLVMBuilderRef builder, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value) +{ + struct lp_type z_type; + struct lp_build_context z_bld; + LLVMValueRef z_dst; + + /* XXX: pointlessly redo type logic: + */ + z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); + lp_build_context_init(&z_bld, builder, z_type); + + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, + LLVMPointerType(z_bld.vec_type, 0), ""); + + z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); + z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst); + + LLVMBuildStore(builder, z_dst, zs_dst_ptr); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index e257a5bd7d..a54ef3a711 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -61,7 +61,27 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef zs_src, LLVMValueRef zs_dst_ptr, LLVMValueRef facing, - LLVMValueRef counter); + LLVMValueRef *zs_value, + boolean do_branch); +void +lp_build_depth_write(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value); + +void +lp_build_deferred_depth_write(LLVMBuilderRef builder, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value); + +void +lp_build_occlusion_count(LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef maskvalue, + LLVMValueRef counter); #endif /* !LP_BLD_DEPTH_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 2a374f8c39..c9da8900d0 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -206,7 +206,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, dadq2 = LLVMBuildFAdd(builder, dadq, dadq, ""); /* - * a = a0 + x * dadx + y * dady + * a = a0 + (x * dadx + y * dady) */ if (attrib == 0 && chan == 0) { @@ -219,11 +219,11 @@ coeffs_init(struct lp_build_interp_soa_context *bld, a = a0; if (interp != LP_INTERP_CONSTANT && interp != LP_INTERP_FACING) { - LLVMValueRef tmp; - tmp = LLVMBuildFMul(builder, bld->x, dadx, ""); - a = LLVMBuildFAdd(builder, a, tmp, ""); - tmp = LLVMBuildFMul(builder, bld->y, dady, ""); - a = LLVMBuildFAdd(builder, a, tmp, ""); + LLVMValueRef ax, ay, axy; + ax = LLVMBuildFMul(builder, bld->x, dadx, ""); + ay = LLVMBuildFMul(builder, bld->y, dady, ""); + axy = LLVMBuildFAdd(builder, ax, ay, ""); + a = LLVMBuildFAdd(builder, a, axy, ""); } } @@ -272,7 +272,10 @@ coeffs_init(struct lp_build_interp_soa_context *bld, * This is called when we move from one quad to the next. */ static void -attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) +attribs_update(struct lp_build_interp_soa_context *bld, + int quad_index, + int start, + int end) { struct lp_build_context *coeff_bld = &bld->coeff_bld; LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index); @@ -282,7 +285,7 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) assert(quad_index < 4); - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { + for(attrib = start; attrib < end; ++attrib) { const unsigned mask = bld->mask[attrib]; const unsigned interp = bld->interp[attrib]; for(chan = 0; chan < NUM_CHANNELS; ++chan) { @@ -350,6 +353,14 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) } #endif + if (attrib == 0 && chan == 2) { + /* FIXME: Depth values can exceed 1.0, due to the fact that + * setup interpolation coefficients refer to (0,0) which causes + * precision loss. So we must clamp to 1.0 here to avoid artifacts + */ + a = lp_build_min(coeff_bld, a, coeff_bld->one); + } + attrib_name(a, attrib, chan, ""); } bld->attribs[attrib][chan] = a; @@ -434,8 +445,6 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, pos_init(bld, x0, y0); coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr); - - attribs_update(bld, 0); } @@ -443,10 +452,20 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, * Advance the position and inputs to the given quad within the block. */ void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, - int quad_index) +lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld, + int quad_index) +{ + assert(quad_index < 4); + + attribs_update(bld, quad_index, 1, bld->num_attribs); +} + +void +lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, + int quad_index) { assert(quad_index < 4); - attribs_update(bld, quad_index); + attribs_update(bld, quad_index, 0, 1); } + diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 37479fca9d..a7ebdd1bfa 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -113,7 +113,11 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, LLVMValueRef y); void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, +lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld, + int quad_index); + +void +lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, int quad_index); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 8e6dfb293d..e09ec504ab 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -64,6 +64,11 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) elem_types[LP_JIT_TEXTURE_DATA] = LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), LP_MAX_TEXTURE_LEVELS); + elem_types[LP_JIT_TEXTURE_MIN_LOD] = LLVMFloatType(); + elem_types[LP_JIT_TEXTURE_MAX_LOD] = LLVMFloatType(); + elem_types[LP_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType(); + elem_types[LP_JIT_TEXTURE_BORDER_COLOR] = + LLVMArrayType(LLVMFloatType(), 4); texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -88,6 +93,19 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data, screen->target, texture_type, LP_JIT_TEXTURE_DATA); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, min_lod, + screen->target, texture_type, + LP_JIT_TEXTURE_MIN_LOD); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, max_lod, + screen->target, texture_type, + LP_JIT_TEXTURE_MAX_LOD); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, lod_bias, + screen->target, texture_type, + LP_JIT_TEXTURE_LOD_BIAS); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, border_color, + screen->target, texture_type, + LP_JIT_TEXTURE_BORDER_COLOR); + LP_CHECK_STRUCT_SIZE(struct lp_jit_texture, screen->target, texture_type); @@ -144,9 +162,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) void lp_jit_screen_cleanup(struct llvmpipe_screen *screen) { - if(screen->engine) - LLVMDisposeExecutionEngine(screen->engine); - if(screen->pass) LLVMDisposePassManager(screen->pass); } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index c94189413a..114f21f2d1 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -54,6 +54,11 @@ struct lp_jit_texture uint32_t row_stride[LP_MAX_TEXTURE_LEVELS]; uint32_t img_stride[LP_MAX_TEXTURE_LEVELS]; const void *data[LP_MAX_TEXTURE_LEVELS]; + /* sampler state, actually */ + float min_lod; + float max_lod; + float lod_bias; + float border_color[4]; }; @@ -65,6 +70,10 @@ enum { LP_JIT_TEXTURE_ROW_STRIDE, LP_JIT_TEXTURE_IMG_STRIDE, LP_JIT_TEXTURE_DATA, + LP_JIT_TEXTURE_MIN_LOD, + LP_JIT_TEXTURE_MAX_LOD, + LP_JIT_TEXTURE_LOD_BIAS, + LP_JIT_TEXTURE_BORDER_COLOR, LP_JIT_TEXTURE_NUM_FIELDS /* number of fields above */ }; @@ -135,7 +144,7 @@ typedef void (*lp_jit_frag_func)(const struct lp_jit_context *context, uint32_t x, uint32_t y, - float facing, + uint32_t facing, const void *a0, const void *dadx, const void *dady, diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index ff0e207a54..84c66dd36e 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -92,8 +92,9 @@ llvmpipe_get_query_result(struct pipe_context *pipe, int i; if (!pq->fence) { - assert(0); /* query not in issued state */ - return FALSE; + /* no fence because there was no scene, so results is zero */ + *result = 0; + return TRUE; } if (!lp_fence_signalled(pq->fence)) { diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index d7e6415e13..d358a98394 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -211,8 +211,8 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_scene *scene = task->scene; - unsigned clear_value = arg.clear_zstencil.value; - unsigned clear_mask = arg.clear_zstencil.mask; + uint32_t clear_value = arg.clear_zstencil.value; + uint32_t clear_mask = arg.clear_zstencil.mask; const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT; const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT; const unsigned block_size = scene->zsbuf.blocksize; @@ -220,7 +220,8 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, uint8_t *dst; unsigned i, j; - LP_DBG(DEBUG_RAST, "%s 0x%x%x\n", __FUNCTION__, clear_value, clear_mask); + LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n", + __FUNCTION__, clear_value, clear_mask); /* * Clear the aera of the swizzled depth/depth buffer matching this tile, in @@ -232,16 +233,31 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, dst = task->depth_tile; + clear_value &= clear_mask; + switch (block_size) { case 1: + assert(clear_mask == 0xff); memset(dst, (uint8_t) clear_value, height * width); break; case 2: - for (i = 0; i < height; i++) { - uint16_t *row = (uint16_t *)dst; - for (j = 0; j < width; j++) - *row++ = (uint16_t) clear_value; - dst += dst_stride; + if (clear_mask == 0xffff) { + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) + *row++ = (uint16_t) clear_value; + dst += dst_stride; + } + } + else { + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) { + uint16_t tmp = ~clear_mask & *row; + *row++ = clear_value | tmp; + } + dst += dst_stride; + } } break; case 4: @@ -258,7 +274,7 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, uint32_t *row = (uint32_t *)dst; for (j = 0; j < width; j++) { uint32_t tmp = ~clear_mask & *row; - *row++ = (clear_value & clear_mask) | tmp; + *row++ = clear_value | tmp; } dst += dst_stride; } @@ -318,7 +334,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, { const struct lp_scene *scene = task->scene; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - const struct lp_rast_state *state = inputs->state; + const struct lp_rast_state *state = task->state; struct lp_fragment_shader_variant *variant = state->variant; const unsigned tile_x = task->x, tile_y = task->y; unsigned x, y; @@ -349,10 +365,10 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, BEGIN_JIT_CALL(state); variant->jit_function[RAST_WHOLE]( &state->jit_context, tile_x + x, tile_y + y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, + inputs->frontfacing, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), color, depth, 0xffff, @@ -398,7 +414,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, unsigned x, unsigned y, unsigned mask) { - const struct lp_rast_state *state = inputs->state; + const struct lp_rast_state *state = task->state; struct lp_fragment_shader_variant *variant = state->variant; const struct lp_scene *scene = task->scene; uint8_t *color[PIPE_MAX_COLOR_BUFS]; @@ -430,10 +446,10 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, BEGIN_JIT_CALL(state); variant->jit_function[RAST_EDGE_TEST](&state->jit_context, x, y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, + inputs->frontfacing, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), color, depth, mask, @@ -474,6 +490,14 @@ lp_rast_end_query(struct lp_rasterizer_task *task, } +void +lp_rast_set_state(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + task->state = arg.state; +} + + /** * Set top row and left column of the tile's pixels to white. For debugging. @@ -581,10 +605,12 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] = lp_rast_triangle_8, lp_rast_triangle_3_4, lp_rast_triangle_3_16, + lp_rast_triangle_4_16, lp_rast_shade_tile, lp_rast_shade_tile_opaque, lp_rast_begin_query, lp_rast_end_query, + lp_rast_set_state, }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index c55b97a9d1..a64c152cf8 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -78,30 +78,28 @@ struct lp_rast_state { * These pointers point into the bin data buffer. */ struct lp_rast_shader_inputs { - float facing; /** Positive for front-facing, negative for back-facing */ - unsigned disable:1; /** Partially binned, disable this command */ - unsigned opaque:1; /** Is opaque */ - - float (*a0)[4]; - float (*dadx)[4]; - float (*dady)[4]; - - const struct lp_rast_state *state; + unsigned frontfacing:1; /** True for front-facing */ + unsigned disable:1; /** Partially binned, disable this command */ + unsigned opaque:1; /** Is opaque */ + unsigned pad0:29; /* wasted space */ + unsigned stride; /* how much to advance data between a0, dadx, dady */ + unsigned pad2; /* wasted space */ + unsigned pad3; /* wasted space */ + /* followed by a0, dadx, dady and planes[] */ }; - +/* Note: the order of these values is important as they are loaded by + * sse code in rasterization: + */ struct lp_rast_plane { - /* one-pixel sized trivial accept offsets for each plane */ - int ei; - - /* one-pixel sized trivial reject offsets for each plane */ - int eo; - /* edge function values at minx,miny ?? */ int c; int dcdx; int dcdy; + + /* one-pixel sized trivial reject offsets for each plane */ + int eo; }; /** @@ -111,17 +109,24 @@ struct lp_rast_plane { * Objects of this type are put into the lp_setup_context::data buffer. */ struct lp_rast_triangle { - /* inputs for the shader */ - struct lp_rast_shader_inputs inputs; - #ifdef DEBUG float v[3][2]; + float pad0; + float pad1; #endif - struct lp_rast_plane plane[8]; /* NOTE: may allocate fewer planes */ + /* inputs for the shader */ + struct lp_rast_shader_inputs inputs; + /* planes are also allocated here */ }; +#define GET_A0(inputs) ((float (*)[4])((inputs)+1)) +#define GET_DADX(inputs) ((float (*)[4])((char *)((inputs) + 1) + (inputs)->stride)) +#define GET_DADY(inputs) ((float (*)[4])((char *)((inputs) + 1) + 2 * (inputs)->stride)) +#define GET_PLANES(tri) ((struct lp_rast_plane *)((char *)(&(tri)->inputs + 1) + 3 * (tri)->inputs.stride)) + + struct lp_rasterizer * lp_rast_create( unsigned num_threads ); @@ -149,9 +154,10 @@ union lp_rast_cmd_arg { const struct lp_rast_state *set_state; uint8_t clear_color[4]; struct { - unsigned value; - unsigned mask; + uint32_t value; + uint32_t mask; } clear_zstencil; + const struct lp_rast_state *state; struct lp_fence *fence; struct llvmpipe_query *query_obj; }; @@ -238,12 +244,14 @@ lp_rast_arg_null( void ) #define LP_RAST_OP_TRIANGLE_8 0x9 #define LP_RAST_OP_TRIANGLE_3_4 0xa #define LP_RAST_OP_TRIANGLE_3_16 0xb -#define LP_RAST_OP_SHADE_TILE 0xc -#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xd -#define LP_RAST_OP_BEGIN_QUERY 0xe -#define LP_RAST_OP_END_QUERY 0xf - -#define LP_RAST_OP_MAX 0x10 +#define LP_RAST_OP_TRIANGLE_4_16 0xc +#define LP_RAST_OP_SHADE_TILE 0xd +#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xe +#define LP_RAST_OP_BEGIN_QUERY 0xf +#define LP_RAST_OP_END_QUERY 0x10 +#define LP_RAST_OP_SET_STATE 0x11 + +#define LP_RAST_OP_MAX 0x12 #define LP_RAST_OP_MASK 0xff void diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/src/gallium/drivers/llvmpipe/lp_rast_debug.c index 9fc78645a3..e2783aa568 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c @@ -12,6 +12,7 @@ static INLINE int u_bit_scan(unsigned *mask) struct tile { int coverage; int overdraw; + const struct lp_rast_state *state; char data[TILE_SIZE][TILE_SIZE]; }; @@ -42,10 +43,12 @@ static const char *cmd_names[LP_RAST_OP_MAX] = "triangle_8", "triangle_3_4", "triangle_3_16", + "triangle_4_16", "shade_tile", "shade_tile_opaque", "begin_query", "end_query", + "set_state", }; static const char *cmd_name(unsigned cmd) @@ -55,31 +58,31 @@ static const char *cmd_name(unsigned cmd) } static const struct lp_fragment_shader_variant * -get_variant( const struct cmd_block *block, - int k ) +get_variant( const struct lp_rast_state *state, + const struct cmd_block *block, + int k ) { if (block->cmd[k] == LP_RAST_OP_SHADE_TILE || - block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE) - return block->arg[k].shade_tile->state->variant; - - if (block->cmd[k] == LP_RAST_OP_TRIANGLE_1 || + block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE || + block->cmd[k] == LP_RAST_OP_TRIANGLE_1 || block->cmd[k] == LP_RAST_OP_TRIANGLE_2 || block->cmd[k] == LP_RAST_OP_TRIANGLE_3 || block->cmd[k] == LP_RAST_OP_TRIANGLE_4 || block->cmd[k] == LP_RAST_OP_TRIANGLE_5 || block->cmd[k] == LP_RAST_OP_TRIANGLE_6 || block->cmd[k] == LP_RAST_OP_TRIANGLE_7) - return block->arg[k].triangle.tri->inputs.state->variant; + return state->variant; return NULL; } static boolean -is_blend( const struct cmd_block *block, +is_blend( const struct lp_rast_state *state, + const struct cmd_block *block, int k ) { - const struct lp_fragment_shader_variant *variant = get_variant(block, k); + const struct lp_fragment_shader_variant *variant = get_variant(state, block, k); if (variant) return variant->key.blend.rt[0].blend_enable; @@ -92,6 +95,7 @@ is_blend( const struct cmd_block *block, static void debug_bin( const struct cmd_bin *bin ) { + const struct lp_rast_state *state; const struct cmd_block *head = bin->head; int i, j = 0; @@ -99,9 +103,12 @@ debug_bin( const struct cmd_bin *bin ) while (head) { for (i = 0; i < head->count; i++, j++) { + if (head->cmd[i] == LP_RAST_OP_SET_STATE) + state = head->arg[i].state; + debug_printf("%d: %s %s\n", j, cmd_name(head->cmd[i]), - is_blend(head, i) ? "blended" : ""); + is_blend(state, head, i) ? "blended" : ""); } head = head->next; } @@ -133,7 +140,7 @@ debug_shade_tile(int x, int y, char val) { const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - boolean blend = inputs->state->variant->key.blend.rt[0].blend_enable; + boolean blend = tile->state->variant->key.blend.rt[0].blend_enable; unsigned i,j; if (inputs->disable) @@ -171,11 +178,12 @@ debug_triangle(int tilex, int tiley, { const struct lp_rast_triangle *tri = arg.triangle.tri; unsigned plane_mask = arg.triangle.plane_mask; + const struct lp_rast_plane *tri_plane = GET_PLANES(tri); struct lp_rast_plane plane[8]; int x, y; int count = 0; unsigned i, nr_planes = 0; - boolean blend = tri->inputs.state->variant->key.blend.rt[0].blend_enable; + boolean blend = tile->state->variant->key.blend.rt[0].blend_enable; if (tri->inputs.disable) { /* This triangle was partially binned and has been disabled */ @@ -183,7 +191,7 @@ debug_triangle(int tilex, int tiley, } while (plane_mask) { - plane[nr_planes] = tri->plane[u_bit_scan(&plane_mask)]; + plane[nr_planes] = tri_plane[u_bit_scan(&plane_mask)]; plane[nr_planes].c = (plane[nr_planes].c + plane[nr_planes].dcdy * tiley - plane[nr_planes].dcdx * tilex); @@ -235,12 +243,15 @@ do_debug_bin( struct tile *tile, for (block = bin->head; block; block = block->next) { for (k = 0; k < block->count; k++, j++) { - boolean blend = is_blend(block, k); + boolean blend = is_blend(tile->state, block, k); char val = get_label(j); int count = 0; if (print_cmds) debug_printf("%c: %15s", val, cmd_name(block->cmd[k])); + + if (block->cmd[k] == LP_RAST_OP_SET_STATE) + tile->state = block->arg[k].state; if (block->cmd[k] == LP_RAST_OP_CLEAR_COLOR || block->cmd[k] == LP_RAST_OP_CLEAR_ZSTENCIL) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 7370119e96..b30408f097 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -77,6 +77,7 @@ struct cmd_bin; struct lp_rasterizer_task { const struct cmd_bin *bin; + const struct lp_rast_state *state; struct lp_scene *scene; unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */ @@ -244,7 +245,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, unsigned x, unsigned y ) { const struct lp_scene *scene = task->scene; - const struct lp_rast_state *state = inputs->state; + const struct lp_rast_state *state = task->state; struct lp_fragment_shader_variant *variant = state->variant; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; @@ -260,10 +261,10 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, BEGIN_JIT_CALL(state); variant->jit_function[RAST_WHOLE]( &state->jit_context, x, y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, + inputs->frontfacing, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), color, depth, 0xffff, @@ -293,6 +294,14 @@ void lp_rast_triangle_3_4(struct lp_rasterizer_task *, void lp_rast_triangle_3_16( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); + +void lp_rast_triangle_4_16( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void +lp_rast_set_state(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg); + void lp_debug_bin( const struct cmd_bin *bin ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index a1f309d4b0..042c315635 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -123,6 +123,16 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, } void +lp_rast_triangle_4_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + union lp_rast_cmd_arg arg2; + arg2.triangle.tri = arg.triangle.tri; + arg2.triangle.plane_mask = (1<<4)-1; + lp_rast_triangle_3(task, arg2); +} + +void lp_rast_triangle_3_4(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { @@ -230,144 +240,207 @@ sign_bits4(const __m128i *cstep, int cdiff) } -/* Special case for 3 plane triangle which is contained entirely - * within a 16x16 block. - */ +#define NR_PLANES 3 + + + + + + + void lp_rast_triangle_3_16(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; - const struct lp_rast_plane *plane = tri->plane; - unsigned mask = arg.triangle.plane_mask; - const int x = task->x + (mask & 0xff); - const int y = task->y + (mask >> 8); - unsigned outmask, inmask, partmask, partial_mask; - unsigned j; - __m128i cstep4[3][4]; - - outmask = 0; /* outside one or more trivial reject planes */ - partmask = 0; /* outside one or more trivial accept planes */ - - for (j = 0; j < 3; j++) { - const int dcdx = -plane[j].dcdx * 4; - const int dcdy = plane[j].dcdy * 4; - __m128i xdcdy = _mm_set1_epi32(dcdy); - - cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3); - cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy); - cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy); - cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy); - - { - const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; - const int cox = plane[j].eo * 4; - const int cio = plane[j].ei * 4 - 1; - - outmask |= sign_bits4(cstep4[j], c + cox); - partmask |= sign_bits4(cstep4[j], c + cio); - } - } + const struct lp_rast_plane *plane = GET_PLANES(tri); + int x = (arg.triangle.plane_mask & 0xff) + task->x; + int y = (arg.triangle.plane_mask >> 8) + task->y; + unsigned i, j; + + struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16]; + unsigned nr = 0; + + __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */ + __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ + __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ + __m128i zero = _mm_setzero_si128(); + + __m128i c; + __m128i dcdx; + __m128i dcdy; + __m128i rej4; + + __m128i dcdx2; + __m128i dcdx3; + + __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */ + __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ + __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ + __m128i unused; + + transpose4_epi32(&p0, &p1, &p2, &zero, + &c, &dcdx, &dcdy, &rej4); + + /* Adjust dcdx; + */ + dcdx = _mm_sub_epi32(zero, dcdx); - if (outmask == 0xffff) - return; + c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); + c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); + rej4 = _mm_slli_epi32(rej4, 2); - /* Mask of sub-blocks which are inside all trivial accept planes: - */ - inmask = ~partmask & 0xffff; + dcdx2 = _mm_add_epi32(dcdx, dcdx); + dcdx3 = _mm_add_epi32(dcdx2, dcdx); - /* Mask of sub-blocks which are inside all trivial reject planes, - * but outside at least one trivial accept plane: - */ - partial_mask = partmask & ~outmask; + transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3, + &span_0, &span_1, &span_2, &unused); - assert((partial_mask & inmask) == 0); + for (i = 0; i < 4; i++) { + __m128i cx = c; - /* Iterate over partials: - */ - while (partial_mask) { - int i = ffs(partial_mask) - 1; - int ix = (i & 3) * 4; - int iy = (i >> 2) * 4; - int px = x + ix; - int py = y + iy; - unsigned mask = 0xffff; - - partial_mask &= ~(1 << i); - - for (j = 0; j < 3; j++) { - const int cx = (plane[j].c - - plane[j].dcdx * px - + plane[j].dcdy * py) * 4; - - mask &= ~sign_bits4(cstep4[j], cx); - } + for (j = 0; j < 4; j++) { + __m128i c4rej = _mm_add_epi32(cx, rej4); + __m128i rej_masks = _mm_srai_epi32(c4rej, 31); - if (mask) - lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask); - } + /* if (is_zero(rej_masks)) */ + if (_mm_movemask_epi8(rej_masks) == 0) { + __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(cx, 0), span_0); + __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(cx, 1), span_1); + __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(cx, 2), span_2); - /* Iterate over fulls: - */ - while (inmask) { - int i = ffs(inmask) - 1; - int ix = (i & 3) * 4; - int iy = (i >> 2) * 4; - int px = x + ix; - int py = y + iy; + __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0); + + __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0)); + __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1)); + __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2)); + + __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1); + __m128i c_01 = _mm_packs_epi32(c_0, c_1); + + __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0)); + __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1)); + __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2)); + + __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2); - inmask &= ~(1 << i); + __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0)); + __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1)); + __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2)); - block_full_4(task, tri, px, py); + __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3); + __m128i c_23 = _mm_packs_epi32(c_2, c_3); + __m128i c_0123 = _mm_packs_epi16(c_01, c_23); + + unsigned mask = _mm_movemask_epi8(c_0123); + + out[nr].i = i; + out[nr].j = j; + out[nr].mask = mask; + if (mask != 0xffff) + nr++; + } + cx = _mm_add_epi32(cx, _mm_slli_epi32(dcdx, 2)); + } + + c = _mm_add_epi32(c, _mm_slli_epi32(dcdy, 2)); } + + for (i = 0; i < nr; i++) + lp_rast_shade_quads_mask(task, + &tri->inputs, + x + 4 * out[i].j, + y + 4 * out[i].i, + 0xffff & ~out[i].mask); } + + + void lp_rast_triangle_3_4(struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) + const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; - const struct lp_rast_plane *plane = tri->plane; - unsigned mask = arg.triangle.plane_mask; - const int x = task->x + (mask & 0xff); - const int y = task->y + (mask >> 8); - unsigned j; - - /* Iterate over partials: + const struct lp_rast_plane *plane = GET_PLANES(tri); + int x = (arg.triangle.plane_mask & 0xff) + task->x; + int y = (arg.triangle.plane_mask >> 8) + task->y; + + __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */ + __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ + __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ + __m128i zero = _mm_setzero_si128(); + + __m128i c; + __m128i dcdx; + __m128i dcdy; + + __m128i dcdx2; + __m128i dcdx3; + + __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */ + __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ + __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ + __m128i unused; + + transpose4_epi32(&p0, &p1, &p2, &zero, + &c, &dcdx, &dcdy, &unused); + + /* Adjust dcdx; */ + dcdx = _mm_sub_epi32(zero, dcdx); + + c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); + c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); + + dcdx2 = _mm_add_epi32(dcdx, dcdx); + dcdx3 = _mm_add_epi32(dcdx2, dcdx); + + transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3, + &span_0, &span_1, &span_2, &unused); + + { - unsigned mask = 0xffff; + __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(c, 0), span_0); + __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(c, 1), span_1); + __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(c, 2), span_2); + + __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0); - for (j = 0; j < 3; j++) { - const int cx = (plane[j].c - - plane[j].dcdx * x - + plane[j].dcdy * y); + __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0)); + __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1)); + __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2)); - const int dcdx = -plane[j].dcdx; - const int dcdy = plane[j].dcdy; - __m128i xdcdy = _mm_set1_epi32(dcdy); + __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1); + __m128i c_01 = _mm_packs_epi32(c_0, c_1); - __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3); - __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); - __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); - __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0)); + __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1)); + __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2)); - __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); - __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); - __m128i result = _mm_packs_epi16(cstep01, cstep23); + __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2); - /* Extract the sign bits - */ - mask &= ~_mm_movemask_epi8(result); - } + __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0)); + __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1)); + __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2)); - if (mask) - lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); + __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3); + __m128i c_23 = _mm_packs_epi32(c_2, c_3); + __m128i c_0123 = _mm_packs_epi16(c_01, c_23); + + unsigned mask = _mm_movemask_epi8(c_0123); + + if (mask != 0xffff) + lp_rast_shade_quads_mask(task, + &tri->inputs, + x, + y, + 0xffff & ~mask); } } - +#undef NR_PLANES #endif @@ -383,10 +456,13 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, #define TAG(x) x##_3 #define NR_PLANES 3 +/*#define TRI_4 lp_rast_triangle_3_4*/ +/*#define TRI_16 lp_rast_triangle_3_16*/ #include "lp_rast_tri_tmp.h" #define TAG(x) x##_4 #define NR_PLANES 4 +#define TRI_16 lp_rast_triangle_4_16 #include "lp_rast_tri_tmp.h" #define TAG(x) x##_5 diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 9830a43ba5..4825d651c0 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -82,7 +82,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, const int dcdx = -plane[j].dcdx * 4; const int dcdy = plane[j].dcdy * 4; const int cox = plane[j].eo * 4; - const int cio = plane[j].ei * 4 - 1; + const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo; + const int cio = ei * 4 - 1; build_masks(c[j] + cox, cio - cox, @@ -156,6 +157,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, { const struct lp_rast_triangle *tri = arg.triangle.tri; unsigned plane_mask = arg.triangle.plane_mask; + const struct lp_rast_plane *tri_plane = GET_PLANES(tri); const int x = task->x, y = task->y; struct lp_rast_plane plane[NR_PLANES]; int c[NR_PLANES]; @@ -172,7 +174,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, while (plane_mask) { int i = ffs(plane_mask) - 1; - plane[j] = tri->plane[i]; + plane[j] = tri_plane[i]; plane_mask &= ~(1 << i); c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; @@ -180,7 +182,8 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, const int dcdx = -plane[j].dcdx * 16; const int dcdy = plane[j].dcdy * 16; const int cox = plane[j].eo * 16; - const int cio = plane[j].ei * 16 - 1; + const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo; + const int cio = ei * 16 - 1; build_masks(c[j] + cox, cio - cox, @@ -245,6 +248,133 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, } } +#if defined(PIPE_ARCH_SSE) && defined(TRI_16) +/* XXX: special case this when intersection is not required. + * - tile completely within bbox, + * - bbox completely within tile. + */ +void +TRI_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = GET_PLANES(tri); + unsigned mask = arg.triangle.plane_mask; + unsigned outmask, partial_mask; + unsigned j; + __m128i cstep4[NR_PLANES][4]; + + int x = (mask & 0xff); + int y = (mask >> 8); + + outmask = 0; /* outside one or more trivial reject planes */ + + x += task->x; + y += task->y; + + for (j = 0; j < NR_PLANES; j++) { + const int dcdx = -plane[j].dcdx * 4; + const int dcdy = plane[j].dcdy * 4; + __m128i xdcdy = _mm_set1_epi32(dcdy); + + cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3); + cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy); + cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy); + cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy); + + { + const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; + const int cox = plane[j].eo * 4; + + outmask |= sign_bits4(cstep4[j], c + cox); + } + } + + if (outmask == 0xffff) + return; + + + /* Mask of sub-blocks which are inside all trivial reject planes, + * but outside at least one trivial accept plane: + */ + partial_mask = 0xffff & ~outmask; + + /* Iterate over partials: + */ + while (partial_mask) { + int i = ffs(partial_mask) - 1; + int ix = (i & 3) * 4; + int iy = (i >> 2) * 4; + int px = x + ix; + int py = y + iy; + unsigned mask = 0xffff; + + partial_mask &= ~(1 << i); + + for (j = 0; j < NR_PLANES; j++) { + const int cx = (plane[j].c - 1 + - plane[j].dcdx * px + + plane[j].dcdy * py) * 4; + + mask &= ~sign_bits4(cstep4[j], cx); + } + + if (mask) + lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask); + } +} +#endif + +#if defined(PIPE_ARCH_SSE) && defined(TRI_4) +void +TRI_4(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = GET_PLANES(tri); + unsigned mask = arg.triangle.plane_mask; + const int x = task->x + (mask & 0xff); + const int y = task->y + (mask >> 8); + unsigned j; + + /* Iterate over partials: + */ + { + unsigned mask = 0xffff; + + for (j = 0; j < NR_PLANES; j++) { + const int cx = (plane[j].c + - plane[j].dcdx * x + + plane[j].dcdy * y); + + const int dcdx = -plane[j].dcdx; + const int dcdy = plane[j].dcdy; + __m128i xdcdy = _mm_set1_epi32(dcdy); + + __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3); + __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); + __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); + __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + + __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); + __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); + __m128i result = _mm_packs_epi16(cstep01, cstep23); + + /* Extract the sign bits + */ + mask &= ~_mm_movemask_epi8(result); + } + + if (mask) + lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); + } +} +#endif + + + #undef TAG +#undef TRI_4 +#undef TRI_16 #undef NR_PLANES diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 8b504f23a3..a4fdf7cff3 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -203,7 +203,9 @@ lp_scene_end_rasterization(struct lp_scene *scene ) for (i = 0; i < scene->tiles_x; i++) { for (j = 0; j < scene->tiles_y; j++) { struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - bin->head = bin->tail = NULL; + bin->head = NULL; + bin->tail = NULL; + bin->last_state = NULL; } } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index dbef7692e4..622c522f11 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -41,6 +41,7 @@ #include "lp_debug.h" struct lp_scene_queue; +struct lp_rast_state; /* We're limited to 2K by 2K for 32bit fixed point rasterization. * Will need a 64-bit version for larger framebuffers. @@ -94,6 +95,7 @@ struct data_block { struct cmd_bin { ushort x; ushort y; + const struct lp_rast_state *last_state; /* most recent state set in bin */ struct cmd_block *head; struct cmd_block *tail; }; @@ -297,7 +299,7 @@ lp_scene_bin_command( struct lp_scene *scene, assert(x < scene->tiles_x); assert(y < scene->tiles_y); - assert(cmd <= LP_RAST_OP_END_QUERY); + assert(cmd < LP_RAST_OP_MAX); if (tail == NULL || tail->count == CMD_BLOCK_MAX) { tail = lp_scene_new_cmd_block( scene, bin ); @@ -318,6 +320,30 @@ lp_scene_bin_command( struct lp_scene *scene, } +static INLINE boolean +lp_scene_bin_cmd_with_state( struct lp_scene *scene, + unsigned x, unsigned y, + const struct lp_rast_state *state, + unsigned cmd, + union lp_rast_cmd_arg arg ) +{ + struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + + if (state != bin->last_state) { + bin->last_state = state; + if (!lp_scene_bin_command(scene, x, y, + LP_RAST_OP_SET_STATE, + lp_rast_arg_state(state))) + return FALSE; + } + + if (!lp_scene_bin_command( scene, x, y, cmd, arg )) + return FALSE; + + return TRUE; +} + + /* Add a command to all active bins. */ static INLINE boolean diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 3854fd70af..6118434d3d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -56,7 +56,7 @@ #include "draw/draw_vbuf.h" -static void set_scene_state( struct lp_setup_context *, enum setup_state, +static boolean set_scene_state( struct lp_setup_context *, enum setup_state, const char *reason); static boolean try_update_scene_state( struct lp_setup_context *setup ); @@ -167,7 +167,7 @@ lp_setup_rasterize_scene( struct lp_setup_context *setup ) -static void +static boolean begin_binning( struct lp_setup_context *setup ) { struct lp_scene *scene = setup->scene; @@ -181,6 +181,8 @@ begin_binning( struct lp_setup_context *setup ) /* Always create a fence: */ scene->fence = lp_fence_create(MAX2(1, setup->num_threads)); + if (!scene->fence) + return FALSE; /* Initialize the bin flags and x/y coords: */ @@ -192,7 +194,8 @@ begin_binning( struct lp_setup_context *setup ) } ok = try_update_scene_state(setup); - assert(ok); + if (!ok) + return FALSE; if (setup->fb.zsbuf && ((setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && @@ -208,7 +211,8 @@ begin_binning( struct lp_setup_context *setup ) ok = lp_scene_bin_everywhere( scene, LP_RAST_OP_CLEAR_COLOR, setup->clear.color ); - assert(ok); + if (!ok) + return FALSE; } } @@ -216,12 +220,14 @@ begin_binning( struct lp_setup_context *setup ) if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) { if (!need_zsload) scene->has_depthstencil_clear = TRUE; + ok = lp_scene_bin_everywhere( scene, LP_RAST_OP_CLEAR_ZSTENCIL, lp_rast_arg_clearzs( setup->clear.zsvalue, setup->clear.zsmask)); - assert(ok); + if (!ok) + return FALSE; } } @@ -229,15 +235,16 @@ begin_binning( struct lp_setup_context *setup ) ok = lp_scene_bin_everywhere( scene, LP_RAST_OP_BEGIN_QUERY, lp_rast_arg_query(setup->active_query) ); - assert(ok); + if (!ok) + return FALSE; } - setup->clear.flags = 0; setup->clear.zsmask = 0; setup->clear.zsvalue = 0; LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__); + return TRUE; } @@ -246,12 +253,12 @@ begin_binning( struct lp_setup_context *setup ) * * TODO: fast path for fullscreen clears and no triangles. */ -static void +static boolean execute_clears( struct lp_setup_context *setup ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - begin_binning( setup ); + return begin_binning( setup ); } const char *states[] = { @@ -262,7 +269,7 @@ const char *states[] = { }; -static void +static boolean set_scene_state( struct lp_setup_context *setup, enum setup_state new_state, const char *reason) @@ -270,7 +277,7 @@ set_scene_state( struct lp_setup_context *setup, unsigned old_state = setup->state; if (old_state == new_state) - return; + return TRUE; if (LP_DEBUG & DEBUG_SCENE) { debug_printf("%s old %s new %s%s%s\n", @@ -294,12 +301,14 @@ set_scene_state( struct lp_setup_context *setup, break; case SETUP_ACTIVE: - begin_binning( setup ); + if (!begin_binning( setup )) + goto fail; break; case SETUP_FLUSHED: if (old_state == SETUP_CLEARED) - execute_clears( setup ); + if (!execute_clears( setup )) + goto fail; lp_setup_rasterize_scene( setup ); assert(setup->scene == NULL); @@ -307,9 +316,21 @@ set_scene_state( struct lp_setup_context *setup, default: assert(0 && "invalid setup state mode"); + goto fail; } setup->state = new_state; + return TRUE; + +fail: + if (setup->scene) { + lp_scene_end_rasterization(setup->scene); + setup->scene = NULL; + } + + setup->state = SETUP_FLUSHED; + lp_setup_reset( setup ); + return FALSE; } @@ -377,16 +398,19 @@ lp_setup_try_clear( struct lp_setup_context *setup, } if (flags & PIPE_CLEAR_DEPTHSTENCIL) { - unsigned zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0; - unsigned smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0; + uint32_t zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0; + uint32_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0; zsvalue = util_pack_z_stencil(setup->fb.zsbuf->format, depth, stencil); - zsmask = util_pack_uint_z_stencil(setup->fb.zsbuf->format, + + zsmask = util_pack_mask_z_stencil(setup->fb.zsbuf->format, zmask, smask); + + zsvalue &= zsmask; } if (setup->state == SETUP_ACTIVE) { @@ -431,7 +455,7 @@ lp_setup_try_clear( struct lp_setup_context *setup, if (flags & PIPE_CLEAR_COLOR) { memcpy(setup->clear.color.clear_color, &color_arg, - sizeof color_arg); + sizeof setup->clear.color.clear_color); } } @@ -490,12 +514,14 @@ void lp_setup_set_point_state( struct lp_setup_context *setup, float point_size, boolean point_size_per_vertex, - uint sprite) + uint sprite_coord_enable, + uint sprite_coord_origin) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); setup->point_size = point_size; - setup->sprite = sprite; + setup->sprite_coord_enable = sprite_coord_enable; + setup->sprite_coord_origin = sprite_coord_origin; setup->point_size_per_vertex = point_size_per_vertex; } @@ -624,7 +650,7 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { struct pipe_sampler_view *view = i < num ? views[i] : NULL; - if(view) { + if (view) { struct pipe_resource *tex = view->texture; struct llvmpipe_resource *lp_tex = llvmpipe_resource(tex); struct lp_jit_texture *jit_tex; @@ -683,6 +709,38 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, /** + * Called during state validation when LP_NEW_SAMPLER is set. + */ +void +lp_setup_set_fragment_sampler_state(struct lp_setup_context *setup, + unsigned num, + const struct pipe_sampler_state **samplers) +{ + unsigned i; + + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + assert(num <= PIPE_MAX_SAMPLERS); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + const struct pipe_sampler_state *sampler = i < num ? samplers[i] : NULL; + + if (sampler) { + struct lp_jit_texture *jit_tex; + jit_tex = &setup->fs.current.jit_context.textures[i]; + + jit_tex->min_lod = sampler->min_lod; + jit_tex->max_lod = sampler->max_lod; + jit_tex->lod_bias = sampler->lod_bias; + COPY_4V(jit_tex->border_color, sampler->border_color); + } + } + + setup->dirty |= LP_SETUP_NEW_FS; +} + + +/** * Is the given texture referenced by any scene? * Note: we have to check all scenes including any scenes currently * being rendered and the current scene being built. @@ -839,7 +897,7 @@ try_update_scene_state( struct lp_setup_context *setup ) return TRUE; } -void +boolean lp_setup_update_state( struct lp_setup_context *setup, boolean update_scene ) { @@ -870,20 +928,38 @@ lp_setup_update_state( struct lp_setup_context *setup, setup->setup.variant->key.size) == 0); } - if (update_scene) - set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ ); + if (update_scene) { + if (!set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ )) + return FALSE; + } /* Only call into update_scene_state() if we already have a * scene: */ if (update_scene && setup->scene) { assert(setup->state == SETUP_ACTIVE); - if (!try_update_scene_state(setup)) { - lp_setup_flush_and_restart(setup); - if (!try_update_scene_state(setup)) - assert(0); - } + + if (try_update_scene_state(setup)) + return TRUE; + + /* Update failed, try to restart the scene. + * + * Cannot call lp_setup_flush_and_restart() directly here + * because of potential recursion. + */ + if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__)) + return FALSE; + + if (!set_scene_state(setup, SETUP_ACTIVE, __FUNCTION__)) + return FALSE; + + if (!setup->scene) + return FALSE; + + return try_update_scene_state(setup); } + + return TRUE; } @@ -987,12 +1063,12 @@ lp_setup_begin_query(struct lp_setup_context *setup, LP_RAST_OP_BEGIN_QUERY, lp_rast_arg_query(pq))) { - lp_setup_flush_and_restart(setup); + if (!lp_setup_flush_and_restart(setup)) + return; if (!lp_scene_bin_everywhere(setup->scene, LP_RAST_OP_BEGIN_QUERY, lp_rast_arg_query(pq))) { - assert(0); return; } } @@ -1036,14 +1112,20 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq) } -void +boolean lp_setup_flush_and_restart(struct lp_setup_context *setup) { if (0) debug_printf("%s\n", __FUNCTION__); assert(setup->state == SETUP_ACTIVE); - set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__); - lp_setup_update_state(setup, TRUE); + + if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__)) + return FALSE; + + if (!lp_setup_update_state(setup, TRUE)) + return FALSE; + + return TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 19078ebbcb..ebb18f8134 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -85,7 +85,8 @@ void lp_setup_set_point_state( struct lp_setup_context *setup, float point_size, boolean point_size_per_vertex, - uint sprite); + uint sprite_coord_enable, + uint sprite_coord_origin); void lp_setup_set_setup_variant( struct lp_setup_context *setup, @@ -121,6 +122,11 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, unsigned num, struct pipe_sampler_view **views); +void +lp_setup_set_fragment_sampler_state(struct lp_setup_context *setup, + unsigned num, + const struct pipe_sampler_state **samplers); + unsigned lp_setup_is_resource_referenced( const struct lp_setup_context *setup, const struct pipe_resource *texture ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index ff3b69afa8..dc2533bedc 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -76,7 +76,7 @@ struct lp_setup_context uint prim; uint vertex_size; uint nr_vertices; - uint sprite; + uint sprite_coord_enable, sprite_coord_origin; uint vertex_buffer_size; void *vertex_buffer; @@ -164,12 +164,12 @@ void lp_setup_choose_point( struct lp_setup_context *setup ); void lp_setup_init_vbuf(struct lp_setup_context *setup); -void lp_setup_update_state( struct lp_setup_context *setup, +boolean lp_setup_update_state( struct lp_setup_context *setup, boolean update_scene); void lp_setup_destroy( struct lp_setup_context *setup ); -void lp_setup_flush_and_restart(struct lp_setup_context *setup); +boolean lp_setup_flush_and_restart(struct lp_setup_context *setup); void lp_setup_print_triangle(struct lp_setup_context *setup, @@ -195,6 +195,4 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, const struct u_rect *bbox, int nr_planes ); -void lp_setup_flush_and_restart(struct lp_setup_context *setup); - #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index 928ffdc5cb..827413bb33 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -47,6 +47,10 @@ struct lp_line_info { const float (*v1)[4]; const float (*v2)[4]; + + float (*a0)[4]; + float (*dadx)[4]; + float (*dady)[4]; }; @@ -54,14 +58,14 @@ struct lp_line_info { * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ static void constant_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, + struct lp_line_info *info, unsigned slot, const float value, unsigned i ) { - tri->inputs.a0[slot][i] = value; - tri->inputs.dadx[slot][i] = 0.0f; - tri->inputs.dady[slot][i] = 0.0f; + info->a0[slot][i] = value; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; } @@ -70,7 +74,6 @@ static void constant_coef( struct lp_setup_context *setup, * for a triangle. */ static void linear_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info, unsigned slot, unsigned vert_attr, @@ -83,10 +86,10 @@ static void linear_coef( struct lp_setup_context *setup, float dadx = da21 * info->dx * info->oneoverarea; float dady = da21 * info->dy * info->oneoverarea; - tri->inputs.dadx[slot][i] = dadx; - tri->inputs.dady[slot][i] = dady; + info->dadx[slot][i] = dadx; + info->dady[slot][i] = dady; - tri->inputs.a0[slot][i] = (a1 - + info->a0[slot][i] = (a1 - (dadx * (info->v1[0][0] - setup->pixel_offset) + dady * (info->v1[0][1] - setup->pixel_offset))); } @@ -101,7 +104,6 @@ static void linear_coef( struct lp_setup_context *setup, * divide the interpolated value by the interpolated W at that fragment. */ static void perspective_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info, unsigned slot, unsigned vert_attr, @@ -116,43 +118,42 @@ static void perspective_coef( struct lp_setup_context *setup, float dadx = da21 * info->dx * info->oneoverarea; float dady = da21 * info->dy * info->oneoverarea; - tri->inputs.dadx[slot][i] = dadx; - tri->inputs.dady[slot][i] = dady; + info->dadx[slot][i] = dadx; + info->dady[slot][i] = dady; - tri->inputs.a0[slot][i] = (a1 - - (dadx * (info->v1[0][0] - setup->pixel_offset) + - dady * (info->v1[0][1] - setup->pixel_offset))); + info->a0[slot][i] = (a1 - + (dadx * (info->v1[0][0] - setup->pixel_offset) + + dady * (info->v1[0][1] - setup->pixel_offset))); } static void setup_fragcoord_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info, unsigned slot, unsigned usage_mask) { /*X*/ if (usage_mask & TGSI_WRITEMASK_X) { - tri->inputs.a0[slot][0] = 0.0; - tri->inputs.dadx[slot][0] = 1.0; - tri->inputs.dady[slot][0] = 0.0; + info->a0[slot][0] = 0.0; + info->dadx[slot][0] = 1.0; + info->dady[slot][0] = 0.0; } /*Y*/ if (usage_mask & TGSI_WRITEMASK_Y) { - tri->inputs.a0[slot][1] = 0.0; - tri->inputs.dadx[slot][1] = 0.0; - tri->inputs.dady[slot][1] = 1.0; + info->a0[slot][1] = 0.0; + info->dadx[slot][1] = 0.0; + info->dady[slot][1] = 1.0; } /*Z*/ if (usage_mask & TGSI_WRITEMASK_Z) { - linear_coef(setup, tri, info, slot, 0, 2); + linear_coef(setup, info, slot, 0, 2); } /*W*/ if (usage_mask & TGSI_WRITEMASK_W) { - linear_coef(setup, tri, info, slot, 0, 3); + linear_coef(setup, info, slot, 0, 3); } } @@ -160,7 +161,6 @@ setup_fragcoord_coef( struct lp_setup_context *setup, * Compute the tri->coef[] array dadx, dady, a0 values. */ static void setup_line_coefficients( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info) { const struct lp_setup_variant_key *key = &setup->setup.variant->key; @@ -179,25 +179,25 @@ static void setup_line_coefficients( struct lp_setup_context *setup, if (key->flatshade_first) { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, info->v1[vert_attr][i], i); + constant_coef(setup, info, slot+1, info->v1[vert_attr][i], i); } else { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, info->v2[vert_attr][i], i); + constant_coef(setup, info, slot+1, info->v2[vert_attr][i], i); } break; case LP_INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - linear_coef(setup, tri, info, slot+1, vert_attr, i); + linear_coef(setup, info, slot+1, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - perspective_coef(setup, tri, info, slot+1, vert_attr, i); + perspective_coef(setup, info, slot+1, vert_attr, i); fragcoord_usage_mask |= TGSI_WRITEMASK_W; break; @@ -210,6 +210,12 @@ static void setup_line_coefficients( struct lp_setup_context *setup, fragcoord_usage_mask |= usage_mask; break; + case LP_INTERP_FACING: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(setup, info, slot+1, 1.0, i); + break; + default: assert(0); } @@ -217,7 +223,7 @@ static void setup_line_coefficients( struct lp_setup_context *setup, /* The internal position input is in slot zero: */ - setup_fragcoord_coef(setup, tri, info, 0, + setup_fragcoord_coef(setup, info, 0, fragcoord_usage_mask); } @@ -274,6 +280,7 @@ try_setup_line( struct lp_setup_context *setup, struct lp_scene *scene = setup->scene; const struct lp_setup_variant_key *key = &setup->setup.variant->key; struct lp_rast_triangle *line; + struct lp_rast_plane *plane; struct lp_line_info info; float width = MAX2(1.0, setup->line_width); struct u_rect bbox; @@ -296,6 +303,7 @@ try_setup_line( struct lp_setup_context *setup, float x2diff; float y2diff; float dx, dy; + float area; boolean draw_start; boolean draw_end; @@ -315,6 +323,18 @@ try_setup_line( struct lp_setup_context *setup, dx = v1[0][0] - v2[0][0]; dy = v1[0][1] - v2[0][1]; + area = (dx * dx + dy * dy); + if (area == 0) { + LP_COUNT(nr_culled_tris); + return TRUE; + } + + info.oneoverarea = 1.0f / area; + info.dx = dx; + info.dy = dy; + info.v1 = v1; + info.v2 = v2; + /* X-MAJOR LINE */ if (fabsf(dx) >= fabsf(dy)) { @@ -460,7 +480,7 @@ try_setup_line( struct lp_setup_context *setup, else { /* do intersection test */ float xintersect = fracf(v2[0][0]) + y2diff * dxdy; - draw_end = (xintersect < 1.0 && xintersect > 0.0); + draw_end = (xintersect < 1.0 && xintersect >= 0.0); } /* Are we already drawing start/end? @@ -498,7 +518,7 @@ try_setup_line( struct lp_setup_context *setup, x_offset_end = y_offset_end * dxdy; } } - + /* x/y positions in fixed point */ x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) - fixed_width/2; x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2; @@ -566,39 +586,35 @@ try_setup_line( struct lp_setup_context *setup, #endif /* calculate the deltas */ - line->plane[0].dcdy = x[0] - x[1]; - line->plane[1].dcdy = x[1] - x[2]; - line->plane[2].dcdy = x[2] - x[3]; - line->plane[3].dcdy = x[3] - x[0]; + plane = GET_PLANES(line); + plane[0].dcdy = x[0] - x[1]; + plane[1].dcdy = x[1] - x[2]; + plane[2].dcdy = x[2] - x[3]; + plane[3].dcdy = x[3] - x[0]; - line->plane[0].dcdx = y[0] - y[1]; - line->plane[1].dcdx = y[1] - y[2]; - line->plane[2].dcdx = y[2] - y[3]; - line->plane[3].dcdx = y[3] - y[0]; + plane[0].dcdx = y[0] - y[1]; + plane[1].dcdx = y[1] - y[2]; + plane[2].dcdx = y[2] - y[3]; + plane[3].dcdx = y[3] - y[0]; - info.oneoverarea = 1.0f / (dx * dx + dy * dy); - info.dx = dx; - info.dy = dy; - info.v1 = v1; - info.v2 = v2; - /* Setup parameter interpolants: */ - setup_line_coefficients( setup, line, &info); + info.a0 = GET_A0(&line->inputs); + info.dadx = GET_DADX(&line->inputs); + info.dady = GET_DADY(&line->inputs); + setup_line_coefficients(setup, &info); - line->inputs.facing = 1.0F; - line->inputs.state = setup->fs.stored; + line->inputs.frontfacing = TRUE; line->inputs.disable = FALSE; line->inputs.opaque = FALSE; for (i = 0; i < 4; i++) { - struct lp_rast_plane *plane = &line->plane[i]; /* half-edge constants, will be interated over the whole render * target. */ - plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; + plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i]; /* correct for top-left vs. bottom-left fill convention. @@ -614,38 +630,34 @@ try_setup_line( struct lp_setup_context *setup, * to its usual method, in which case it will probably want * to use the opposite, top-left convention. */ - if (plane->dcdx < 0) { + if (plane[i].dcdx < 0) { /* both fill conventions want this - adjust for left edges */ - plane->c++; + plane[i].c++; } - else if (plane->dcdx == 0) { + else if (plane[i].dcdx == 0) { if (setup->pixel_offset == 0) { /* correct for top-left fill convention: */ - if (plane->dcdy > 0) plane->c++; + if (plane[i].dcdy > 0) plane[i].c++; } else { /* correct for bottom-left fill convention: */ - if (plane->dcdy < 0) plane->c++; + if (plane[i].dcdy < 0) plane[i].c++; } } - plane->dcdx *= FIXED_ONE; - plane->dcdy *= FIXED_ONE; + plane[i].dcdx *= FIXED_ONE; + plane[i].dcdy *= FIXED_ONE; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to * match the active blocksize. Scaling in this way works best if * the blocks are square. */ - plane->eo = 0; - if (plane->dcdx < 0) plane->eo -= plane->dcdx; - if (plane->dcdy > 0) plane->eo += plane->dcdy; - - /* Calculate trivial accept offsets from the above. - */ - plane->ei = plane->dcdy - plane->dcdx - plane->eo; + plane[i].eo = 0; + if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; + if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; } @@ -668,29 +680,25 @@ try_setup_line( struct lp_setup_context *setup, * these planes elsewhere. */ if (nr_planes == 8) { - line->plane[4].dcdx = -1; - line->plane[4].dcdy = 0; - line->plane[4].c = 1-bbox.x0; - line->plane[4].ei = 0; - line->plane[4].eo = 1; - - line->plane[5].dcdx = 1; - line->plane[5].dcdy = 0; - line->plane[5].c = bbox.x1+1; - line->plane[5].ei = -1; - line->plane[5].eo = 0; - - line->plane[6].dcdx = 0; - line->plane[6].dcdy = 1; - line->plane[6].c = 1-bbox.y0; - line->plane[6].ei = 0; - line->plane[6].eo = 1; - - line->plane[7].dcdx = 0; - line->plane[7].dcdy = -1; - line->plane[7].c = bbox.y1+1; - line->plane[7].ei = -1; - line->plane[7].eo = 0; + plane[4].dcdx = -1; + plane[4].dcdy = 0; + plane[4].c = 1-bbox.x0; + plane[4].eo = 1; + + plane[5].dcdx = 1; + plane[5].dcdy = 0; + plane[5].c = bbox.x1+1; + plane[5].eo = 0; + + plane[6].dcdx = 0; + plane[6].dcdy = 1; + plane[6].c = 1-bbox.y0; + plane[6].eo = 1; + + plane[7].dcdx = 0; + plane[7].dcdy = -1; + plane[7].c = bbox.y1+1; + plane[7].eo = 0; } return lp_setup_bin_triangle(setup, line, &bbox, nr_planes); @@ -703,10 +711,11 @@ static void lp_setup_line( struct lp_setup_context *setup, { if (!try_setup_line( setup, v0, v1 )) { - lp_setup_flush_and_restart(setup); + if (!lp_setup_flush_and_restart(setup)) + return; if (!try_setup_line( setup, v0, v1 )) - assert(0); + return; } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index c98966022e..146f1bd07c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -33,7 +33,6 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "lp_perf.h" -#include "lp_setup_context.h" #include "lp_rast.h" #include "lp_state_fs.h" #include "lp_state_setup.h" @@ -47,63 +46,118 @@ struct point_info { int dx01, dx12; const float (*v0)[4]; + + float (*a0)[4]; + float (*dadx)[4]; + float (*dady)[4]; }; /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ -static void constant_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *point, - unsigned slot, - const float value, - unsigned i ) +static void +constant_coef(struct lp_setup_context *setup, + struct point_info *info, + unsigned slot, + const float value, + unsigned i) { - point->inputs.a0[slot][i] = value; - point->inputs.dadx[slot][i] = 0.0f; - point->inputs.dady[slot][i] = 0.0f; + info->a0[slot][i] = value; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; } -static void perspective_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *point, - const struct point_info *info, - unsigned slot, - unsigned vert_attr, - unsigned i) + +static void +point_persp_coeff(struct lp_setup_context *setup, + const struct point_info *info, + unsigned slot, + unsigned i) { - if (i == 0) { - float dadx = FIXED_ONE / (float)info->dx12; + /* + * Fragment shader expects pre-multiplied w for LP_INTERP_PERSPECTIVE. A + * better stratergy would be to take the primitive in consideration when + * generating the fragment shader key, and therefore avoid the per-fragment + * perspective divide. + */ + + float w0 = info->v0[0][3]; + + assert(i < 4); + + info->a0[slot][i] = info->v0[slot][i]*w0; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; +} + + +/** + * Setup automatic texcoord coefficients (for sprite rendering). + * \param slot the vertex attribute slot to setup + * \param i the attribute channel in [0,3] + * \param sprite_coord_origin one of PIPE_SPRITE_COORD_x + * \param perspective does the shader expects pre-multiplied w, i.e., + * LP_INTERP_PERSPECTIVE is specified in the shader key + */ +static void +texcoord_coef(struct lp_setup_context *setup, + const struct point_info *info, + unsigned slot, + unsigned i, + unsigned sprite_coord_origin, + boolean perspective) +{ + float w0 = info->v0[0][3]; + + assert(i < 4); + + if (i == 0) { + float dadx = FIXED_ONE / (float)info->dx12; float dady = 0.0f; - point->inputs.dadx[slot][i] = dadx; - point->inputs.dady[slot][i] = dady; - point->inputs.a0[slot][i] = (0.5 - - (dadx * ((float)info->v0[0][0] - setup->pixel_offset) + - dady * ((float)info->v0[0][1] - setup->pixel_offset))); - } + float x0 = info->v0[0][0] - setup->pixel_offset; + float y0 = info->v0[0][1] - setup->pixel_offset; - else if (i == 1) { - float dadx = 0.0f; - float dady = FIXED_ONE / (float)info->dx12; - - point->inputs.dadx[slot][i] = dadx; - point->inputs.dady[slot][i] = dady; - point->inputs.a0[slot][i] = (0.5 - - (dadx * ((float)info->v0[0][0] - setup->pixel_offset) + - dady * ((float)info->v0[0][1] - setup->pixel_offset))); + info->dadx[slot][0] = dadx; + info->dady[slot][0] = dady; + info->a0[slot][0] = 0.5 - (dadx * x0 + dady * y0); + + if (perspective) { + info->dadx[slot][0] *= w0; + info->dady[slot][0] *= w0; + info->a0[slot][0] *= w0; + } } + else if (i == 1) { + float dadx = 0.0f; + float dady = FIXED_ONE / (float)info->dx12; + float x0 = info->v0[0][0] - setup->pixel_offset; + float y0 = info->v0[0][1] - setup->pixel_offset; + if (sprite_coord_origin == PIPE_SPRITE_COORD_LOWER_LEFT) { + dady = -dady; + } + + info->dadx[slot][1] = dadx; + info->dady[slot][1] = dady; + info->a0[slot][1] = 0.5 - (dadx * x0 + dady * y0); + + if (perspective) { + info->dadx[slot][1] *= w0; + info->dady[slot][1] *= w0; + info->a0[slot][1] *= w0; + } + } else if (i == 2) { - point->inputs.a0[slot][i] = 0.0f; - point->inputs.dadx[slot][i] = 0.0f; - point->inputs.dady[slot][i] = 0.0f; + info->a0[slot][2] = 0.0f; + info->dadx[slot][2] = 0.0f; + info->dady[slot][2] = 0.0f; } - - else if (i == 3) { - point->inputs.a0[slot][i] = 1.0f; - point->inputs.dadx[slot][i] = 0.0f; - point->inputs.dady[slot][i] = 0.0f; + else { + info->a0[slot][3] = perspective ? w0 : 1.0f; + info->dadx[slot][3] = 0.0f; + info->dady[slot][3] = 0.0f; } - } @@ -115,45 +169,45 @@ static void perspective_coef( struct lp_setup_context *setup, */ static void setup_point_fragcoord_coef(struct lp_setup_context *setup, - struct lp_rast_triangle *point, - const struct point_info *info, + struct point_info *info, unsigned slot, unsigned usage_mask) { /*X*/ if (usage_mask & TGSI_WRITEMASK_X) { - point->inputs.a0[slot][0] = 0.0; - point->inputs.dadx[slot][0] = 1.0; - point->inputs.dady[slot][0] = 0.0; + info->a0[slot][0] = 0.0; + info->dadx[slot][0] = 1.0; + info->dady[slot][0] = 0.0; } /*Y*/ if (usage_mask & TGSI_WRITEMASK_Y) { - point->inputs.a0[slot][1] = 0.0; - point->inputs.dadx[slot][1] = 0.0; - point->inputs.dady[slot][1] = 1.0; + info->a0[slot][1] = 0.0; + info->dadx[slot][1] = 0.0; + info->dady[slot][1] = 1.0; } /*Z*/ if (usage_mask & TGSI_WRITEMASK_Z) { - constant_coef(setup, point, slot, info->v0[0][2], 2); + constant_coef(setup, info, slot, info->v0[0][2], 2); } /*W*/ if (usage_mask & TGSI_WRITEMASK_W) { - constant_coef(setup, point, slot, info->v0[0][3], 3); + constant_coef(setup, info, slot, info->v0[0][3], 3); } } + /** * Compute the point->coef[] array dadx, dady, a0 values. */ static void setup_point_coefficients( struct lp_setup_context *setup, - struct lp_rast_triangle *point, - const struct point_info *info) + struct point_info *info) { const struct lp_setup_variant_key *key = &setup->setup.variant->key; + const struct lp_fragment_shader *shader = setup->fs.current.variant->shader; unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; @@ -162,9 +216,15 @@ setup_point_coefficients( struct lp_setup_context *setup, for (slot = 0; slot < key->num_inputs; slot++) { unsigned vert_attr = key->inputs[slot].src_index; unsigned usage_mask = key->inputs[slot].usage_mask; + enum lp_interp interp = key->inputs[slot].interp; + boolean perspective = !!(interp == LP_INTERP_PERSPECTIVE); unsigned i; + + if (perspective & usage_mask) { + fragcoord_usage_mask |= TGSI_WRITEMASK_W; + } - switch (key->inputs[slot].interp) { + switch (interp) { case LP_INTERP_POSITION: /* * The generated pixel interpolators will pick up the coeffs from @@ -174,37 +234,63 @@ setup_point_coefficients( struct lp_setup_context *setup, fragcoord_usage_mask |= usage_mask; break; + case LP_INTERP_LINEAR: + /* Sprite tex coords may use linear interpolation someday */ + /* fall-through */ case LP_INTERP_PERSPECTIVE: - /* For point sprite textures */ - if (setup->fs.current.variant->shader->info.input_semantic_name[slot] - == TGSI_SEMANTIC_GENERIC) - { - int index = setup->fs.current.variant->shader->info.input_semantic_index[slot]; - - if (setup->sprite & (1 << index)) { - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - perspective_coef(setup, point, info, slot+1, vert_attr, i); - fragcoord_usage_mask |= TGSI_WRITEMASK_W; - break; + /* check if the sprite coord flag is set for this attribute. + * If so, set it up so it up so x and y vary from 0 to 1. + */ + if (shader->info.base.input_semantic_name[slot] == TGSI_SEMANTIC_GENERIC) { + unsigned semantic_index = shader->info.base.input_semantic_index[slot]; + /* Note that sprite_coord enable is a bitfield of + * PIPE_MAX_SHADER_OUTPUTS bits. + */ + if (semantic_index < PIPE_MAX_SHADER_OUTPUTS && + (setup->sprite_coord_enable & (1 << semantic_index))) { + for (i = 0; i < NUM_CHANNELS; i++) { + if (usage_mask & (1 << i)) { + texcoord_coef(setup, info, slot + 1, i, + setup->sprite_coord_origin, + perspective); + } + } + break; } } - - /* Otherwise fallthrough */ - default: + /* fall-through */ + case LP_INTERP_CONSTANT: for (i = 0; i < NUM_CHANNELS; i++) { - if (usage_mask & (1 << i)) - constant_coef(setup, point, slot+1, info->v0[vert_attr][i], i); + if (usage_mask & (1 << i)) { + if (perspective) { + point_persp_coeff(setup, info, slot+1, i); + } + else { + constant_coef(setup, info, slot+1, info->v0[vert_attr][i], i); + } + } } + break; + + case LP_INTERP_FACING: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(setup, info, slot+1, 1.0, i); + break; + + default: + assert(0); + break; } } /* The internal position input is in slot zero: */ - setup_point_fragcoord_coef(setup, point, info, 0, + setup_point_fragcoord_coef(setup, info, 0, fragcoord_usage_mask); } + static INLINE int subpixel_snap(float a) { @@ -285,55 +371,57 @@ try_setup_point( struct lp_setup_context *setup, info.dx12 = fixed_width; info.dy01 = fixed_width; info.dy12 = 0; + info.a0 = GET_A0(&point->inputs); + info.dadx = GET_DADX(&point->inputs); + info.dady = GET_DADY(&point->inputs); /* Setup parameter interpolants: */ - setup_point_coefficients(setup, point, &info); + setup_point_coefficients(setup, &info); - point->inputs.facing = 1.0F; - point->inputs.state = setup->fs.stored; + point->inputs.frontfacing = TRUE; point->inputs.disable = FALSE; point->inputs.opaque = FALSE; { - point->plane[0].dcdx = -1; - point->plane[0].dcdy = 0; - point->plane[0].c = 1-bbox.x0; - point->plane[0].ei = 0; - point->plane[0].eo = 1; - - point->plane[1].dcdx = 1; - point->plane[1].dcdy = 0; - point->plane[1].c = bbox.x1+1; - point->plane[1].ei = -1; - point->plane[1].eo = 0; - - point->plane[2].dcdx = 0; - point->plane[2].dcdy = 1; - point->plane[2].c = 1-bbox.y0; - point->plane[2].ei = 0; - point->plane[2].eo = 1; - - point->plane[3].dcdx = 0; - point->plane[3].dcdy = -1; - point->plane[3].c = bbox.y1+1; - point->plane[3].ei = -1; - point->plane[3].eo = 0; + struct lp_rast_plane *plane = GET_PLANES(point); + + plane[0].dcdx = -1; + plane[0].dcdy = 0; + plane[0].c = 1-bbox.x0; + plane[0].eo = 1; + + plane[1].dcdx = 1; + plane[1].dcdy = 0; + plane[1].c = bbox.x1+1; + plane[1].eo = 0; + + plane[2].dcdx = 0; + plane[2].dcdy = 1; + plane[2].c = 1-bbox.y0; + plane[2].eo = 1; + + plane[3].dcdx = 0; + plane[3].dcdy = -1; + plane[3].c = bbox.y1+1; + plane[3].eo = 0; } return lp_setup_bin_triangle(setup, point, &bbox, nr_planes); } -static void lp_setup_point( struct lp_setup_context *setup, - const float (*v0)[4] ) +static void +lp_setup_point(struct lp_setup_context *setup, + const float (*v0)[4]) { if (!try_setup_point( setup, v0 )) { - lp_setup_flush_and_restart(setup); + if (!lp_setup_flush_and_restart(setup)) + return; if (!try_setup_point( setup, v0 )) - assert(0); + return; } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 43617a6b67..03036cd75d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -32,6 +32,7 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_rect.h" +#include "util/u_sse.h" #include "lp_perf.h" #include "lp_setup_context.h" #include "lp_rast.h" @@ -40,7 +41,9 @@ #define NUM_CHANNELS 4 - +#if defined(PIPE_ARCH_SSE) +#include <emmintrin.h> +#endif static INLINE int subpixel_snap(float a) @@ -70,27 +73,28 @@ fixed_to_float(int a) */ struct lp_rast_triangle * lp_setup_alloc_triangle(struct lp_scene *scene, - unsigned num_inputs, + unsigned nr_inputs, unsigned nr_planes, unsigned *tri_size) { - unsigned input_array_sz = NUM_CHANNELS * (num_inputs + 1) * sizeof(float); + unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float); + unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane); struct lp_rast_triangle *tri; - unsigned tri_bytes, bytes; - char *inputs; - tri_bytes = align(Offset(struct lp_rast_triangle, plane[nr_planes]), 16); - bytes = tri_bytes + (3 * input_array_sz); + *tri_size = (sizeof(struct lp_rast_triangle) + + 3 * input_array_sz + + plane_sz); - tri = lp_scene_alloc_aligned( scene, bytes, 16 ); + tri = lp_scene_alloc_aligned( scene, *tri_size, 16 ); + if (tri == NULL) + return NULL; - if (tri) { - inputs = ((char *)tri) + tri_bytes; - tri->inputs.a0 = (float (*)[4]) inputs; - tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz); - tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz); + tri->inputs.stride = input_array_sz; - *tri_size = bytes; + { + char *a = (char *)tri; + char *b = (char *)&GET_PLANES(tri)[nr_planes]; + assert(b - a == *tri_size); } return tri; @@ -161,8 +165,9 @@ lp_setup_print_triangle(struct lp_setup_context *setup, } +#define MAX_PLANES 8 static unsigned -lp_rast_tri_tab[9] = { +lp_rast_tri_tab[MAX_PLANES+1] = { 0, /* should be impossible */ LP_RAST_OP_TRIANGLE_1, LP_RAST_OP_TRIANGLE_2, @@ -200,14 +205,16 @@ lp_setup_whole_tile(struct lp_setup_context *setup, } LP_COUNT(nr_shade_opaque_64); - return lp_scene_bin_command( scene, tx, ty, - LP_RAST_OP_SHADE_TILE_OPAQUE, - lp_rast_arg_inputs(inputs) ); + return lp_scene_bin_cmd_with_state( scene, tx, ty, + setup->fs.stored, + LP_RAST_OP_SHADE_TILE_OPAQUE, + lp_rast_arg_inputs(inputs) ); } else { LP_COUNT(nr_shade_64); - return lp_scene_bin_command( scene, tx, ty, - LP_RAST_OP_SHADE_TILE, - lp_rast_arg_inputs(inputs) ); + return lp_scene_bin_cmd_with_state( scene, tx, ty, + setup->fs.stored, + LP_RAST_OP_SHADE_TILE, + lp_rast_arg_inputs(inputs) ); } } @@ -227,12 +234,11 @@ do_triangle_ccw(struct lp_setup_context *setup, struct lp_scene *scene = setup->scene; const struct lp_setup_variant_key *key = &setup->setup.variant->key; struct lp_rast_triangle *tri; - int x[3]; - int y[3]; - int area; + struct lp_rast_plane *plane; + int x[4]; + int y[4]; struct u_rect bbox; unsigned tri_bytes; - int i; int nr_planes = 3; if (0) @@ -249,10 +255,12 @@ do_triangle_ccw(struct lp_setup_context *setup, x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset); x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset); x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset); + x[3] = 0; y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset); y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset); y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset); - + y[3] = 0; + /* Bounding rectangle (in pixels) */ { @@ -296,7 +304,7 @@ do_triangle_ccw(struct lp_setup_context *setup, if (!tri) return FALSE; -#ifdef DEBUG +#if 0 tri->v[0][0] = v0[0][0]; tri->v[1][0] = v1[0][0]; tri->v[2][0] = v2[0][0]; @@ -305,104 +313,173 @@ do_triangle_ccw(struct lp_setup_context *setup, tri->v[2][1] = v2[0][1]; #endif - tri->plane[0].dcdy = x[0] - x[1]; - tri->plane[1].dcdy = x[1] - x[2]; - tri->plane[2].dcdy = x[2] - x[0]; - - tri->plane[0].dcdx = y[0] - y[1]; - tri->plane[1].dcdx = y[1] - y[2]; - tri->plane[2].dcdx = y[2] - y[0]; - - area = (tri->plane[0].dcdy * tri->plane[2].dcdx - - tri->plane[2].dcdy * tri->plane[0].dcdx); - LP_COUNT(nr_tris); - /* Cull non-ccw and zero-sized triangles. - * - * XXX: subject to overflow?? - */ - if (area <= 0) { - lp_scene_putback_data( scene, tri_bytes ); - LP_COUNT(nr_culled_tris); - return TRUE; - } - /* Setup parameter interpolants: */ setup->setup.variant->jit_function( v0, v1, v2, frontfacing, - tri->inputs.a0, - tri->inputs.dadx, - tri->inputs.dady, + GET_A0(&tri->inputs), + GET_DADX(&tri->inputs), + GET_DADY(&tri->inputs), &setup->setup.variant->key ); - tri->inputs.facing = frontfacing ? 1.0F : -1.0F; + tri->inputs.frontfacing = frontfacing; tri->inputs.disable = FALSE; tri->inputs.opaque = setup->fs.current.variant->opaque; - tri->inputs.state = setup->fs.stored; if (0) lp_dump_setup_coef(&setup->setup.variant->key, - (const float (*)[4])tri->inputs.a0, - (const float (*)[4])tri->inputs.dadx, - (const float (*)[4])tri->inputs.dady); - - for (i = 0; i < 3; i++) { - struct lp_rast_plane *plane = &tri->plane[i]; + (const float (*)[4])GET_A0(&tri->inputs), + (const float (*)[4])GET_DADX(&tri->inputs), + (const float (*)[4])GET_DADY(&tri->inputs)); + + plane = GET_PLANES(tri); + +#if defined(PIPE_ARCH_SSE) + { + __m128i vertx, verty; + __m128i shufx, shufy; + __m128i dcdx, dcdy, c; + __m128i unused; + __m128i dcdx_neg_mask; + __m128i dcdy_neg_mask; + __m128i dcdx_zero_mask; + __m128i top_left_flag; + __m128i c_inc_mask, c_inc; + __m128i eo, p0, p1, p2; + __m128i zero = _mm_setzero_si128(); + + vertx = _mm_loadu_si128((__m128i *)x); /* vertex x coords */ + verty = _mm_loadu_si128((__m128i *)y); /* vertex y coords */ + + shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1)); + shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1)); + + dcdx = _mm_sub_epi32(verty, shufy); + dcdy = _mm_sub_epi32(vertx, shufx); + + dcdx_neg_mask = _mm_srai_epi32(dcdx, 31); + dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero); + dcdy_neg_mask = _mm_srai_epi32(dcdy, 31); + + top_left_flag = _mm_set1_epi32((setup->pixel_offset == 0) ? ~0 : 0); + + c_inc_mask = _mm_or_si128(dcdx_neg_mask, + _mm_and_si128(dcdx_zero_mask, + _mm_xor_si128(dcdy_neg_mask, + top_left_flag))); - /* half-edge constants, will be interated over the whole render - * target. + c_inc = _mm_srli_epi32(c_inc_mask, 31); + + c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx), + mm_mullo_epi32(dcdy, verty)); + + c = _mm_add_epi32(c, c_inc); + + /* Scale up to match c: */ - plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; - - /* correct for top-left vs. bottom-left fill convention. - * - * note that we're overloading gl_rasterization_rules to mean - * both (0.5,0.5) pixel centers *and* bottom-left filling - * convention. - * - * GL actually has a top-left filling convention, but GL's - * notion of "top" differs from gallium's... - * - * Also, sometimes (in FBO cases) GL will render upside down - * to its usual method, in which case it will probably want - * to use the opposite, top-left convention. - */ - if (plane->dcdx < 0) { - /* both fill conventions want this - adjust for left edges */ - plane->c++; - } - else if (plane->dcdx == 0) { - if (setup->pixel_offset == 0) { - /* correct for top-left fill convention: - */ - if (plane->dcdy > 0) plane->c++; + dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER); + dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER); + + /* Calculate trivial reject values: + */ + eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy), + _mm_and_si128(dcdx_neg_mask, dcdx)); + + /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */ + + /* Pointless transpose which gets undone immediately in + * rasterization: + */ + transpose4_epi32(&c, &dcdx, &dcdy, &eo, + &p0, &p1, &p2, &unused); + + _mm_store_si128((__m128i *)&plane[0], p0); + _mm_store_si128((__m128i *)&plane[1], p1); + _mm_store_si128((__m128i *)&plane[2], p2); + } +#else + { + int i; + plane[0].dcdy = x[0] - x[1]; + plane[1].dcdy = x[1] - x[2]; + plane[2].dcdy = x[2] - x[0]; + plane[0].dcdx = y[0] - y[1]; + plane[1].dcdx = y[1] - y[2]; + plane[2].dcdx = y[2] - y[0]; + + for (i = 0; i < 3; i++) { + /* half-edge constants, will be interated over the whole render + * target. + */ + plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i]; + + /* correct for top-left vs. bottom-left fill convention. + * + * note that we're overloading gl_rasterization_rules to mean + * both (0.5,0.5) pixel centers *and* bottom-left filling + * convention. + * + * GL actually has a top-left filling convention, but GL's + * notion of "top" differs from gallium's... + * + * Also, sometimes (in FBO cases) GL will render upside down + * to its usual method, in which case it will probably want + * to use the opposite, top-left convention. + */ + if (plane[i].dcdx < 0) { + /* both fill conventions want this - adjust for left edges */ + plane[i].c++; } - else { - /* correct for bottom-left fill convention: - */ - if (plane->dcdy < 0) plane->c++; + else if (plane[i].dcdx == 0) { + if (setup->pixel_offset == 0) { + /* correct for top-left fill convention: + */ + if (plane[i].dcdy > 0) plane[i].c++; + } + else { + /* correct for bottom-left fill convention: + */ + if (plane[i].dcdy < 0) plane[i].c++; + } } - } - plane->dcdx *= FIXED_ONE; - plane->dcdy *= FIXED_ONE; + plane[i].dcdx *= FIXED_ONE; + plane[i].dcdy *= FIXED_ONE; - /* find trivial reject offsets for each edge for a single-pixel - * sized block. These will be scaled up at each recursive level to - * match the active blocksize. Scaling in this way works best if - * the blocks are square. - */ - plane->eo = 0; - if (plane->dcdx < 0) plane->eo -= plane->dcdx; - if (plane->dcdy > 0) plane->eo += plane->dcdy; + /* find trivial reject offsets for each edge for a single-pixel + * sized block. These will be scaled up at each recursive level to + * match the active blocksize. Scaling in this way works best if + * the blocks are square. + */ + plane[i].eo = 0; + if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; + if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; + } + } +#endif - /* Calculate trivial accept offsets from the above. - */ - plane->ei = plane->dcdy - plane->dcdx - plane->eo; + if (0) { + debug_printf("p0: %08x/%08x/%08x/%08x\n", + plane[0].c, + plane[0].dcdx, + plane[0].dcdy, + plane[0].eo); + + debug_printf("p1: %08x/%08x/%08x/%08x\n", + plane[1].c, + plane[1].dcdx, + plane[1].dcdy, + plane[1].eo); + + debug_printf("p0: %08x/%08x/%08x/%08x\n", + plane[2].c, + plane[2].dcdx, + plane[2].dcdy, + plane[2].eo); } @@ -425,29 +502,25 @@ do_triangle_ccw(struct lp_setup_context *setup, * these planes elsewhere. */ if (nr_planes == 7) { - tri->plane[3].dcdx = -1; - tri->plane[3].dcdy = 0; - tri->plane[3].c = 1-bbox.x0; - tri->plane[3].ei = 0; - tri->plane[3].eo = 1; - - tri->plane[4].dcdx = 1; - tri->plane[4].dcdy = 0; - tri->plane[4].c = bbox.x1+1; - tri->plane[4].ei = -1; - tri->plane[4].eo = 0; - - tri->plane[5].dcdx = 0; - tri->plane[5].dcdy = 1; - tri->plane[5].c = 1-bbox.y0; - tri->plane[5].ei = 0; - tri->plane[5].eo = 1; - - tri->plane[6].dcdx = 0; - tri->plane[6].dcdy = -1; - tri->plane[6].c = bbox.y1+1; - tri->plane[6].ei = -1; - tri->plane[6].eo = 0; + plane[3].dcdx = -1; + plane[3].dcdy = 0; + plane[3].c = 1-bbox.x0; + plane[3].eo = 1; + + plane[4].dcdx = 1; + plane[4].dcdy = 0; + plane[4].c = bbox.x1+1; + plane[4].eo = 0; + + plane[5].dcdx = 0; + plane[5].dcdy = 1; + plane[5].c = 1-bbox.y0; + plane[5].eo = 1; + + plane[6].dcdx = 0; + plane[6].dcdy = -1; + plane[6].c = bbox.y1+1; + plane[6].eo = 0; } return lp_setup_bin_triangle( setup, tri, &bbox, nr_planes ); @@ -500,56 +573,63 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, int sz = floor_pot((bbox->x1 - (bbox->x0 & ~3)) | (bbox->y1 - (bbox->y0 & ~3))); - if (nr_planes == 3) { - if (sz < 4 && dx < 64) - { - /* Triangle is contained in a single 4x4 stamp: - */ - int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8); - - return lp_scene_bin_command( scene, - bbox->x0/64, bbox->y0/64, - LP_RAST_OP_TRIANGLE_3_4, - lp_rast_arg_triangle(tri, mask) ); - } - - if (sz < 16 && dx < 64) - { - int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8); - - /* Triangle is contained in a single 16x16 block: - */ - return lp_scene_bin_command( scene, - bbox->x0/64, bbox->y0/64, - LP_RAST_OP_TRIANGLE_3_16, - lp_rast_arg_triangle(tri, mask) ); - } - } - - /* Determine which tile(s) intersect the triangle's bounding box */ if (dx < TILE_SIZE) { int ix0 = bbox->x0 / TILE_SIZE; int iy0 = bbox->y0 / TILE_SIZE; + int px = bbox->x0 & 63 & ~3; + int py = bbox->y0 & 63 & ~3; + int mask = px | (py << 8); assert(iy0 == bbox->y1 / TILE_SIZE && ix0 == bbox->x1 / TILE_SIZE); + if (nr_planes == 3) { + if (sz < 4) + { + /* Triangle is contained in a single 4x4 stamp: + */ + return lp_scene_bin_cmd_with_state( scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_3_4, + lp_rast_arg_triangle(tri, mask) ); + } + + if (sz < 16) + { + /* Triangle is contained in a single 16x16 block: + */ + return lp_scene_bin_cmd_with_state( scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_3_16, + lp_rast_arg_triangle(tri, mask) ); + } + } + else if (nr_planes == 4 && sz < 16) + { + return lp_scene_bin_cmd_with_state(scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_4_16, + lp_rast_arg_triangle(tri, mask) ); + } + + /* Triangle is contained in a single tile: */ - return lp_scene_bin_command( scene, ix0, iy0, - lp_rast_tri_tab[nr_planes], - lp_rast_arg_triangle(tri, (1<<nr_planes)-1) ); + return lp_scene_bin_cmd_with_state( scene, ix0, iy0, setup->fs.stored, + lp_rast_tri_tab[nr_planes], + lp_rast_arg_triangle(tri, (1<<nr_planes)-1) ); } else { - int c[7]; - int ei[7]; - int eo[7]; - int xstep[7]; - int ystep[7]; + struct lp_rast_plane *plane = GET_PLANES(tri); + int c[MAX_PLANES]; + int ei[MAX_PLANES]; + int eo[MAX_PLANES]; + int xstep[MAX_PLANES]; + int ystep[MAX_PLANES]; int x, y; int ix0 = bbox->x0 / TILE_SIZE; @@ -558,14 +638,17 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, int iy1 = bbox->y1 / TILE_SIZE; for (i = 0; i < nr_planes; i++) { - c[i] = (tri->plane[i].c + - tri->plane[i].dcdy * iy0 * TILE_SIZE - - tri->plane[i].dcdx * ix0 * TILE_SIZE); - - ei[i] = tri->plane[i].ei << TILE_ORDER; - eo[i] = tri->plane[i].eo << TILE_ORDER; - xstep[i] = -(tri->plane[i].dcdx << TILE_ORDER); - ystep[i] = tri->plane[i].dcdy << TILE_ORDER; + c[i] = (plane[i].c + + plane[i].dcdy * iy0 * TILE_SIZE - + plane[i].dcdx * ix0 * TILE_SIZE); + + ei[i] = (plane[i].dcdy - + plane[i].dcdx - + plane[i].eo) << TILE_ORDER; + + eo[i] = plane[i].eo << TILE_ORDER; + xstep[i] = -(plane[i].dcdx << TILE_ORDER); + ystep[i] = plane[i].dcdy << TILE_ORDER; } @@ -578,7 +661,7 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, for (y = iy0; y <= iy1; y++) { boolean in = FALSE; /* are we inside the triangle? */ - int cx[7]; + int cx[MAX_PLANES]; for (i = 0; i < nr_planes; i++) cx[i] = c[i]; @@ -607,9 +690,11 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, */ int count = util_bitcount(partial); in = TRUE; - if (!lp_scene_bin_command( scene, x, y, - lp_rast_tri_tab[count], - lp_rast_arg_triangle(tri, partial) )) + + if (!lp_scene_bin_cmd_with_state( scene, x, y, + setup->fs.stored, + lp_rast_tri_tab[count], + lp_rast_arg_triangle(tri, partial) )) goto fail; LP_COUNT(nr_partially_covered_64); @@ -648,40 +733,62 @@ fail: /** - * Draw triangle if it's CW, cull otherwise. + * Try to draw the triangle, restart the scene on failure. */ -static void triangle_cw( struct lp_setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) +static void retry_triangle_ccw( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean front) { - if (!do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface )) + if (!do_triangle_ccw( setup, v0, v1, v2, front )) { - lp_setup_flush_and_restart(setup); + if (!lp_setup_flush_and_restart(setup)) + return; - if (!do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface )) - assert(0); + if (!do_triangle_ccw( setup, v0, v1, v2, front )) + return; } } +static INLINE float +calc_area(const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + float dx01 = v0[0][0] - v1[0][0]; + float dy01 = v0[0][1] - v1[0][1]; + float dx20 = v2[0][0] - v0[0][0]; + float dy20 = v2[0][1] - v0[0][1]; + return dx01 * dy20 - dx20 * dy01; +} + /** - * Draw triangle if it's CCW, cull otherwise. + * Draw triangle if it's CW, cull otherwise. */ -static void triangle_ccw( struct lp_setup_context *setup, +static void triangle_cw( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) { - if (!do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface )) - { - lp_setup_flush_and_restart(setup); - if (!do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface )) - assert(0); - } + float area = calc_area(v0, v1, v2); + + if (area < 0.0f) + retry_triangle_ccw(setup, v0, v2, v1, !setup->ccw_is_frontface); } +static void triangle_ccw( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + float area = calc_area(v0, v1, v2); + + if (area > 0.0f) + retry_triangle_ccw(setup, v0, v1, v2, setup->ccw_is_frontface); +} /** * Draw triangle whether it's CW or CCW. @@ -691,18 +798,12 @@ static void triangle_both( struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4] ) { - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0][0] - v2[0][0]; - const float ey = v0[0][1] - v2[0][1]; - const float fx = v1[0][0] - v2[0][0]; - const float fy = v1[0][1] - v2[0][1]; - - /* det = cross(e,f).z */ - const float det = ex * fy - ey * fx; - if (det < 0.0f) - triangle_ccw( setup, v0, v1, v2 ); - else if (det > 0.0f) - triangle_cw( setup, v0, v1, v2 ); + float area = calc_area(v0, v1, v2); + + if (area > 0.0f) + retry_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ); + else if (area < 0.0f) + retry_triangle_ccw( setup, v0, v2, v1, !setup->ccw_is_frontface ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index 6308561f24..9c1f0fe793 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -141,7 +141,8 @@ lp_setup_draw_elements(struct vbuf_render *vbr, const ushort *indices, uint nr) const boolean flatshade_first = setup->flatshade_first; unsigned i; - lp_setup_update_state(setup, TRUE); + if (!lp_setup_update_state(setup, TRUE)) + return; switch (setup->prim) { case PIPE_PRIM_POINTS: @@ -338,7 +339,8 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) const boolean flatshade_first = setup->flatshade_first; unsigned i; - lp_setup_update_state(setup, TRUE); + if (!lp_setup_update_state(setup, TRUE)) + return; switch (setup->prim) { case PIPE_PRIM_POINTS: diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index de242aa93c..0f5f7369e0 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -67,14 +67,14 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); - for (i = 0; i < lpfs->info.num_inputs; i++) { + for (i = 0; i < lpfs->info.base.num_inputs; i++) { /* * Search for each input in current vs output: */ vs_index = draw_find_shader_output(llvmpipe->draw, - lpfs->info.input_semantic_name[i], - lpfs->info.input_semantic_index[i]); + lpfs->info.base.input_semantic_name[i], + lpfs->info.base.input_semantic_index[i]); /* * Emit the requested fs attribute for all but position. @@ -94,7 +94,6 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) draw_compute_vertex_size(vinfo); lp_setup_set_vertex_info(llvmpipe->setup, vinfo); - } @@ -153,11 +152,16 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) lp_setup_set_fs_constants(llvmpipe->setup, llvmpipe->constants[PIPE_SHADER_FRAGMENT][0]); - if (llvmpipe->dirty & LP_NEW_SAMPLER_VIEW) + if (llvmpipe->dirty & (LP_NEW_SAMPLER_VIEW)) lp_setup_set_fragment_sampler_views(llvmpipe->setup, llvmpipe->num_fragment_sampler_views, llvmpipe->fragment_sampler_views); + if (llvmpipe->dirty & (LP_NEW_SAMPLER)) + lp_setup_set_fragment_sampler_state(llvmpipe->setup, + llvmpipe->num_samplers, + llvmpipe->sampler); + llvmpipe->dirty = 0; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index ad058e384a..9fbedac165 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -99,74 +99,12 @@ #include <llvm-c/Analysis.h> +#include <llvm-c/BitWriter.h> static unsigned fs_no = 0; -/** - * Generate the depth /stencil test code. - */ -static void -generate_depth_stencil(LLVMBuilderRef builder, - const struct lp_fragment_shader_variant_key *key, - struct lp_type src_type, - struct lp_build_mask_context *mask, - LLVMValueRef stencil_refs[2], - LLVMValueRef src, - LLVMValueRef dst_ptr, - LLVMValueRef facing, - LLVMValueRef counter) -{ - const struct util_format_description *format_desc; - struct lp_type dst_type; - - if (!key->depth.enabled && !key->stencil[0].enabled && !key->stencil[1].enabled) - return; - - format_desc = util_format_description(key->zsbuf_format); - assert(format_desc); - - /* - * Depths are expected to be between 0 and 1, even if they are stored in - * floats. Setting these bits here will ensure that the lp_build_conv() call - * below won't try to unnecessarily clamp the incoming values. - */ - if(src_type.floating) { - src_type.sign = FALSE; - src_type.norm = TRUE; - } - else { - assert(!src_type.sign); - assert(src_type.norm); - } - - /* Pick the depth type. */ - dst_type = lp_depth_type(format_desc, src_type.width*src_type.length); - - /* FIXME: Cope with a depth test type with a different bit width. */ - assert(dst_type.width == src_type.width); - assert(dst_type.length == src_type.length); - - /* Convert fragment Z from float to integer */ - lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1); - - dst_ptr = LLVMBuildBitCast(builder, - dst_ptr, - LLVMPointerType(lp_build_vec_type(dst_type), 0), ""); - lp_build_depth_stencil_test(builder, - &key->depth, - key->stencil, - dst_type, - format_desc, - mask, - stencil_refs, - src, - dst_ptr, - facing, - counter); -} - /** * Expand the relevent bits of mask_input to a 4-dword mask for the @@ -248,6 +186,26 @@ generate_quad_mask(LLVMBuilderRef builder, } +#define EARLY_DEPTH_TEST 0x1 +#define LATE_DEPTH_TEST 0x2 +#define EARLY_DEPTH_WRITE 0x4 +#define LATE_DEPTH_WRITE 0x8 + +static int +find_output_by_semantic( const struct tgsi_shader_info *info, + unsigned semantic, + unsigned index ) +{ + int i; + + for (i = 0; i < info->num_outputs; i++) + if (info->output_semantic_name[i] == semantic && + info->output_semantic_index[i] == index) + return i; + + return -1; +} + /** * Generate the fragment shader, depth/stencil test, and alpha tests. @@ -261,7 +219,7 @@ generate_fs(struct lp_fragment_shader *shader, struct lp_type type, LLVMValueRef context_ptr, unsigned i, - const struct lp_build_interp_soa_context *interp, + struct lp_build_interp_soa_context *interp, struct lp_build_sampler_soa *sampler, LLVMValueRef *pmask, LLVMValueRef (*color)[4], @@ -271,18 +229,52 @@ generate_fs(struct lp_fragment_shader *shader, LLVMValueRef mask_input, LLVMValueRef counter) { + const struct util_format_description *zs_format_desc = NULL; const struct tgsi_token *tokens = shader->base.tokens; LLVMTypeRef vec_type; LLVMValueRef consts_ptr; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; - LLVMValueRef z = interp->pos[2]; + LLVMValueRef z; + LLVMValueRef zs_value = NULL; LLVMValueRef stencil_refs[2]; - struct lp_build_flow_context *flow; struct lp_build_mask_context mask; - boolean early_depth_stencil_test; + boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 && + shader->info.base.num_inputs < 3 && + shader->info.base.num_instructions < 8); unsigned attrib; unsigned chan; unsigned cbuf; + unsigned depth_mode; + + if (key->depth.enabled || + key->stencil[0].enabled || + key->stencil[1].enabled) { + + zs_format_desc = util_format_description(key->zsbuf_format); + assert(zs_format_desc); + + if (!shader->info.base.writes_z) { + if (key->alpha.enabled || shader->info.base.uses_kill) + /* With alpha test and kill, can do the depth test early + * and hopefully eliminate some quads. But need to do a + * special deferred depth write once the final mask value + * is known. + */ + depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE; + else + depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE; + } + else { + depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE; + } + + if (!(key->depth.enabled && key->depth.writemask) && + !(key->stencil[0].enabled && key->stencil[0].writemask)) + depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE); + } + else { + depth_mode = 0; + } assert(i < 4); @@ -293,20 +285,14 @@ generate_fs(struct lp_fragment_shader *shader, consts_ptr = lp_jit_context_constants(builder, context_ptr); - flow = lp_build_flow_create(builder); - memset(outputs, 0, sizeof outputs); - lp_build_flow_scope_begin(flow); - /* Declare the color and z variables */ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { - color[cbuf][chan] = LLVMGetUndef(vec_type); - lp_build_flow_scope_declare(flow, &color[cbuf][chan]); + color[cbuf][chan] = lp_build_alloca(builder, vec_type, "color"); } } - lp_build_flow_scope_declare(flow, &z); /* do triangle edge testing */ if (partial_mask) { @@ -318,74 +304,126 @@ generate_fs(struct lp_fragment_shader *shader, } /* 'mask' will control execution based on quad's pixel alive/killed state */ - lp_build_mask_begin(&mask, flow, type, *pmask); - - early_depth_stencil_test = - (key->depth.enabled || key->stencil[0].enabled) && - !key->alpha.enabled && - !shader->info.uses_kill && - !shader->info.writes_z; - - if (early_depth_stencil_test) - generate_depth_stencil(builder, key, - type, &mask, - stencil_refs, z, depth_ptr, facing, counter); + lp_build_mask_begin(&mask, builder, type, *pmask); + + if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader) + lp_build_mask_check(&mask); + + lp_build_interp_soa_update_pos(interp, i); + z = interp->pos[2]; + + if (depth_mode & EARLY_DEPTH_TEST) { + lp_build_depth_stencil_test(builder, + &key->depth, + key->stencil, + type, + zs_format_desc, + &mask, + stencil_refs, + z, + depth_ptr, facing, + &zs_value, + !simple_shader); + + if (depth_mode & EARLY_DEPTH_WRITE) { + lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value); + } + } + lp_build_interp_soa_update_inputs(interp, i); + + /* Build the actual shader */ lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, - outputs, sampler, &shader->info); + outputs, sampler, &shader->info.base); - /* loop over fragment shader outputs/results */ - for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(outputs[attrib][chan]) { - LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); - lp_build_name(out, "output%u.%u.%c", i, attrib, "xyzw"[chan]); - - switch (shader->info.output_semantic_name[attrib]) { - case TGSI_SEMANTIC_COLOR: - { - unsigned cbuf = shader->info.output_semantic_index[attrib]; - - lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]); - - /* Alpha test */ - /* XXX: should only test the final assignment to alpha */ - if (cbuf == 0 && chan == 3 && key->alpha.enabled) { - LLVMValueRef alpha = out; - LLVMValueRef alpha_ref_value; - alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); - alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); - lp_build_alpha_test(builder, key->alpha.func, type, - &mask, alpha, alpha_ref_value); - } - - color[cbuf][chan] = out; - break; - } - - case TGSI_SEMANTIC_POSITION: - if(chan == 2) - z = out; - break; - } - } + + /* Alpha test */ + if (key->alpha.enabled) { + int color0 = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_COLOR, + 0); + + if (color0 != -1 && outputs[color0][3]) { + LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha"); + LLVMValueRef alpha_ref_value; + + alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); + alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); + + lp_build_alpha_test(builder, key->alpha.func, type, + &mask, alpha, alpha_ref_value, + (depth_mode & LATE_DEPTH_TEST) != 0); } } - if (!early_depth_stencil_test) - generate_depth_stencil(builder, key, - type, &mask, - stencil_refs, z, depth_ptr, facing, counter); + /* Late Z test */ + if (depth_mode & LATE_DEPTH_TEST) { + int pos0 = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_POSITION, + 0); + + if (pos0 != -1 && outputs[pos0][2]) { + z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z"); + } - lp_build_mask_end(&mask); + lp_build_depth_stencil_test(builder, + &key->depth, + key->stencil, + type, + zs_format_desc, + &mask, + stencil_refs, + z, + depth_ptr, facing, + &zs_value, + !simple_shader); + /* Late Z write */ + if (depth_mode & LATE_DEPTH_WRITE) { + lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value); + } + } + else if ((depth_mode & EARLY_DEPTH_TEST) && + (depth_mode & LATE_DEPTH_WRITE)) + { + /* Need to apply a reduced mask to the depth write. Reload the + * depth value, update from zs_value with the new mask value and + * write that out. + */ + lp_build_deferred_depth_write(builder, + type, + zs_format_desc, + &mask, + depth_ptr, + zs_value); + } - lp_build_flow_scope_end(flow); - lp_build_flow_destroy(flow); + /* Color write */ + for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib) + { + if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR && + shader->info.base.output_semantic_index[attrib] < key->nr_cbufs) + { + unsigned cbuf = shader->info.base.output_semantic_index[attrib]; + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + if(outputs[attrib][chan]) { + /* XXX: just initialize outputs to point at colors[] and + * skip this. + */ + LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); + lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]); + LLVMBuildStore(builder, out, color[cbuf][chan]); + } + } + } + } - *pmask = mask.value; + if (counter) + lp_build_occlusion_count(builder, type, + lp_build_mask_value(&mask), counter); + *pmask = lp_build_mask_end(&mask); } @@ -406,10 +444,10 @@ generate_blend(const struct pipe_blend_state *blend, LLVMValueRef context_ptr, LLVMValueRef mask, LLVMValueRef *src, - LLVMValueRef dst_ptr) + LLVMValueRef dst_ptr, + boolean do_branch) { struct lp_build_context bld; - struct lp_build_flow_context *flow; struct lp_build_mask_context mask_ctx; LLVMTypeRef vec_type; LLVMValueRef const_ptr; @@ -420,10 +458,9 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_context_init(&bld, builder, type); - flow = lp_build_flow_create(builder); - - /* we'll use this mask context to skip blending if all pixels are dead */ - lp_build_mask_begin(&mask_ctx, flow, type, mask); + lp_build_mask_begin(&mask_ctx, builder, type, mask); + if (do_branch) + lp_build_mask_check(&mask_ctx); vec_type = lp_build_vec_type(type); @@ -456,7 +493,6 @@ generate_blend(const struct pipe_blend_state *blend, } lp_build_mask_end(&mask_ctx); - lp_build_flow_destroy(flow); } @@ -501,6 +537,7 @@ generate_fragment(struct llvmpipe_screen *screen, LLVMValueRef blend_mask; LLVMValueRef function; LLVMValueRef facing; + const struct util_format_description *zs_format_desc; unsigned num_fs; unsigned i; unsigned chan; @@ -508,8 +545,8 @@ generate_fragment(struct llvmpipe_screen *screen, /* Adjust color input interpolation according to flatshade state: */ - memcpy(inputs, shader->inputs, shader->info.num_inputs * sizeof inputs[0]); - for (i = 0; i < shader->info.num_inputs; i++) { + memcpy(inputs, shader->inputs, shader->info.base.num_inputs * sizeof inputs[0]); + for (i = 0; i < shader->info.base.num_inputs; i++) { if (inputs[i].interp == LP_INTERP_COLOR) { if (key->flatshade) inputs[i].interp = LP_INTERP_CONSTANT; @@ -553,12 +590,12 @@ generate_fragment(struct llvmpipe_screen *screen, arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ arg_types[2] = LLVMInt32Type(); /* y */ - arg_types[3] = LLVMFloatType(); /* facing */ + arg_types[3] = LLVMInt32Type(); /* facing */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */ arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ - arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ + arg_types[8] = LLVMPointerType(LLVMInt8Type(), 0); /* depth */ arg_types[9] = LLVMInt32Type(); /* mask_input */ arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */ @@ -616,7 +653,7 @@ generate_fragment(struct llvmpipe_screen *screen, * already included in the shader key. */ lp_build_interp_soa_init(&interp, - shader->info.num_inputs, + shader->info.base.num_inputs, inputs, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, @@ -626,15 +663,16 @@ generate_fragment(struct llvmpipe_screen *screen, sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); /* loop over quads in the block */ + zs_format_desc = util_format_description(key->zsbuf_format); + for(i = 0; i < num_fs; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef depth_offset = LLVMConstInt(LLVMInt32Type(), + i*fs_type.length*zs_format_desc->block.bits/8, + 0); LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS]; LLVMValueRef depth_ptr_i; - if(i != 0) - lp_build_interp_soa_update(&interp, i); - - depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); + depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, ""); generate_fs(shader, key, builder, @@ -670,9 +708,18 @@ generate_fragment(struct llvmpipe_screen *screen, * Convert the fs's output color and mask to fit to the blending type. */ for(chan = 0; chan < NUM_CHANNELS; ++chan) { + LLVMValueRef fs_color_vals[LP_MAX_VECTOR_LENGTH]; + + for (i = 0; i < num_fs; i++) { + fs_color_vals[i] = + LLVMBuildLoad(builder, fs_out_color[cbuf][chan][i], "fs_color_vals"); + } + lp_build_conv(builder, fs_type, blend_type, - fs_out_color[cbuf][chan], num_fs, + fs_color_vals, + num_fs, &blend_in_color[chan], 1); + lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); } @@ -695,14 +742,23 @@ generate_fragment(struct llvmpipe_screen *screen, /* * Blending. */ - generate_blend(&key->blend, - rt, - builder, - blend_type, - context_ptr, - blend_mask, - blend_in_color, - color_ptr); + { + /* Could the 4x4 have been killed? + */ + boolean do_branch = ((key->depth.enabled || key->stencil[0].enabled) && + !key->alpha.enabled && + !shader->info.base.uses_kill); + + generate_blend(&key->blend, + rt, + builder, + blend_type, + context_ptr, + blend_mask, + blend_in_color, + color_ptr, + do_branch); + } } #ifdef PIPE_ARCH_X86 @@ -727,12 +783,17 @@ generate_fragment(struct llvmpipe_screen *screen, /* Apply optimizations to LLVM IR */ LLVMRunFunctionPassManager(screen->pass, function); - if (gallivm_debug & GALLIVM_DEBUG_IR) { + if ((gallivm_debug & GALLIVM_DEBUG_IR) || (LP_DEBUG & DEBUG_FS)) { /* Print the LLVM IR to stderr */ lp_debug_dump_value(function); debug_printf("\n"); } + /* Dump byte code to a file */ + if (0) { + LLVMWriteBitcodeToFile(lp_build_module, "llvmpipe.bc"); + } + /* * Translate the LLVM IR into machine code. */ @@ -741,7 +802,7 @@ generate_fragment(struct llvmpipe_screen *screen, variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f); - if (gallivm_debug & GALLIVM_DEBUG_ASM) { + if ((gallivm_debug & GALLIVM_DEBUG_ASM) || (LP_DEBUG & DEBUG_FS)) { lp_disassemble(f); } lp_func_delete_body(function); @@ -756,6 +817,9 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) debug_printf("fs variant %p:\n", (void *) key); + if (key->flatshade) { + debug_printf("flatshade = 1\n"); + } for (i = 0; i < key->nr_cbufs; ++i) { debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i])); } @@ -780,6 +844,10 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE)); } + if (key->occlusion_count) { + debug_printf("occlusion_count = 1\n"); + } + if (key->blend.logicop_enable) { debug_printf("blend.logicop_func = %s\n", util_dump_logicop(key->blend.logicop_func, TRUE)); } @@ -792,31 +860,33 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) debug_printf("blend.alpha_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE)); } debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask); - for (i = 0; i < PIPE_MAX_SAMPLERS; ++i) { - if (key->sampler[i].format) { - debug_printf("sampler[%u] = \n", i); - debug_printf(" .format = %s\n", - util_format_name(key->sampler[i].format)); - debug_printf(" .target = %s\n", - util_dump_tex_target(key->sampler[i].target, TRUE)); - debug_printf(" .pot = %u %u %u\n", - key->sampler[i].pot_width, - key->sampler[i].pot_height, - key->sampler[i].pot_depth); - debug_printf(" .wrap = %s %s %s\n", - util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), - util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), - util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); - debug_printf(" .min_img_filter = %s\n", - util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); - debug_printf(" .min_mip_filter = %s\n", - util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); - debug_printf(" .mag_img_filter = %s\n", - util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); - if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) - debug_printf(" .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE)); - debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); - } + for (i = 0; i < key->nr_samplers; ++i) { + debug_printf("sampler[%u] = \n", i); + debug_printf(" .format = %s\n", + util_format_name(key->sampler[i].format)); + debug_printf(" .target = %s\n", + util_dump_tex_target(key->sampler[i].target, TRUE)); + debug_printf(" .pot = %u %u %u\n", + key->sampler[i].pot_width, + key->sampler[i].pot_height, + key->sampler[i].pot_depth); + debug_printf(" .wrap = %s %s %s\n", + util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), + util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), + util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); + debug_printf(" .min_img_filter = %s\n", + util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); + debug_printf(" .min_mip_filter = %s\n", + util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); + debug_printf(" .mag_img_filter = %s\n", + util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); + if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) + debug_printf(" .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE)); + debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); + debug_printf(" .min_max_lod_equal = %u\n", key->sampler[i].min_max_lod_equal); + debug_printf(" .lod_bias_non_zero = %u\n", key->sampler[i].lod_bias_non_zero); + debug_printf(" .apply_min_lod = %u\n", key->sampler[i].apply_min_lod); + debug_printf(" .apply_max_lod = %u\n", key->sampler[i].apply_max_lod); } } @@ -871,7 +941,7 @@ generate_variant(struct llvmpipe_screen *screen, !key->stencil[0].enabled && !key->alpha.enabled && !key->depth.enabled && - !shader->info.uses_kill + !shader->info.base.uses_kill ? TRUE : FALSE; @@ -896,6 +966,7 @@ static void * llvmpipe_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); struct lp_fragment_shader *shader; int nr_samplers; int i; @@ -908,20 +979,27 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, make_empty_list(&shader->variants); /* get/save the summary info for this shader */ - tgsi_scan_shader(templ->tokens, &shader->info); + lp_build_tgsi_info(templ->tokens, &shader->info); /* we need to keep a local copy of the tokens */ shader->base.tokens = tgsi_dup_tokens(templ->tokens); - nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + shader->draw_data = draw_create_fragment_shader(llvmpipe->draw, templ); + if (shader->draw_data == NULL) { + FREE((void *) shader->base.tokens); + FREE(shader); + return NULL; + } + + nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1; shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key, sampler[nr_samplers]); - for (i = 0; i < shader->info.num_inputs; i++) { - shader->inputs[i].usage_mask = shader->info.input_usage_mask[i]; + for (i = 0; i < shader->info.base.num_inputs; i++) { + shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i]; - switch (shader->info.input_interpolate[i]) { + switch (shader->info.base.input_interpolate[i]) { case TGSI_INTERPOLATE_CONSTANT: shader->inputs[i].interp = LP_INTERP_CONSTANT; break; @@ -936,7 +1014,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, break; } - switch (shader->info.input_semantic_name[i]) { + switch (shader->info.base.input_semantic_name[i]) { case TGSI_SEMANTIC_COLOR: /* Colors may be either linearly or constant interpolated in * the fragment shader, but that information isn't available @@ -963,8 +1041,8 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader); tgsi_dump(templ->tokens, 0); debug_printf("usage masks:\n"); - for (attrib = 0; attrib < shader->info.num_inputs; ++attrib) { - unsigned usage_mask = shader->info.input_usage_mask[attrib]; + for (attrib = 0; attrib < shader->info.base.num_inputs; ++attrib) { + unsigned usage_mask = shader->info.base.input_usage_mask[attrib]; debug_printf(" IN[%u].%s%s%s%s\n", attrib, usage_mask & TGSI_WRITEMASK_X ? "x" : "", @@ -989,6 +1067,9 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) draw_flush(llvmpipe->draw); + draw_bind_fragment_shader(llvmpipe->draw, + (llvmpipe->fs ? llvmpipe->fs->draw_data : NULL)); + llvmpipe->fs = fs; llvmpipe->dirty |= LP_NEW_FS; @@ -1046,6 +1127,8 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) li = next; } + draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data); + assert(shader->variants_cached == 0); FREE((void *) shader->base.tokens); FREE(shader); @@ -1087,7 +1170,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, * Return the blend factor equivalent to a destination alpha of one. */ static INLINE unsigned -force_dst_alpha_one(unsigned factor, boolean alpha) +force_dst_alpha_one(unsigned factor) { switch(factor) { case PIPE_BLENDFACTOR_DST_ALPHA: @@ -1098,15 +1181,6 @@ force_dst_alpha_one(unsigned factor, boolean alpha) return PIPE_BLENDFACTOR_ZERO; } - if (alpha) { - switch(factor) { - case PIPE_BLENDFACTOR_DST_COLOR: - return PIPE_BLENDFACTOR_ONE; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - return PIPE_BLENDFACTOR_ZERO; - } - } - return factor; } @@ -1183,21 +1257,24 @@ make_variant_key(struct llvmpipe_context *lp, * * TODO: This should be generalized to all channels for better * performance, but only alpha causes correctness issues. + * + * Also, force rgb/alpha func/factors match, to make AoS blending easier. */ if (format_desc->swizzle[3] > UTIL_FORMAT_SWIZZLE_W) { - blend_rt->rgb_src_factor = force_dst_alpha_one(blend_rt->rgb_src_factor, FALSE); - blend_rt->rgb_dst_factor = force_dst_alpha_one(blend_rt->rgb_dst_factor, FALSE); - blend_rt->alpha_src_factor = force_dst_alpha_one(blend_rt->alpha_src_factor, TRUE); - blend_rt->alpha_dst_factor = force_dst_alpha_one(blend_rt->alpha_dst_factor, TRUE); + blend_rt->rgb_src_factor = force_dst_alpha_one(blend_rt->rgb_src_factor); + blend_rt->rgb_dst_factor = force_dst_alpha_one(blend_rt->rgb_dst_factor); + blend_rt->alpha_func = blend_rt->rgb_func; + blend_rt->alpha_src_factor = blend_rt->rgb_src_factor; + blend_rt->alpha_dst_factor = blend_rt->rgb_dst_factor; } } /* This value will be the same for all the variants of a given shader: */ - key->nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1; for(i = 0; i < key->nr_samplers; ++i) { - if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { + if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { lp_sampler_static_state(&key->sampler[i], lp->fragment_sampler_views[i], lp->sampler[i]); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index f73c7801c0..7d58c4936c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -34,6 +34,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" /* for tgsi_shader_info */ #include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */ +#include "gallivm/lp_bld_tgsi.h" /* for lp_tgsi_info */ #include "lp_bld_interp.h" /* for struct lp_shader_input */ @@ -97,10 +98,12 @@ struct lp_fragment_shader { struct pipe_shader_state base; - struct tgsi_shader_info info; + struct lp_tgsi_info info; struct lp_fs_variant_list_item variants; + struct draw_fragment_shader *draw_data; + /* For debugging/profiling purposes */ unsigned variant_key_size; unsigned no; diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 0bad7320f3..dbd73812e4 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -78,8 +78,9 @@ llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle) lp_setup_set_point_state( llvmpipe->setup, llvmpipe->rasterizer->point_size, llvmpipe->rasterizer->point_size_per_vertex, - llvmpipe->rasterizer->sprite_coord_enable); - } + llvmpipe->rasterizer->sprite_coord_enable, + llvmpipe->rasterizer->sprite_coord_mode); + } llvmpipe->dirty |= LP_NEW_RASTERIZER; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 17a4a0ed02..1dd866195d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -246,9 +246,9 @@ llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp, struct pipe_sampler_view **views) { unsigned i; - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS]; - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS]; - const void *data[DRAW_MAX_TEXTURE_LEVELS]; + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS]; + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS]; + const void *data[PIPE_MAX_TEXTURE_LEVELS]; assert(num <= PIPE_MAX_VERTEX_SAMPLERS); if (!num) diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c index ee4991bf8d..a8dee280dd 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -603,7 +603,7 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp, assert(sizeof key->inputs[0] == sizeof(ushort)); - key->num_inputs = fs->info.num_inputs; + key->num_inputs = fs->info.base.num_inputs; key->flatshade_first = lp->rasterizer->flatshade_first; key->pixel_center_half = lp->rasterizer->gl_rasterization_rules; key->size = Offset(struct lp_setup_variant_key, diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index d0389f0cb0..8b6b5e1298 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -243,19 +243,6 @@ add_blend_test(LLVMModuleRef module, } -/** Add and limit result to ceiling of 1.0 */ -#define ADD_SAT(R, A, B) \ -do { \ - R = (A) + (B); if (R > 1.0f) R = 1.0f; \ -} while (0) - -/** Subtract and limit result to floor of 0.0 */ -#define SUB_SAT(R, A, B) \ -do { \ - R = (A) - (B); if (R < 0.0f) R = 0.0f; \ -} while (0) - - static void compute_blend_ref_term(unsigned rgb_factor, unsigned alpha_factor, @@ -423,19 +410,19 @@ compute_blend_ref(const struct pipe_blend_state *blend, */ switch (blend->rt[0].rgb_func) { case PIPE_BLEND_ADD: - ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */ - ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */ - ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */ + res[0] = src_term[0] + dst_term[0]; /* R */ + res[1] = src_term[1] + dst_term[1]; /* G */ + res[2] = src_term[2] + dst_term[2]; /* B */ break; case PIPE_BLEND_SUBTRACT: - SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */ - SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */ - SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */ + res[0] = src_term[0] - dst_term[0]; /* R */ + res[1] = src_term[1] - dst_term[1]; /* G */ + res[2] = src_term[2] - dst_term[2]; /* B */ break; case PIPE_BLEND_REVERSE_SUBTRACT: - SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */ - SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */ - SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */ + res[0] = dst_term[0] - src_term[0]; /* R */ + res[1] = dst_term[1] - src_term[1]; /* G */ + res[2] = dst_term[2] - src_term[2]; /* B */ break; case PIPE_BLEND_MIN: res[0] = MIN2(src_term[0], dst_term[0]); /* R */ @@ -456,13 +443,13 @@ compute_blend_ref(const struct pipe_blend_state *blend, */ switch (blend->rt[0].alpha_func) { case PIPE_BLEND_ADD: - ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */ + res[3] = src_term[3] + dst_term[3]; /* A */ break; case PIPE_BLEND_SUBTRACT: - SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */ + res[3] = src_term[3] - dst_term[3]; /* A */ break; case PIPE_BLEND_REVERSE_SUBTRACT: - SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */ + res[3] = dst_term[3] - src_term[3]; /* A */ break; case PIPE_BLEND_MIN: res[3] = MIN2(src_term[3], dst_term[3]); /* A */ @@ -676,6 +663,8 @@ test_one(unsigned verbose, fprintf(stderr, " Ref%c: ", channel); dump_vec(stderr, type, ref + j*stride); fprintf(stderr, "\n"); + + fprintf(stderr, "\n"); } } } @@ -773,7 +762,7 @@ blend_funcs[] = { const struct lp_type blend_types[] = { /* float, fixed, sign, norm, width, len */ - { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */ + { TRUE, FALSE, TRUE, FALSE, 32, 4 }, /* f32 x 4 */ { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */ }; diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 7bbbc61d4c..7a0d06ae2c 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -205,16 +205,19 @@ random_elem(struct lp_type type, void *dst, unsigned index) assert(index < type.length); value = (double)rand()/(double)RAND_MAX; if(!type.norm) { - unsigned long long mask; - if (type.floating) - mask = ~(unsigned long long)0; - else if (type.fixed) - mask = ((unsigned long long)1 << (type.width / 2)) - 1; - else if (type.sign) - mask = ((unsigned long long)1 << (type.width - 1)) - 1; - else - mask = ((unsigned long long)1 << type.width) - 1; - value += (double)(mask & rand()); + if (type.floating) { + value *= 2.0; + } + else { + unsigned long long mask; + if (type.fixed) + mask = ((unsigned long long)1 << (type.width / 2)) - 1; + else if (type.sign) + mask = ((unsigned long long)1 << (type.width - 1)) - 1; + else + mask = ((unsigned long long)1 << type.width) - 1; + value += (double)(mask & rand()); + } } if(!type.sign) if(rand() & 1) @@ -261,12 +264,18 @@ boolean compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps) { unsigned i; + eps *= type.floating ? 8.0 : 2.0; for (i = 0; i < type.length; ++i) { double res_elem = read_elem(type, res, i); double ref_elem = read_elem(type, ref, i); - double delta = fabs(res_elem - ref_elem); - if(delta >= 2.0*eps) + double delta = res_elem - ref_elem; + if (ref_elem < -1.0 || ref_elem > 1.0) { + delta /= ref_elem; + } + delta = fabs(delta); + if (delta >= eps) { return FALSE; + } } return TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c index 57b0ee5776..0ca2791592 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_round.c +++ b/src/gallium/drivers/llvmpipe/lp_test_round.c @@ -75,10 +75,7 @@ add_test(LLVMModuleRef module, const char *name, lp_func_t lp_func) LLVMValueRef ret; struct lp_build_context bld; - bld.builder = builder; - bld.type.floating = 1; - bld.type.width = 32; - bld.type.length = 4; + lp_build_context_init(&bld, builder, lp_float32_vec4_type()); LLVMSetFunctionCallConv(func, LLVMCCallConv); @@ -100,9 +97,10 @@ printv(char* string, v4sf value) f[0], f[1], f[2], f[3]); } -static void +static boolean compare(v4sf x, v4sf y) { + boolean success = TRUE; float *xp = (float *) &x; float *yp = (float *) &y; if (xp[0] != yp[0] || @@ -110,7 +108,9 @@ compare(v4sf x, v4sf y) xp[2] != yp[2] || xp[3] != yp[3]) { printf(" Incorrect result! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ \n"); + success = FALSE; } + return success; } @@ -191,7 +191,7 @@ test_round(unsigned verbose, FILE *fp) y = round_func(x); printv("C round(x) ", ref); printv("LLVM round(x)", y); - compare(ref, y); + success = success && compare(ref, y); refp[0] = trunc(xp[0]); refp[1] = trunc(xp[1]); @@ -200,7 +200,7 @@ test_round(unsigned verbose, FILE *fp) y = trunc_func(x); printv("C trunc(x) ", ref); printv("LLVM trunc(x)", y); - compare(ref, y); + success = success && compare(ref, y); refp[0] = floor(xp[0]); refp[1] = floor(xp[1]); @@ -209,7 +209,7 @@ test_round(unsigned verbose, FILE *fp) y = floor_func(x); printv("C floor(x) ", ref); printv("LLVM floor(x)", y); - compare(ref, y); + success = success && compare(ref, y); refp[0] = ceil(xp[0]); refp[1] = ceil(xp[1]); @@ -218,7 +218,7 @@ test_round(unsigned verbose, FILE *fp) y = ceil_func(x); printv("C ceil(x) ", ref); printv("LLVM ceil(x) ", y); - compare(ref, y); + success = success && compare(ref, y); } LLVMFreeMachineCodeForFunction(engine, test_round); @@ -247,11 +247,7 @@ test_round(unsigned verbose, FILE *fp) boolean test_all(unsigned verbose, FILE *fp) { - boolean success = TRUE; - - test_round(verbose, fp); - - return success; + return test_round(verbose, fp); } diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c index 7ab357f162..79939b1a39 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c +++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c @@ -72,10 +72,7 @@ add_sincos_test(LLVMModuleRef module, boolean sin) LLVMValueRef ret; struct lp_build_context bld; - bld.builder = builder; - bld.type.floating = 1; - bld.type.width = 32; - bld.type.length = 4; + lp_build_context_init(&bld, builder, lp_float32_vec4_type()); LLVMSetFunctionCallConv(func, LLVMCCallConv); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c index 4e026cc8ff..f417fc8a9e 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c @@ -151,6 +151,10 @@ LP_LLVM_TEXTURE_MEMBER(last_level, LP_JIT_TEXTURE_LAST_LEVEL, TRUE) LP_LLVM_TEXTURE_MEMBER(row_stride, LP_JIT_TEXTURE_ROW_STRIDE, FALSE) LP_LLVM_TEXTURE_MEMBER(img_stride, LP_JIT_TEXTURE_IMG_STRIDE, FALSE) LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA, FALSE) +LP_LLVM_TEXTURE_MEMBER(min_lod, LP_JIT_TEXTURE_MIN_LOD, TRUE) +LP_LLVM_TEXTURE_MEMBER(max_lod, LP_JIT_TEXTURE_MAX_LOD, TRUE) +LP_LLVM_TEXTURE_MEMBER(lod_bias, LP_JIT_TEXTURE_LOD_BIAS, TRUE) +LP_LLVM_TEXTURE_MEMBER(border_color, LP_JIT_TEXTURE_BORDER_COLOR, FALSE) static void @@ -217,6 +221,11 @@ lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, sampler->dynamic_state.base.row_stride = lp_llvm_texture_row_stride; sampler->dynamic_state.base.img_stride = lp_llvm_texture_img_stride; sampler->dynamic_state.base.data_ptr = lp_llvm_texture_data_ptr; + sampler->dynamic_state.base.min_lod = lp_llvm_texture_min_lod; + sampler->dynamic_state.base.max_lod = lp_llvm_texture_max_lod; + sampler->dynamic_state.base.lod_bias = lp_llvm_texture_lod_bias; + sampler->dynamic_state.base.border_color = lp_llvm_texture_border_color; + sampler->dynamic_state.static_state = static_state; sampler->dynamic_state.context_ptr = context_ptr; diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index 2ba39052ab..e49f9c62fe 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -289,172 +289,141 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix): print -def generate_ssse3(): +def generate_sse2(): print ''' #if defined(PIPE_ARCH_SSE) #include "util/u_sse.h" -static void -lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst, - const uint8_t *src, unsigned src_stride, - unsigned x0, unsigned y0) +static ALWAYS_INLINE void +swz4( const __m128i * restrict x, + const __m128i * restrict y, + const __m128i * restrict z, + const __m128i * restrict w, + __m128i * restrict a, + __m128i * restrict b, + __m128i * restrict c, + __m128i * restrict d) { + __m128i i, j, k, l; + __m128i m, n, o, p; + __m128i e, f, g, h; + + m = _mm_unpacklo_epi8(*x,*y); + n = _mm_unpackhi_epi8(*x,*y); + o = _mm_unpacklo_epi8(*z,*w); + p = _mm_unpackhi_epi8(*z,*w); + + i = _mm_unpacklo_epi16(m,n); + j = _mm_unpackhi_epi16(m,n); + k = _mm_unpacklo_epi16(o,p); + l = _mm_unpackhi_epi16(o,p); + + e = _mm_unpacklo_epi8(i,j); + f = _mm_unpackhi_epi8(i,j); + g = _mm_unpacklo_epi8(k,l); + h = _mm_unpackhi_epi8(k,l); + + *a = _mm_unpacklo_epi64(e,g); + *b = _mm_unpackhi_epi64(e,g); + *c = _mm_unpacklo_epi64(f,h); + *d = _mm_unpackhi_epi64(f,h); +} + +static ALWAYS_INLINE void +unswz4( const __m128i * restrict a, + const __m128i * restrict b, + const __m128i * restrict c, + const __m128i * restrict d, + __m128i * restrict x, + __m128i * restrict y, + __m128i * restrict z, + __m128i * restrict w) +{ + __m128i i, j, k, l; + __m128i m, n, o, p; + + i = _mm_unpacklo_epi8(*a,*b); + j = _mm_unpackhi_epi8(*a,*b); + k = _mm_unpacklo_epi8(*c,*d); + l = _mm_unpackhi_epi8(*c,*d); + + m = _mm_unpacklo_epi16(i,k); + n = _mm_unpackhi_epi16(i,k); + o = _mm_unpacklo_epi16(j,l); + p = _mm_unpackhi_epi16(j,l); + + *x = _mm_unpacklo_epi64(m,n); + *y = _mm_unpackhi_epi64(m,n); + *z = _mm_unpacklo_epi64(o,p); + *w = _mm_unpackhi_epi64(o,p); +} +static void +lp_tile_b8g8r8a8_unorm_swizzle_4ub_sse2(uint8_t * restrict dst, + const uint8_t * restrict src, unsigned src_stride, + unsigned x0, unsigned y0) +{ + __m128i *dst128 = (__m128i *) dst; unsigned x, y; - __m128i *pdst = (__m128i*) dst; - const uint8_t *ysrc0 = src + y0*src_stride + x0*sizeof(uint32_t); - unsigned int tile_stridex = src_stride*(TILE_VECTOR_HEIGHT - 1) - sizeof(uint32_t)*TILE_VECTOR_WIDTH; - unsigned int tile_stridey = src_stride*TILE_VECTOR_HEIGHT; - - const __m128i shuffle00 = _mm_setr_epi8(0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle01 = _mm_setr_epi8(0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle02 = _mm_setr_epi8(0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle03 = _mm_setr_epi8(0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - - const __m128i shuffle10 = _mm_setr_epi8(0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle11 = _mm_setr_epi8(0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle12 = _mm_setr_epi8(0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle13 = _mm_setr_epi8(0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - - const __m128i shuffle20 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff); - const __m128i shuffle21 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff); - const __m128i shuffle22 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff); - const __m128i shuffle23 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff); - - const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e); - const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d); - const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c); - const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f); - - for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) { - __m128i line0 = *(__m128i*)ysrc0; - const uint8_t *ysrc = ysrc0 + src_stride; - ysrc0 += tile_stridey; - - for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) { - __m128i r, g, b, a, line1; - line1 = *(__m128i*)ysrc; - PIPE_READ_WRITE_BARRIER(); - ysrc += src_stride; - r = _mm_shuffle_epi8(line0, shuffle00); - g = _mm_shuffle_epi8(line0, shuffle01); - b = _mm_shuffle_epi8(line0, shuffle02); - a = _mm_shuffle_epi8(line0, shuffle03); - - line0 = *(__m128i*)ysrc; - PIPE_READ_WRITE_BARRIER(); - ysrc += src_stride; - r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle10)); - g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle11)); - b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle12)); - a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle13)); - - line1 = *(__m128i*)ysrc; - PIPE_READ_WRITE_BARRIER(); - ysrc -= tile_stridex; - r = _mm_or_si128(r, _mm_shuffle_epi8(line0, shuffle20)); - g = _mm_or_si128(g, _mm_shuffle_epi8(line0, shuffle21)); - b = _mm_or_si128(b, _mm_shuffle_epi8(line0, shuffle22)); - a = _mm_or_si128(a, _mm_shuffle_epi8(line0, shuffle23)); - - if (x + 1 < TILE_SIZE) { - line0 = *(__m128i*)ysrc; - ysrc += src_stride; - } - - PIPE_READ_WRITE_BARRIER(); - r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle30)); - g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle31)); - b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle32)); - a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle33)); - - *pdst++ = r; - *pdst++ = g; - *pdst++ = b; - *pdst++ = a; + + src += y0 * src_stride; + src += x0 * sizeof(uint32_t); + + for (y = 0; y < TILE_SIZE; y += 4) { + const uint8_t *src_row = src; + + for (x = 0; x < TILE_SIZE; x += 4) { + swz4((const __m128i *) (src_row + 0 * src_stride), + (const __m128i *) (src_row + 1 * src_stride), + (const __m128i *) (src_row + 2 * src_stride), + (const __m128i *) (src_row + 3 * src_stride), + dst128 + 2, /* b */ + dst128 + 1, /* g */ + dst128 + 0, /* r */ + dst128 + 3); /* a */ + + dst128 += 4; + src_row += sizeof(__m128i); } - } + src += 4 * src_stride; + } } static void -lp_tile_b8g8r8a8_unorm_unswizzle_4ub_ssse3(const uint8_t *src, - uint8_t *dst, unsigned dst_stride, +lp_tile_b8g8r8a8_unorm_unswizzle_4ub_sse2(const uint8_t * restrict src, + uint8_t * restrict dst, unsigned dst_stride, unsigned x0, unsigned y0) { unsigned int x, y; - const __m128i *psrc = (__m128i*) src; - const __m128i *end = (__m128i*) (src + (y0 + TILE_SIZE - 1)*dst_stride + (x0 + TILE_SIZE - 1)*sizeof(uint32_t)); - uint8_t *pdst = dst + y0 * dst_stride + x0 * sizeof(uint32_t); - __m128i c0 = *psrc++; - __m128i c1; - - const __m128i shuffle00 = _mm_setr_epi8(0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff); - const __m128i shuffle01 = _mm_setr_epi8(0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff); - const __m128i shuffle02 = _mm_setr_epi8(0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff); - const __m128i shuffle03 = _mm_setr_epi8(0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff); - - const __m128i shuffle10 = _mm_setr_epi8(0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff); - const __m128i shuffle11 = _mm_setr_epi8(0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff); - const __m128i shuffle12 = _mm_setr_epi8(0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff); - const __m128i shuffle13 = _mm_setr_epi8(0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff); - - const __m128i shuffle20 = _mm_setr_epi8(0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff,0xff); - const __m128i shuffle21 = _mm_setr_epi8(0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff,0xff); - const __m128i shuffle22 = _mm_setr_epi8(0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff,0xff); - const __m128i shuffle23 = _mm_setr_epi8(0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff,0xff); - - const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05); - const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07); - const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d); - const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f); - - for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) { - __m128i *tile = (__m128i*) pdst; - pdst += dst_stride * TILE_VECTOR_HEIGHT; - for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) { - uint8_t *linep = (uint8_t*) (tile++); - __m128i line0, line1, line2, line3; - - c1 = *psrc++; /* r */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_shuffle_epi8(c0, shuffle00); - line1 = _mm_shuffle_epi8(c0, shuffle01); - line2 = _mm_shuffle_epi8(c0, shuffle02); - line3 = _mm_shuffle_epi8(c0, shuffle03); - - c0 = *psrc++; /* g */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle10)); - line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle11)); - line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle12)); - line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle13)); - - c1 = *psrc++; /* b */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c0, shuffle20)); - line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c0, shuffle21)); - line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c0, shuffle22)); - line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c0, shuffle23)); - - if (psrc != end) - c0 = *psrc++; /* a */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle30)); - line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle31)); - line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle32)); - line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle33)); - - *(__m128i*) (linep) = line0; - *(__m128i*) (((char*)linep) + dst_stride) = line1; - *(__m128i*) (((char*)linep) + 2 * dst_stride) = line2; - *(__m128i*) (((char*)linep) + 3 * dst_stride) = line3; + const __m128i *src128 = (const __m128i *) src; + + dst += y0 * dst_stride; + dst += x0 * sizeof(uint32_t); + + for (y = 0; y < TILE_SIZE; y += 4) { + const uint8_t *dst_row = dst; + + for (x = 0; x < TILE_SIZE; x += 4) { + unswz4( &src128[2], /* b */ + &src128[1], /* g */ + &src128[0], /* r */ + &src128[3], /* a */ + (__m128i *) (dst_row + 0 * dst_stride), + (__m128i *) (dst_row + 1 * dst_stride), + (__m128i *) (dst_row + 2 * dst_stride), + (__m128i *) (dst_row + 3 * dst_stride)); + + src128 += 4; + dst_row += sizeof(__m128i);; } + + dst += 4 * dst_stride; } } -#endif /* PIPE_ARCH_SSSE3 */ +#endif /* PIPE_ARCH_SSE */ ''' @@ -479,7 +448,7 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix): func_name = 'lp_tile_%s_swizzle_%s' % (format.short_name(), dst_suffix) if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM': print '#ifdef PIPE_ARCH_SSE' - print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name) + print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name) print '#else' print ' func = %s;' % (func_name,) print '#endif' @@ -517,7 +486,7 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix): func_name = 'lp_tile_%s_unswizzle_%s' % (format.short_name(), src_suffix) if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM': print '#ifdef PIPE_ARCH_SSE' - print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name) + print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name) print '#else' print ' func = %s;' % (func_name,) print '#endif' @@ -577,7 +546,7 @@ def main(): print '};' print - generate_ssse3() + generate_sse2() channel = Channel(UNSIGNED, True, 8) native_type = 'uint8_t' diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index ebb21a6e5a..a9426df686 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -236,7 +236,7 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) int ret; ret = nouveau_channel_alloc(dev, 0xbeef0201, 0xbeef0202, - &screen->channel); + 512*1024, &screen->channel); if (ret) return ret; screen->device = dev; diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index c9003c97f5..ab480cabd0 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -12,6 +12,10 @@ #include "nouveau/nouveau_resource.h" #include "nouveau/nouveau_pushbuf.h" +#ifndef NV04_PFIFO_MAX_PACKET_LEN +#define NV04_PFIFO_MAX_PACKET_LEN 2047 +#endif + static INLINE uint32_t nouveau_screen_transfer_flags(unsigned pipe) { diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 6ec9095a74..bf6a577188 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -26,6 +26,9 @@ #define NOUVEAU_MSG(fmt, args...) \ fprintf(stderr, "nouveau: "fmt, ##args); +#define nouveau_bo_tile_layout(nvbo) \ + ((nvbo)->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK) + /* Constant buffer assignment */ #define NV50_CB_PMISC 0 #define NV50_CB_PVP 1 @@ -157,6 +160,7 @@ struct nv50_context { unsigned sampler_view_nr[3]; unsigned vbo_fifo; + unsigned req_lmem; }; static INLINE struct nv50_context * diff --git a/src/gallium/drivers/nv50/nv50_formats.c b/src/gallium/drivers/nv50/nv50_formats.c index 3be39d5337..4282809454 100644 --- a/src/gallium/drivers/nv50/nv50_formats.c +++ b/src/gallium/drivers/nv50/nv50_formats.c @@ -53,6 +53,8 @@ #define NV50TIC_0_0_FMT_16_16_16 NV50TIC_0_0_FMT_16_16_16_16 #define NV50TIC_0_0_FMT_32_32_32 NV50TIC_0_0_FMT_32_32_32_32 +/* NOTE: using NV50_2D_DST_FORMAT for substitute formats used with 2D engine */ + const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = { /* COMMON FORMATS */ @@ -81,7 +83,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1), SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, - [PIPE_FORMAT_B4G4R4A4_UNORM] = { 0, + [PIPE_FORMAT_B4G4R4A4_UNORM] = { NV50_2D_DST_FORMAT_R16_UNORM, B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1), SAMPLER_VIEW }, @@ -122,15 +124,15 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = /* LUMINANCE, ALPHA, INTENSITY */ - [PIPE_FORMAT_L8_UNORM] = { 0, + [PIPE_FORMAT_L8_UNORM] = { NV50_2D_DST_FORMAT_R8_UNORM, A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0), SAMPLER_VIEW }, - [PIPE_FORMAT_L8_SRGB] = { 0, + [PIPE_FORMAT_L8_SRGB] = { NV50_2D_DST_FORMAT_R8_UNORM, A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0), SAMPLER_VIEW }, - [PIPE_FORMAT_I8_UNORM] = { 0, + [PIPE_FORMAT_I8_UNORM] = { NV50_2D_DST_FORMAT_R8_UNORM, A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), SAMPLER_VIEW }, @@ -138,7 +140,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_L8A8_UNORM] = { 0, + [PIPE_FORMAT_L8A8_UNORM] = { NV50_2D_DST_FORMAT_R16_UNORM, A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0), SAMPLER_VIEW }, @@ -276,9 +278,9 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R16_SNORM] = { 0, + [PIPE_FORMAT_R16_SNORM] = { NV50TCL_RT_FORMAT_R16_SNORM, A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, /* UNORM 16 */ @@ -294,9 +296,9 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R16_UNORM] = { 0, + [PIPE_FORMAT_R16_UNORM] = { NV50TCL_RT_FORMAT_R16_UNORM, A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, /* SNORM 8 */ diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c index 676540538e..c88e7ba742 100644 --- a/src/gallium/drivers/nv50/nv50_pc.c +++ b/src/gallium/drivers/nv50/nv50_pc.c @@ -328,7 +328,7 @@ nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv) static void nv_do_print_function(void *priv, struct nv_basic_block *b) { - struct nv_instruction *i = b->phi; + struct nv_instruction *i; debug_printf("=== BB %i ", b->id); if (b->out[0]) @@ -547,6 +547,8 @@ nv50_generate_code(struct nv50_translation_info *ti) ti->p->fixups = pc->fixups; ti->p->num_fixups = pc->num_fixups; + ti->p->uses_lmem = ti->store_to_memory; + NV50_DBGMSG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success"); out: diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index 921ed15691..27eb3817bf 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -452,7 +452,7 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) if (nvi->opcode == NV_OP_SAT) { mi = nvi->src[0]->value->insn; - if (mi->opcode != NV_OP_ADD || mi->opcode != NV_OP_MAD) + if (mi->opcode != NV_OP_ADD && mi->opcode != NV_OP_MAD) continue; if (mi->flags_def || mi->def[0]->refc > 1) continue; diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c index b9d5ba5ef6..39ae36681c 100644 --- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c +++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c @@ -767,7 +767,7 @@ nv50_ctor_register_set(struct nv_pc *pc, struct register_set *set) static void insert_ordered_tail(struct nv_value *list, struct nv_value *nval) { - struct nv_value *elem = list->prev; + struct nv_value *elem; for (elem = list->prev; elem != list && elem->livei->bgn > nval->livei->bgn; diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 37b02bbec7..33c4c8ca6d 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -45,6 +45,7 @@ struct nv50_program { ubyte type; boolean translated; + boolean uses_lmem; struct nouveau_bo *bo; struct nouveau_stateobj *so; diff --git a/src/gallium/drivers/nv50/nv50_reg.h b/src/gallium/drivers/nv50/nv50_reg.h index 365576fdd0..949838b33f 100644 --- a/src/gallium/drivers/nv50/nv50_reg.h +++ b/src/gallium/drivers/nv50/nv50_reg.h @@ -1018,6 +1018,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50TCL_FP_START_ID 0x00001414 #define NV50TCL_GP_VERTEX_OUTPUT_COUNT 0x00001420 #define NV50TCL_VB_ELEMENT_BASE 0x00001434 +#define NV50TCL_CLEAR_FLAGS 0x0000143c +#define NV50TCL_CLEAR_FLAGS_OGL (1 << 0) +#define NV50TCL_CLEAR_FLAGS_D3D (1 << 4) #define NV50TCL_INSTANCE_BASE 0x00001438 #define NV50TCL_CODE_CB_FLUSH 0x00001440 #define NV50TCL_BIND_TSC(x) (0x00001444+((x)*8)) diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 49af9b59be..c6bd62df1d 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -84,6 +84,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TWO_SIDED_STENCIL: return 1; case PIPE_CAP_GLSL: + case PIPE_CAP_SM3: return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; @@ -95,6 +96,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 1; case PIPE_CAP_TIMER_QUERY: return 0; + case PIPE_CAP_STREAM_OUTPUT: + return 0; case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: @@ -137,8 +140,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, switch(shader) { case PIPE_SHADER_FRAGMENT: case PIPE_SHADER_VERTEX: - case PIPE_SHADER_GEOMETRY: break; + case PIPE_SHADER_GEOMETRY: default: return 0; } @@ -158,6 +161,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 64 / 4; case PIPE_SHADER_CAP_MAX_CONSTS: return 65536 / 16; + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: /* 16 - 1, but not implemented */ + return 1; case PIPE_SHADER_CAP_MAX_ADDRS: /* no spilling atm */ return 1; case PIPE_SHADER_CAP_MAX_PREDS: /* not yet handled */ @@ -222,6 +227,36 @@ nv50_screen_destroy(struct pipe_screen *pscreen) OUT_RELOC(ch, bo, (n << 18) | (gr->subc << 13) | m, fl, 0, 0) void +nv50_screen_reloc_constbuf(struct nv50_screen *screen, unsigned cbi) +{ + struct nouveau_bo *bo; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *tesla = screen->tesla; + unsigned size; + const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; + + switch (cbi) { + case NV50_CB_PMISC: + bo = screen->constbuf_misc[0]; + size = 0x200; + break; + case NV50_CB_PVP: + case NV50_CB_PFP: + case NV50_CB_PGP: + bo = screen->constbuf_parm[cbi - NV50_CB_PVP]; + size = 0; + break; + default: + return; + } + + BGN_RELOC (chan, bo, tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl); + OUT_RELOCh(chan, bo, 0, rl); + OUT_RELOCl(chan, bo, 0, rl); + OUT_RELOC (chan, bo, (cbi << 16) | size, rl, 0, 0); +} + +void nv50_screen_relocs(struct nv50_screen *screen) { struct nouveau_channel *chan = screen->base.channel; @@ -243,12 +278,7 @@ nv50_screen_relocs(struct nv50_screen *screen) OUT_RELOCh(chan, screen->tsc, 0, rl); OUT_RELOCl(chan, screen->tsc, 0, rl); - BGN_RELOC (chan, screen->constbuf_misc[0], - tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl); - OUT_RELOCh(chan, screen->constbuf_misc[0], 0, rl); - OUT_RELOCl(chan, screen->constbuf_misc[0], 0, rl); - OUT_RELOC (chan, screen->constbuf_misc[0], - (NV50_CB_PMISC << 16) | 0x0200, rl, 0, 0); + nv50_screen_reloc_constbuf(screen, NV50_CB_PMISC); BGN_RELOC (chan, screen->constbuf_misc[0], tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl); @@ -257,14 +287,21 @@ nv50_screen_relocs(struct nv50_screen *screen) OUT_RELOC (chan, screen->constbuf_misc[0], (NV50_CB_AUX << 16) | 0x0200, rl, 0, 0); - for (i = 0; i < 3; ++i) { - BGN_RELOC (chan, screen->constbuf_parm[i], - tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl); - OUT_RELOCh(chan, screen->constbuf_parm[i], 0, rl); - OUT_RELOCl(chan, screen->constbuf_parm[i], 0, rl); - OUT_RELOC (chan, screen->constbuf_parm[i], - ((NV50_CB_PVP + i) << 16) | 0x0000, rl, 0, 0); - } + for (i = 0; i < 3; ++i) + nv50_screen_reloc_constbuf(screen, NV50_CB_PVP + i); + + BGN_RELOC (chan, screen->stack_bo, + tesla, NV50TCL_STACK_ADDRESS_HIGH, 2, rl); + OUT_RELOCh(chan, screen->stack_bo, 0, rl); + OUT_RELOCl(chan, screen->stack_bo, 0, rl); + + if (!screen->cur_ctx->req_lmem) + return; + + BGN_RELOC (chan, screen->local_bo, + tesla, NV50TCL_LOCAL_ADDRESS_HIGH, 2, rl); + OUT_RELOCh(chan, screen->local_bo, 0, rl); + OUT_RELOCl(chan, screen->local_bo, 0, rl); } #ifndef NOUVEAU_GETPARAM_GRAPH_UNITS @@ -425,6 +462,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) BEGIN_RING(chan, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); OUT_RING (chan, 8); + BEGIN_RING(chan, screen->tesla, NV50TCL_CLEAR_FLAGS, 1); + OUT_RING (chan, NV50TCL_CLEAR_FLAGS_D3D); + /* constant buffers for immediates and VP/FP parameters */ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (32 * 4) * 4, &screen->constbuf_misc[0]); diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index ad6bdeb27c..6e15230b48 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -39,6 +39,8 @@ nv50_screen(struct pipe_screen *screen) extern void nv50_screen_relocs(struct nv50_screen *); +extern void nv50_screen_reloc_constbuf(struct nv50_screen *, unsigned cbi); + struct nv50_format { uint32_t rt; uint32_t tic; diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c index 8c1a5999cf..6c41e8f456 100644 --- a/src/gallium/drivers/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nv50/nv50_shader_state.c @@ -47,10 +47,17 @@ nv50_transfer_constbuf(struct nv50_context *nv50, start = 0; while (count) { - unsigned nr = count; - nr = MIN2(nr, 2047); + unsigned nr = AVAIL_RING(chan); + + if (nr < 8) { + FIRE_RING(chan); + continue; + } + nr = MIN2(count, nr - 7); + nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); + + nv50_screen_reloc_constbuf(nv50->screen, cbi); - /* FIXME: emit relocs for unsuiTed MM */ BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1); OUT_RING (chan, (start << 8) | cbi); BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr); @@ -77,8 +84,16 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) unsigned start = 0; while (count) { - unsigned nr = count; - nr = MIN2(nr, 2047); + unsigned nr = AVAIL_RING(chan); + + if (nr < 8) { + FIRE_RING(chan); + continue; + } + nr = MIN2(count, nr - 7); + nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); + + nv50_screen_reloc_constbuf(nv50->screen, NV50_CB_PMISC); BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1); OUT_RING (chan, (start << 8) | NV50_CB_PMISC); @@ -111,8 +126,7 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) break; default: assert(0); - cbi = 0; - break; + return; } nv50_transfer_constbuf(nv50, nv50->constbuf[p->type], p->parm_size, cbi); @@ -281,6 +295,17 @@ nv50_program_validate(struct nv50_program *p) return p->translated; } +static INLINE void +nv50_program_validate_common(struct nv50_context *nv50, struct nv50_program *p) +{ + nv50_program_validate_code(nv50, p); + + if (p->uses_lmem) + nv50->req_lmem |= 1 << p->type; + else + nv50->req_lmem &= ~(1 << p->type); +} + struct nouveau_stateobj * nv50_vertprog_validate(struct nv50_context *nv50) { @@ -300,7 +325,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) if (!(nv50->dirty & NV50_NEW_VERTPROG)) return NULL; - nv50_program_validate_code(nv50, p); + nv50_program_validate_common(nv50, p); so_ref(p->so, &so); return so; @@ -325,7 +350,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) if (!(nv50->dirty & NV50_NEW_FRAGPROG)) return NULL; - nv50_program_validate_code(nv50, p); + nv50_program_validate_common(nv50, p); so_ref(p->so, &so); return so; @@ -337,6 +362,10 @@ nv50_geomprog_validate(struct nv50_context *nv50) struct nv50_program *p = nv50->geomprog; struct nouveau_stateobj *so = NULL; + /* GP may be NULL, but VP and FP may not */ + if (!p) + return NULL; /* GP is deactivated in linkage validation */ + if (!p->translated) { if (nv50_program_validate(p)) nv50_gp_update_stateobj(nv50, p); @@ -350,7 +379,7 @@ nv50_geomprog_validate(struct nv50_context *nv50) if (!(nv50->dirty & NV50_NEW_GEOMPROG)) return NULL; - nv50_program_validate_code(nv50, p); + nv50_program_validate_common(nv50, p); so_ref(p->so, &so); return so; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 3afce06557..f42fa2d4d2 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -663,7 +663,7 @@ nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv50_context *nv50 = nv50_context(pipe); - nv50->fragprog = hwcso; + nv50->geomprog = hwcso; nv50->dirty |= NV50_NEW_GEOMPROG; } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index f1d8202dff..16c2dab9af 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -333,7 +333,7 @@ struct state_validate { { validate_vtxbuf , NV50_NEW_ARRAYS }, { validate_vtxattr , NV50_NEW_ARRAYS }, { validate_clip , NV50_NEW_CLIP }, - {} + { NULL , 0 } }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 3e61203adf..f70c138fe1 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -52,48 +52,26 @@ nv50_2d_format_faithful(enum pipe_format format) } } -static INLINE int -nv50_format(enum pipe_format format) +static INLINE uint8_t +nv50_2d_format(enum pipe_format format) { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - return NV50_2D_DST_FORMAT_A8R8G8B8_UNORM; - case PIPE_FORMAT_B8G8R8X8_UNORM: - return NV50_2D_DST_FORMAT_X8R8G8B8_UNORM; - case PIPE_FORMAT_B8G8R8A8_SRGB: - return NV50_2D_DST_FORMAT_A8R8G8B8_SRGB; - case PIPE_FORMAT_B8G8R8X8_SRGB: - return NV50_2D_DST_FORMAT_X8R8G8B8_SRGB; - case PIPE_FORMAT_B5G6R5_UNORM: - return NV50_2D_DST_FORMAT_R5G6B5_UNORM; - case PIPE_FORMAT_B5G5R5A1_UNORM: - return NV50_2D_DST_FORMAT_A1R5G5B5_UNORM; - case PIPE_FORMAT_B10G10R10A2_UNORM: - return NV50_2D_DST_FORMAT_A2R10G10B10_UNORM; - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_R8_UNORM: + uint8_t id = nv50_format_table[format].rt; + + /* Hardware values for color formats range from 0xc0 to 0xff, + * but the 2D engine doesn't support all of them. + */ + if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0)))) + return id; + + switch (util_format_get_blocksize(format)) { + case 1: return NV50_2D_DST_FORMAT_R8_UNORM; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return NV50_2D_DST_FORMAT_R32G32B32A32_FLOAT; - case PIPE_FORMAT_R32G32B32_FLOAT: - return NV50_2D_DST_FORMAT_R32G32B32X32_FLOAT; - case PIPE_FORMAT_Z32_FLOAT: - return NV50_2D_DST_FORMAT_R32_FLOAT; - - /* only because we require src format == dst format: */ - case PIPE_FORMAT_R16G16_SNORM: - case PIPE_FORMAT_R16G16_UNORM: - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return NV50_2D_DST_FORMAT_A8R8G8B8_UNORM; - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_B4G4R4A4_UNORM: + case 2: return NV50_2D_DST_FORMAT_R16_UNORM; - + case 4: + return NV50_2D_DST_FORMAT_A8R8G8B8_UNORM; default: - return -1; + return 0; } } @@ -107,14 +85,14 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) int format, mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT; int flags = NOUVEAU_BO_VRAM | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD); - format = nv50_format(ps->format); - if (format < 0) { + format = nv50_2d_format(ps->format); + if (!format) { NOUVEAU_ERR("invalid/unsupported surface format: %s\n", util_format_name(ps->format)); return 1; } - if (!bo->tile_flags) { + if (!nouveau_bo_tile_layout(bo)) { BEGIN_RING(chan, eng2d, mthd, 2); OUT_RING (chan, format); OUT_RING (chan, 1); @@ -221,7 +199,6 @@ nv50_surface_copy(struct pipe_context *pipe, nv50_miptree_surface_del(ps_dst); } -/* XXX this should probably look more along the lines of nv50_clear */ static void nv50_clear_render_target(struct pipe_context *pipe, struct pipe_surface *dst, @@ -231,34 +208,99 @@ nv50_clear_render_target(struct pipe_context *pipe, { struct nv50_context *nv50 = nv50_context(pipe); struct nv50_screen *screen = nv50->screen; - struct nouveau_channel *chan = screen->eng2d->channel; - struct nouveau_grobj *eng2d = screen->eng2d; - int format, ret; - union util_color uc; - util_pack_color(rgba, dst->format, &uc); + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *tesla = screen->tesla; + struct nv50_miptree *mt = nv50_miptree(dst->texture); + struct nouveau_bo *bo = mt->base.bo; - format = nv50_format(dst->format); - if (format < 0) - return; + BEGIN_RING(chan, tesla, NV50TCL_CLEAR_COLOR(0), 4); + OUT_RINGf (chan, rgba[0]); + OUT_RINGf (chan, rgba[1]); + OUT_RINGf (chan, rgba[2]); + OUT_RINGf (chan, rgba[3]); - ret = MARK_RING (chan, 16 + 32, 2); - if (ret) + if (MARK_RING(chan, 18, 2)) return; - ret = nv50_surface_set(screen, dst, 1); - if (ret) + BEGIN_RING(chan, tesla, NV50TCL_RT_CONTROL, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, tesla, NV50TCL_RT_ADDRESS_HIGH(0), 5); + OUT_RELOCh(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RING (chan, nv50_format_table[dst->format].rt); + OUT_RING (chan, mt->level[dst->level].tile_mode << 4); + OUT_RING (chan, 0); + BEGIN_RING(chan, tesla, NV50TCL_RT_HORIZ(0), 2); + OUT_RING (chan, dst->width); + OUT_RING (chan, dst->height); + BEGIN_RING(chan, tesla, NV50TCL_RT_ARRAY_MODE, 1); + OUT_RING (chan, 1); + + /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */ + + BEGIN_RING(chan, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2); + OUT_RING (chan, (width << 16) | dstx); + OUT_RING (chan, (height << 16) | dsty); + + BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1); + OUT_RING (chan, 0x3c); + + nv50->dirty |= NV50_NEW_FRAMEBUFFER; +} + +static void +nv50_clear_depth_stencil(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_screen *screen = nv50->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *tesla = screen->tesla; + struct nv50_miptree *mt = nv50_miptree(dst->texture); + struct nouveau_bo *bo = mt->base.bo; + uint32_t mode = 0; + + if (clear_flags & PIPE_CLEAR_DEPTH) { + BEGIN_RING(chan, tesla, NV50TCL_CLEAR_DEPTH, 1); + OUT_RINGf (chan, depth); + mode |= NV50TCL_CLEAR_BUFFERS_Z; + } + + if (clear_flags & PIPE_CLEAR_STENCIL) { + BEGIN_RING(chan, tesla, NV50TCL_CLEAR_STENCIL, 1); + OUT_RING (chan, stencil & 0xff); + mode |= NV50TCL_CLEAR_BUFFERS_S; + } + + if (MARK_RING(chan, 17, 2)) return; - BEGIN_RING(chan, eng2d, NV50_2D_DRAW_SHAPE, 3); - OUT_RING (chan, NV50_2D_DRAW_SHAPE_RECTANGLES); - OUT_RING (chan, format); - OUT_RING (chan, uc.ui); - BEGIN_RING(chan, eng2d, NV50_2D_DRAW_POINT32_X(0), 4); - OUT_RING (chan, dstx); - OUT_RING (chan, dsty); - OUT_RING (chan, width); - OUT_RING (chan, height); + BEGIN_RING(chan, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5); + OUT_RELOCh(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RING (chan, nv50_format_table[dst->format].rt); + OUT_RING (chan, mt->level[dst->level].tile_mode << 4); + OUT_RING (chan, 0); + BEGIN_RING(chan, tesla, NV50TCL_ZETA_ENABLE, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, tesla, NV50TCL_ZETA_HORIZ, 3); + OUT_RING (chan, dst->width); + OUT_RING (chan, dst->height); + OUT_RING (chan, (1 << 16) | 1); + + BEGIN_RING(chan, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2); + OUT_RING (chan, (width << 16) | dstx); + OUT_RING (chan, (height << 16) | dsty); + + BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1); + OUT_RING (chan, mode); + nv50->dirty |= NV50_NEW_FRAMEBUFFER; } void @@ -266,6 +308,7 @@ nv50_init_surface_functions(struct nv50_context *nv50) { nv50->pipe.resource_copy_region = nv50_surface_copy; nv50->pipe.clear_render_target = nv50_clear_render_target; + nv50->pipe.clear_depth_stencil = nv50_clear_depth_stencil; } diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index f973cf24b9..0cc2f4a837 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -45,7 +45,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, WAIT_RING (chan, 14); - if (!src_bo->tile_flags) { + if (!nouveau_bo_tile_layout(src_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1); OUT_RING (chan, 1); @@ -64,7 +64,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, OUT_RING (chan, sz); /* copying only 1 zslice per call */ } - if (!dst_bo->tile_flags) { + if (!nouveau_bo_tile_layout(dst_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1); OUT_RING (chan, 1); @@ -95,14 +95,14 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); OUT_RELOCl(chan, src_bo, src_offset, src_reloc); OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc); - if (src_bo->tile_flags) { + if (nouveau_bo_tile_layout(src_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN, 1); OUT_RING (chan, (sy << 16) | (sx * cpp)); } else { src_offset += (line_count * src_pitch); } - if (dst_bo->tile_flags) { + if (nouveau_bo_tile_layout(dst_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT, 1); OUT_RING (chan, (dy << 16) | (dx * cpp)); @@ -280,7 +280,7 @@ nv50_upload_sifc(struct nv50_context *nv50, MARK_RING (chan, 32, 2); /* flush on lack of space or relocs */ - if (bo->tile_flags) { + if (nouveau_bo_tile_layout(bo)) { BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5); OUT_RING (chan, dst_format); OUT_RING (chan, 0); diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index d97cab8db1..13e8beed47 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -1235,10 +1235,9 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) struct nouveau_channel* chan = nvfx->screen->base.channel; struct nvfx_pipe_fragment_program *pfp = nvfx->fragprog; struct nvfx_vertex_program* vp; - /* Gallium always puts the point coord in GENERIC[0] - * TODO: this is wrong, Gallium needs to be fixed - */ - unsigned sprite_coord_enable = nvfx->rasterizer->pipe.point_quad_rasterization * (nvfx->rasterizer->pipe.sprite_coord_enable | 1); + + // TODO: the multiplication by point_quad_rasterization is probably superfluous + unsigned sprite_coord_enable = nvfx->rasterizer->pipe.point_quad_rasterization * nvfx->rasterizer->pipe.sprite_coord_enable; boolean emulate_sprite_flipping = sprite_coord_enable && nvfx->rasterizer->pipe.sprite_coord_mode; unsigned key = emulate_sprite_flipping; @@ -1297,7 +1296,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) unsigned used_texcoords = 0; for(unsigned i = 0; i < fp->num_slots; ++i) { unsigned generic = fp->slot_to_generic[i]; - if(!((1 << generic) & sprite_coord_enable)) + if((generic < 32) && !((1 << generic) & sprite_coord_enable)) { unsigned char slot_mask = vp->generic_to_fp_input[generic]; if(slot_mask >= 0xf0) @@ -1320,7 +1319,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) for(i = 0; i < fp->num_slots; ++i) { unsigned generic = fp->slot_to_generic[i]; - if((1 << generic) & sprite_coord_enable) + if((generic < 32) && ((1 << generic) & sprite_coord_enable)) { if(fp->slot_to_fp_input[i] != sprite_reloc_input) goto update_slots; @@ -1346,7 +1345,7 @@ update_slots: for(; i < fp->num_slots; ++i) { unsigned generic = fp->slot_to_generic[i]; - if((1 << generic) & sprite_coord_enable) + if((generic < 32) && ((1 << generic) & sprite_coord_enable)) fp->slot_to_fp_input[i] = sprite_reloc_input; else fp->slot_to_fp_input[i] = vp->generic_to_fp_input[generic] & 0xf; @@ -1559,7 +1558,7 @@ nvfx_fragprog_destroy(struct nvfx_context *nvfx, struct nvfx_fragment_program_bo* next = fpbo->next; nouveau_bo_unmap(fpbo->bo); nouveau_bo_ref(0, &fpbo->bo); - free(fpbo); + os_free_aligned(fpbo); fpbo = next; } while(fpbo != fp->fpbo); diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 3f177b7ed0..8024800bd0 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -194,6 +194,8 @@ nvfx_screen_is_format_supported(struct pipe_screen *pscreen, switch (format) { case PIPE_FORMAT_B8G8R8A8_UNORM: case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: case PIPE_FORMAT_B5G6R5_UNORM: break; case PIPE_FORMAT_R16G16B16A16_FLOAT: diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c index b767846a99..54619037d8 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.c +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -305,7 +305,7 @@ nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct nvfx_context *nvfx = nvfx_context(pipe); nvfx->constbuf[shader] = buf; - nvfx->constbuf_nr[shader] = buf->width0 / (4 * sizeof(float)); + nvfx->constbuf_nr[shader] = buf ? (buf->width0 / (4 * sizeof(float))) : 0; if (shader == PIPE_SHADER_VERTEX) { nvfx->dirty |= NVFX_NEW_VERTCONST; diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c index 4ffc4de452..30e48c8073 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_fb.c +++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c @@ -140,6 +140,12 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) case 0: rt_format |= NV30_3D_RT_FORMAT_COLOR_A8R8G8B8; break; + case PIPE_FORMAT_R8G8B8X8_UNORM: + rt_format |= NV30_3D_RT_FORMAT_COLOR_X8B8G8R8; + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + rt_format |= NV30_3D_RT_FORMAT_COLOR_A8B8G8R8; + break; case PIPE_FORMAT_B5G6R5_UNORM: rt_format |= NV30_3D_RT_FORMAT_COLOR_R5G6B5; break; diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index 23f045ecf6..e543fda50e 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -377,6 +377,8 @@ tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { src.swz[2] = fsrc->Register.SwizzleZ; src.swz[3] = fsrc->Register.SwizzleW; src.indirect = 0; + src.indirect_reg = 0; + src.indirect_swz = 0; if(fsrc->Register.Indirect) { if(fsrc->Indirect.File == TGSI_FILE_ADDRESS && @@ -973,7 +975,7 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE) static struct nvfx_vertex_program* -nvfx_vertprog_translate(struct nvfx_context *nvfx, const struct pipe_shader_state* vps, const struct tgsi_shader_info* info) +nvfx_vertprog_translate(struct nvfx_context *nvfx, const struct pipe_shader_state* vps, struct tgsi_shader_info* info) { struct tgsi_parse_context parse; struct nvfx_vertex_program* vp = NULL; diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 728bc40a5b..66d900ebb5 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -39,5 +39,6 @@ EXTRA_OBJECTS = \ include ../../Makefile.template +.PHONY: $(COMPILER_ARCHIVE) $(COMPILER_ARCHIVE): $(MAKE) -C $(TOP)/src/mesa/drivers/dri/r300/compiler diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 7f655dbfd2..b59bc00261 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -125,6 +125,8 @@ struct r300_gpu_flush { uint32_t cb_flush_clean[6]; }; +#define RS_STATE_MAIN_SIZE 23 + struct r300_rs_state { /* Original rasterizer state. */ struct pipe_rasterizer_state rs; @@ -132,7 +134,7 @@ struct r300_rs_state { struct pipe_rasterizer_state rs_draw; /* Command buffers. */ - uint32_t cb_main[25]; + uint32_t cb_main[RS_STATE_MAIN_SIZE]; uint32_t cb_poly_offset_zb16[5]; uint32_t cb_poly_offset_zb24[5]; @@ -150,6 +152,7 @@ struct r300_rs_block { uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */ uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */ uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */ + uint32_t gb_enable; uint32_t ip[8]; /* R300_RS_IP_[0-7], R500_RS_IP_[0-7] */ uint32_t count; /* R300_RS_COUNT */ @@ -162,7 +165,6 @@ struct r300_sampler_state { uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */ uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */ - uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */ /* Min/max LOD must be clamped to [0, last_level], thus * it's dependent on a currently bound texture */ @@ -334,6 +336,13 @@ struct r300_texture_desc { /* Parent class. */ struct u_resource b; + /* Width, height, and depth. + * Most of the time, these are equal to pipe_texture::width0, height0, + * and depth0. However, NPOT 3D textures must have dimensions aligned + * to POT, and this is the only case when these variables differ from + * pipe_texture. */ + unsigned width0, height0, depth0; + /* Buffer tiling. * Macrotiling is specified per-level because small mipmaps cannot * be macrotiled. */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index b2b34c3efc..3a1085d2dc 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -89,7 +89,7 @@ static const float * get_rc_constant_state( { struct r300_textures_state* texstate = r300->textures_state.state; static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; - struct pipe_resource *tex; + struct r300_texture *tex; assert(constant->Type == RC_CONSTANT_STATE); @@ -97,9 +97,17 @@ static const float * get_rc_constant_state( /* Factor for converting rectangle coords to * normalized coords. Should only show up on non-r500. */ case RC_STATE_R300_TEXRECT_FACTOR: - tex = texstate->sampler_views[constant->u.State[1]]->base.texture; - vec[0] = 1.0 / tex->width0; - vec[1] = 1.0 / tex->height0; + tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture); + vec[0] = 1.0 / tex->desc.width0; + vec[1] = 1.0 / tex->desc.height0; + break; + + case RC_STATE_R300_TEXSCALE_FACTOR: + tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture); + /* Add a small number to the texture size to work around rounding errors in hw. */ + vec[0] = tex->desc.b.b.width0 / (tex->desc.width0 + 0.001f); + vec[1] = tex->desc.b.b.height0 / (tex->desc.height0 + 0.001f); + vec[2] = tex->desc.b.b.depth0 / (tex->desc.depth0 + 0.001f); break; case RC_STATE_R300_VIEWPORT_SCALE: @@ -667,7 +675,7 @@ void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state) CS_LOCALS(r300); BEGIN_CS(size); - OUT_CS_TABLE(rs->cb_main, 25); + OUT_CS_TABLE(rs->cb_main, RS_STATE_MAIN_SIZE); if (rs->polygon_offset_enable) { if (r300->zbuffer_bpp == 16) { OUT_CS_TABLE(rs->cb_poly_offset_zb16, 5); @@ -709,6 +717,8 @@ void r300_emit_rs_block_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); OUT_CS(rs->vap_out_vtx_fmt[0]); OUT_CS(rs->vap_out_vtx_fmt[1]); + OUT_CS_REG_SEQ(R300_GB_ENABLE, 1); + OUT_CS(rs->gb_enable); if (r300->screen->caps.is_r500) { OUT_CS_REG_SEQ(R500_RS_IP_0, count); diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index b8dab88ef0..d9d4a9304d 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -150,12 +150,16 @@ static void get_external_state( unsigned char *swizzle; for (i = 0; i < texstate->sampler_state_count; i++) { - struct r300_sampler_state* s = texstate->sampler_states[i]; + struct r300_sampler_state *s = texstate->sampler_states[i]; + struct r300_sampler_view *v = texstate->sampler_views[i]; + struct r300_texture *t; - if (!s) { + if (!s || !v) { continue; } + t = r300_texture(texstate->sampler_views[i]->base.texture); + if (s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { state->unit[i].compare_mode_enabled = 1; @@ -176,35 +180,29 @@ static void get_external_state( state->unit[i].non_normalized_coords = !s->state.normalized_coords; - if (texstate->sampler_views[i]) { - struct r300_texture *t; - t = (struct r300_texture*)texstate->sampler_views[i]->base.texture; - - /* XXX this should probably take into account STR, not just S. */ - if (t->desc.is_npot) { - switch (s->state.wrap_s) { - case PIPE_TEX_WRAP_REPEAT: - state->unit[i].wrap_mode = RC_WRAP_REPEAT; - state->unit[i].fake_npot = TRUE; - break; - - case PIPE_TEX_WRAP_MIRROR_REPEAT: - state->unit[i].wrap_mode = RC_WRAP_MIRRORED_REPEAT; - state->unit[i].fake_npot = TRUE; - break; - - case PIPE_TEX_WRAP_MIRROR_CLAMP: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - state->unit[i].wrap_mode = RC_WRAP_MIRRORED_CLAMP; - state->unit[i].fake_npot = TRUE; - break; - - default: - state->unit[i].wrap_mode = RC_WRAP_NONE; - break; - } + /* XXX this should probably take into account STR, not just S. */ + if (t->desc.is_npot) { + switch (s->state.wrap_s) { + case PIPE_TEX_WRAP_REPEAT: + state->unit[i].wrap_mode = RC_WRAP_REPEAT; + break; + + case PIPE_TEX_WRAP_MIRROR_REPEAT: + state->unit[i].wrap_mode = RC_WRAP_MIRRORED_REPEAT; + break; + + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + state->unit[i].wrap_mode = RC_WRAP_MIRRORED_CLAMP; + break; + + default: + state->unit[i].wrap_mode = RC_WRAP_NONE; } + + if (t->desc.b.b.target == PIPE_TEXTURE_3D) + state->unit[i].clamp_and_scale_before_fetch = TRUE; } } } diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 5b0121ce9e..5f34fcb274 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -158,7 +158,7 @@ static void r300_render_condition(struct pipe_context *pipe, uint mode) { struct r300_context *r300 = r300_context(pipe); - uint64_t result; + uint64_t result = 0; boolean wait; if (query) { @@ -167,9 +167,9 @@ static void r300_render_condition(struct pipe_context *pipe, if (!r300_get_query_result(pipe, query, wait, &result)) { r300->skip_rendering = FALSE; + } else { + r300->skip_rendering = result == 0; } - - r300->skip_rendering = result == 0; } else { r300->skip_rendering = FALSE; } diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c index 0ea11e5bfc..9247064508 100644 --- a/src/gallium/drivers/r300/r300_render_translate.c +++ b/src/gallium/drivers/r300/r300_render_translate.c @@ -29,6 +29,7 @@ #include "r300_context.h" #include "translate/translate.h" +#include "util/u_index_modify.h" void r300_begin_vertex_translate(struct r300_context *r300) { @@ -188,111 +189,6 @@ void r300_end_vertex_translate(struct r300_context *r300) NULL); } -static void r300_shorten_ubyte_elts(struct r300_context* r300, - struct pipe_resource** elts, - int index_bias, - unsigned start, - unsigned count) -{ - struct pipe_context* context = &r300->context; - struct pipe_screen* screen = r300->context.screen; - struct pipe_resource* new_elts; - unsigned char *in_map; - unsigned short *out_map; - struct pipe_transfer *src_transfer, *dst_transfer; - unsigned i; - - new_elts = pipe_buffer_create(screen, - PIPE_BIND_INDEX_BUFFER, - 2 * count); - - in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer); - out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer); - - in_map += start; - - for (i = 0; i < count; i++) { - *out_map = (unsigned short)(*in_map + index_bias); - in_map++; - out_map++; - } - - pipe_buffer_unmap(context, *elts, src_transfer); - pipe_buffer_unmap(context, new_elts, dst_transfer); - - *elts = new_elts; -} - -static void r300_rebuild_ushort_elts(struct r300_context *r300, - struct pipe_resource **elts, - int index_bias, - unsigned start, unsigned count) -{ - struct pipe_context *context = &r300->context; - struct pipe_transfer *in_transfer = NULL; - struct pipe_transfer *out_transfer = NULL; - struct pipe_resource *new_elts; - unsigned short *in_map; - unsigned short *out_map; - unsigned i; - - new_elts = pipe_buffer_create(context->screen, - PIPE_BIND_INDEX_BUFFER, - 2 * count); - - in_map = pipe_buffer_map(context, *elts, - PIPE_TRANSFER_READ, &in_transfer); - out_map = pipe_buffer_map(context, new_elts, - PIPE_TRANSFER_WRITE, &out_transfer); - - in_map += start; - for (i = 0; i < count; i++) { - *out_map = (unsigned short)(*in_map + index_bias); - in_map++; - out_map++; - } - - pipe_buffer_unmap(context, *elts, in_transfer); - pipe_buffer_unmap(context, new_elts, out_transfer); - - *elts = new_elts; -} - -static void r300_rebuild_uint_elts(struct r300_context *r300, - struct pipe_resource **elts, - int index_bias, - unsigned start, unsigned count) -{ - struct pipe_context *context = &r300->context; - struct pipe_transfer *in_transfer = NULL; - struct pipe_transfer *out_transfer = NULL; - struct pipe_resource *new_elts; - unsigned int *in_map; - unsigned int *out_map; - unsigned i; - - new_elts = pipe_buffer_create(context->screen, - PIPE_BIND_INDEX_BUFFER, - 2 * count); - - in_map = pipe_buffer_map(context, *elts, - PIPE_TRANSFER_READ, &in_transfer); - out_map = pipe_buffer_map(context, new_elts, - PIPE_TRANSFER_WRITE, &out_transfer); - - in_map += start; - for (i = 0; i < count; i++) { - *out_map = (unsigned int)(*in_map + index_bias); - in_map++; - out_map++; - } - - pipe_buffer_unmap(context, *elts, in_transfer); - pipe_buffer_unmap(context, new_elts, out_transfer); - - *elts = new_elts; -} - void r300_translate_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned *index_size, unsigned index_offset, @@ -300,21 +196,21 @@ void r300_translate_index_buffer(struct r300_context *r300, { switch (*index_size) { case 1: - r300_shorten_ubyte_elts(r300, index_buffer, index_offset, *start, count); + util_shorten_ubyte_elts(&r300->context, index_buffer, index_offset, *start, count); *index_size = 2; *start = 0; break; case 2: if (*start % 2 != 0 || index_offset) { - r300_rebuild_ushort_elts(r300, index_buffer, index_offset, *start, count); + util_rebuild_ushort_elts(&r300->context, index_buffer, index_offset, *start, count); *start = 0; } break; case 4: if (index_offset) { - r300_rebuild_uint_elts(r300, index_buffer, index_offset, *start, count); + util_rebuild_uint_elts(&r300->context, index_buffer, index_offset, *start, count); *start = 0; } break; diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 7f41ff0e2e..b448924f85 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -124,6 +124,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: + case PIPE_CAP_SHADER_STENCIL_EXPORT: return 0; /* Texturing. */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 8ccb63964e..f2479a994c 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -922,7 +922,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, const struct pipe_rasterizer_state* state) { struct r300_rs_state* rs = CALLOC_STRUCT(r300_rs_state); - int i; float psiz; uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ @@ -935,10 +934,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */ uint32_t clip_rule; /* R300_SC_CLIP_RULE: 0x43D0 */ - /* Specifies top of Raster pipe specific enable controls, - * i.e. texture coordinates stuffing for points, lines, triangles */ - uint32_t stuffing_enable; /* R300_GB_ENABLE: 0x4008 */ - /* Point sprites texture coordinates, 0: lower left, 1: upper right */ float point_texcoord_left = 0; /* R300_GA_POINT_S0: 0x4200 */ float point_texcoord_bottom = 0;/* R300_GA_POINT_T0: 0x4204 */ @@ -950,10 +945,8 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->rs = *state; rs->rs_draw = *state; - /* Generate point sprite texture coordinates in GENERIC0 - * if point_quad_rasterization is TRUE. */ rs->rs.sprite_coord_enable = state->point_quad_rasterization * - (state->sprite_coord_enable | 1); + state->sprite_coord_enable; /* Override some states for Draw. */ rs->rs_draw.sprite_coord_enable = 0; /* We can do this in HW. */ @@ -1054,16 +1047,8 @@ static void* r300_create_rs_state(struct pipe_context* pipe, clip_rule = state->scissor ? 0xAAAA : 0xFFFF; - /* Point sprites */ - stuffing_enable = 0; + /* Point sprites coord mode */ if (rs->rs.sprite_coord_enable) { - stuffing_enable = R300_GB_POINT_STUFF_ENABLE; - for (i = 0; i < 8; i++) { - if (rs->rs.sprite_coord_enable & (1 << i)) - stuffing_enable |= - R300_GB_TEX_ST << (R300_GB_TEX0_SOURCE_SHIFT + (i*2)); - } - switch (state->sprite_coord_mode) { case PIPE_SPRITE_COORD_UPPER_LEFT: point_texcoord_top = 0.0f; @@ -1077,7 +1062,7 @@ static void* r300_create_rs_state(struct pipe_context* pipe, } /* Build the main command buffer. */ - BEGIN_CB(rs->cb_main, 25); + BEGIN_CB(rs->cb_main, RS_STATE_MAIN_SIZE); OUT_CB_REG(R300_VAP_CNTL_STATUS, vap_control_status); OUT_CB_REG(R300_GA_POINT_SIZE, point_size); OUT_CB_REG_SEQ(R300_GA_POINT_MINMAX, 2); @@ -1091,7 +1076,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, OUT_CB_REG(R300_GA_LINE_STIPPLE_VALUE, line_stipple_value); OUT_CB_REG(R300_GA_POLY_MODE, polygon_mode); OUT_CB_REG(R300_SC_CLIP_RULE, clip_rule); - OUT_CB_REG(R300_GB_ENABLE, stuffing_enable); OUT_CB_REG_SEQ(R300_GA_POINT_S0, 4); OUT_CB_32F(point_texcoord_left); OUT_CB_32F(point_texcoord_bottom); @@ -1149,7 +1133,7 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) } UPDATE_STATE(state, r300->rs_state); - r300->rs_state.size = 25 + (r300->polygon_offset_enabled ? 5 : 0); + r300->rs_state.size = RS_STATE_MAIN_SIZE + (r300->polygon_offset_enabled ? 5 : 0); if (last_sprite_coord_enable != r300->sprite_coord_enable || last_two_sided_color != r300->two_sided_color) { @@ -1171,7 +1155,6 @@ static void* struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state); boolean is_r500 = r300->screen->caps.is_r500; int lod_bias; - union util_color uc; sampler->state = *state; @@ -1228,9 +1211,6 @@ static void* sampler->filter1 |= r500_anisotropy(state->max_anisotropy); } - util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - sampler->border_color = uc.ui; - /* R500-specific fixups and optimizations */ if (r300->screen->caps.is_r500) { sampler->filter1 |= R500_BORDER_FIX; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index f9a516825d..904736ef06 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -324,6 +324,7 @@ static void r300_update_rs_block(struct r300_context *r300) boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || vs_outputs->bcolor[1] != ATTR_UNUSED; int *stream_loc_notcl = r300->stream_loc_notcl; + uint32_t stuffing_enable = 0; if (r300->screen->caps.is_r500) { rX00_rs_col = r500_rs_col; @@ -436,7 +437,11 @@ static void r300_update_rs_block(struct r300_context *r300) /* Rasterize texture coordinates. */ for (i = 0; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) { - bool sprite_coord = !!(r300->sprite_coord_enable & (1 << i)); + bool sprite_coord = false; + + if (fs_inputs->generic[i] != ATTR_UNUSED) { + sprite_coord = !!(r300->sprite_coord_enable & (1 << i)); + } if (vs_outputs->generic[i] != ATTR_UNUSED || sprite_coord) { if (!sprite_coord) { @@ -444,7 +449,9 @@ static void r300_update_rs_block(struct r300_context *r300) rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); stream_loc_notcl[loc++] = 6 + tex_count; - } + } else + stuffing_enable |= + R300_GB_TEX_ST << (R300_GB_TEX0_SOURCE_SHIFT + (tex_count*2)); /* Rasterize it. */ rX00_rs_tex(&rs, tex_count, tex_ptr, @@ -456,8 +463,8 @@ static void r300_update_rs_block(struct r300_context *r300) fp_offset++; DBG(r300, DBG_RS, - "r300: Rasterized generic %i written to FS%s.\n", - i, sprite_coord ? " (sprite coord)" : ""); + "r300: Rasterized generic %i written to FS%s in texcoord %d.\n", + i, sprite_coord ? " (sprite coord)" : "", tex_count); } else { DBG(r300, DBG_RS, "r300: Rasterized generic %i unused%s.\n", @@ -560,13 +567,72 @@ static void r300_update_rs_block(struct r300_context *r300) count = MAX3(col_count, tex_count, 1); rs.inst_count = count - 1; + /* set the GB enable flags */ + if (r300->sprite_coord_enable) + stuffing_enable |= R300_GB_POINT_STUFF_ENABLE; + + rs.gb_enable = stuffing_enable; + /* Now, after all that, see if we actually need to update the state. */ if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) { memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block)); - r300->rs_block_state.size = 11 + count*2; + r300->rs_block_state.size = 13 + count*2; } } +static uint32_t r300_get_border_color(enum pipe_format format, + const float border[4]) +{ + const struct util_format_description *desc; + float border_swizzled[4] = { + border[2], + border[1], + border[0], + border[3] + }; + uint32_t r; + + desc = util_format_description(format); + + /* We don't use util_pack_format because it does not handle the formats + * we want, e.g. R4G4B4A4 is non-existent in Gallium. */ + switch (desc->channel[0].size) { + case 4: + r = ((float_to_ubyte(border_swizzled[0]) & 0xf0) >> 4) | + ((float_to_ubyte(border_swizzled[1]) & 0xf0) << 0) | + ((float_to_ubyte(border_swizzled[2]) & 0xf0) << 4) | + ((float_to_ubyte(border_swizzled[3]) & 0xf0) << 8); + break; + + case 5: + if (desc->channel[1].size == 5) { + r = ((float_to_ubyte(border_swizzled[0]) & 0xf8) >> 3) | + ((float_to_ubyte(border_swizzled[1]) & 0xf8) << 2) | + ((float_to_ubyte(border_swizzled[2]) & 0xf8) << 7) | + ((float_to_ubyte(border_swizzled[3]) & 0x80) << 8); + } else if (desc->channel[1].size == 6) { + r = ((float_to_ubyte(border_swizzled[0]) & 0xf8) >> 3) | + ((float_to_ubyte(border_swizzled[1]) & 0xfc) << 3) | + ((float_to_ubyte(border_swizzled[2]) & 0xf8) << 8); + } else { + assert(0); + r = 0; + } + break; + + default: + /* I think the fat formats (16, 32) are specified + * as the 8-bit ones. I am not sure how compressed formats + * work here. */ + r = ((float_to_ubyte(border_swizzled[0]) & 0xff) << 0) | + ((float_to_ubyte(border_swizzled[1]) & 0xff) << 8) | + ((float_to_ubyte(border_swizzled[2]) & 0xff) << 16) | + ((float_to_ubyte(border_swizzled[3]) & 0xff) << 24); + } + + return r; +} + static void r300_merge_textures_and_samplers(struct r300_context* r300) { struct r300_textures_state *state = @@ -599,7 +665,11 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) texstate->format = view->format; texstate->filter0 = sampler->filter0; texstate->filter1 = sampler->filter1; - texstate->border_color = sampler->border_color; + + /* Set the border color. */ + texstate->border_color = + r300_get_border_color(view->base.format, + sampler->state.border_color); /* determine min/max levels */ max_level = MIN3(sampler->max_lod + view->base.first_level, diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 03ec127ff7..7e501221b1 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -364,6 +364,7 @@ static INLINE uint16_t r300_translate_vertex_data_type(enum pipe_format format) { uint32_t result = 0; const struct util_format_description *desc; + unsigned i; desc = util_format_description(format); @@ -371,10 +372,17 @@ r300_translate_vertex_data_type(enum pipe_format format) { return R300_INVALID_FORMAT; } - switch (desc->channel[0].type) { + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + switch (desc->channel[i].type) { /* Half-floats, floats, doubles */ case UTIL_FORMAT_TYPE_FLOAT: - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 16: /* Supported only on RV350 and later. */ if (desc->nr_channels > 2) { @@ -394,7 +402,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { case UTIL_FORMAT_TYPE_UNSIGNED: /* Signed ints */ case UTIL_FORMAT_TYPE_SIGNED: - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 8: result = R300_DATA_TYPE_BYTE; break; @@ -413,10 +421,10 @@ r300_translate_vertex_data_type(enum pipe_format format) { return R300_INVALID_FORMAT; } - if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { result |= R300_SIGNED; } - if (desc->channel[0].normalized) { + if (desc->channel[i].normalized) { result |= R300_NORMALIZE; } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 34105aa4bc..cee56bccdc 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -260,16 +260,26 @@ uint32_t r300_translate_texformat(enum pipe_format format, return ~0; /* Unsupported/unknown. */ } + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + if (i == 4) + return ~0; /* Unsupported/unknown. */ + /* And finally, uniform formats. */ - switch (desc->channel[0].type) { + switch (desc->channel[i].type) { case UTIL_FORMAT_TYPE_UNSIGNED: case UTIL_FORMAT_TYPE_SIGNED: - if (!desc->channel[0].normalized && + if (!desc->channel[i].normalized && desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) { return ~0; } - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 4: switch (desc->nr_channels) { case 2: @@ -303,7 +313,7 @@ uint32_t r300_translate_texformat(enum pipe_format format, return ~0; case UTIL_FORMAT_TYPE_FLOAT: - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 16: switch (desc->nr_channels) { case 1: @@ -359,6 +369,11 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) return R300_COLOR_FORMAT_I8; /* 16-bit buffers. */ + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8_SNORM: + return R300_COLOR_FORMAT_UV88; + case PIPE_FORMAT_B5G6R5_UNORM: return R300_COLOR_FORMAT_RGB565; @@ -443,15 +458,25 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) desc = util_format_description(format); + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + if (i == 4) + return ~0; /* Unsupported/unknown. */ + /* Specifies how the shader output is written to the fog unit. */ - if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) { - if (desc->channel[0].size == 32) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { + if (desc->channel[i].size == 32) { modifier |= R300_US_OUT_FMT_C4_32_FP; } else { modifier |= R300_US_OUT_FMT_C4_16_FP; } } else { - if (desc->channel[0].size == 16) { + if (desc->channel[i].size == 16) { modifier |= R300_US_OUT_FMT_C4_16; } else { /* C4_8 seems to be used for the formats whose pixel size @@ -468,7 +493,7 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* Add swizzles and return. */ switch (format) { - /* 8-bit outputs. + /* 8-bit outputs, one channel. * COLORFORMAT_I8 stores the C2 component. */ case PIPE_FORMAT_A8_UNORM: return modifier | R300_C2_SEL_A; @@ -478,6 +503,14 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) case PIPE_FORMAT_R8_SNORM: return modifier | R300_C2_SEL_R; + /* 16-bit outputs, two channels. + * COLORFORMAT_UV88 stores C2 and C0. */ + case PIPE_FORMAT_L8A8_UNORM: + return modifier | R300_C0_SEL_A | R300_C2_SEL_R; + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8_SNORM: + return modifier | R300_C0_SEL_G | R300_C2_SEL_R; + /* BGRA outputs. */ case PIPE_FORMAT_B5G6R5_UNORM: case PIPE_FORMAT_B5G5R5A1_UNORM: @@ -556,18 +589,15 @@ void r300_texture_setup_format_state(struct r300_screen *screen, out->tile_config = 0; /* Set sampler state. */ - out->format0 = R300_TX_WIDTH((u_minify(pt->width0, level) - 1) & 0x7ff) | - R300_TX_HEIGHT((u_minify(pt->height0, level) - 1) & 0x7ff); + out->format0 = + R300_TX_WIDTH((u_minify(desc->width0, level) - 1) & 0x7ff) | + R300_TX_HEIGHT((u_minify(desc->height0, level) - 1) & 0x7ff) | + R300_TX_DEPTH(util_logbase2(u_minify(desc->depth0, level)) & 0xf); if (desc->uses_stride_addressing) { /* rectangles love this */ out->format0 |= R300_TX_PITCH_EN; out->format2 = (desc->stride_in_pixels[level] - 1) & 0x1fff; - } else { - /* Power of two textures (3D, mipmaps, and no pitch), - * also NPOT textures with a width being POT. */ - out->format0 |= - R300_TX_DEPTH(util_logbase2(u_minify(pt->depth0, level)) & 0xf); } if (pt->target == PIPE_TEXTURE_CUBE) { @@ -580,10 +610,10 @@ void r300_texture_setup_format_state(struct r300_screen *screen, /* large textures on r500 */ if (is_r500) { - if (pt->width0 > 2048) { + if (desc->width0 > 2048) { out->format2 |= R500_TXWIDTH_BIT11; } - if (pt->height0 > 2048) { + if (desc->height0 > 2048) { out->format2 |= R500_TXHEIGHT_BIT11; } } diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 112282a0a6..543d0fdc15 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -44,7 +44,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */ {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */ {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */ - {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */ + {{ 4, 1}, { 2, 2}, { 0, 0}}, /* 64 bits per pixel */ {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ }, { @@ -53,7 +53,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */ {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */ {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */ - {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */ + {{ 32, 8}, {16, 16}, { 0, 0}}, /* 64 bits per pixel */ {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ } }; @@ -91,9 +91,9 @@ static boolean r300_texture_macro_switch(struct r300_texture_desc *desc, tile = r300_get_pixel_alignment(desc->b.b.format, desc->b.b.nr_samples, desc->microtile, R300_BUFFER_TILED, dim); if (dim == DIM_WIDTH) { - texdim = u_minify(desc->b.b.width0, level); + texdim = u_minify(desc->width0, level); } else { - texdim = u_minify(desc->b.b.height0, level); + texdim = u_minify(desc->height0, level); } /* See TX_FILTER1_n.MACRO_SWITCH. */ @@ -124,7 +124,7 @@ static unsigned r300_texture_get_stride(struct r300_screen *screen, return 0; } - width = u_minify(desc->b.b.width0, level); + width = u_minify(desc->width0, level); if (util_format_is_plain(desc->b.b.format)) { tile_width = r300_get_pixel_alignment(desc->b.b.format, @@ -172,7 +172,7 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, { unsigned height, tile_height; - height = u_minify(desc->b.b.height0, level); + height = u_minify(desc->height0, level); if (util_format_is_plain(desc->b.b.format)) { tile_height = r300_get_pixel_alignment(desc->b.b.format, @@ -237,7 +237,7 @@ static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, r300_texture_get_nblocksy(desc, i, FALSE); } - size *= desc->b.b.depth0; + size *= desc->depth0; desc->size_in_bytes = size; } } @@ -256,7 +256,7 @@ static void r300_setup_miptree(struct r300_screen *screen, { struct pipe_resource *base = &desc->b.b; unsigned stride, size, layer_size, nblocksy, i; - boolean rv350_mode = screen->caps.is_rv350; + boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350; boolean aligned_for_cbzb; desc->size_in_bytes = 0; @@ -292,7 +292,7 @@ static void r300_setup_miptree(struct r300_screen *screen, if (base->target == PIPE_TEXTURE_CUBE) size = layer_size * 6; else - size = layer_size * u_minify(base->depth0, i); + size = layer_size * u_minify(desc->depth0, i); desc->offset_in_bytes[i] = desc->size_in_bytes; desc->size_in_bytes = desc->offset_in_bytes[i] + size; @@ -303,8 +303,8 @@ static void r300_setup_miptree(struct r300_screen *screen, SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", - i, u_minify(base->width0, i), u_minify(base->height0, i), - u_minify(base->depth0, i), stride, desc->size_in_bytes, + i, u_minify(desc->width0, i), u_minify(desc->height0, i), + u_minify(desc->depth0, i), stride, desc->size_in_bytes, desc->macrotile[i] ? "TRUE" : "FALSE"); } } @@ -313,14 +313,14 @@ static void r300_setup_flags(struct r300_texture_desc *desc) { desc->uses_stride_addressing = !util_is_power_of_two(desc->b.b.width0) || - !util_is_power_of_two(desc->b.b.height0) || (desc->stride_in_bytes_override && stride_to_width(desc->b.b.format, desc->stride_in_bytes_override) != desc->b.b.width0); desc->is_npot = desc->uses_stride_addressing || - !util_is_power_of_two(desc->b.b.height0); + !util_is_power_of_two(desc->b.b.height0) || + !util_is_power_of_two(desc->b.b.depth0); } static void r300_setup_cbzb_flags(struct r300_screen *rscreen, @@ -351,7 +351,7 @@ static void r300_setup_tiling(struct r300_screen *screen, { struct r300_winsys_screen *rws = screen->rws; enum pipe_format format = desc->b.b.format; - boolean rv350_mode = screen->caps.is_rv350; + boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350; boolean is_zb = util_format_is_depth_or_stencil(format); boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING); @@ -368,11 +368,11 @@ static void r300_setup_tiling(struct r300_screen *screen, switch (util_format_get_blocksize(format)) { case 1: case 4: + case 8: desc->microtile = R300_BUFFER_TILED; break; case 2: - case 8: if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { desc->microtile = R300_BUFFER_SQUARETILED; } @@ -416,9 +416,21 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, { desc->b.b = *base; desc->b.b.screen = &rscreen->screen; - desc->stride_in_bytes_override = stride_in_bytes_override; + desc->width0 = base->width0; + desc->height0 = base->height0; + desc->depth0 = base->depth0; + r300_setup_flags(desc); + + /* Align a 3D NPOT texture to POT. */ + if (base->target == PIPE_TEXTURE_3D && desc->is_npot) { + desc->width0 = util_next_power_of_two(desc->width0); + desc->height0 = util_next_power_of_two(desc->height0); + desc->depth0 = util_next_power_of_two(desc->depth0); + } + + /* Setup tiling. */ if (microtile == R300_BUFFER_SELECT_LAYOUT || macrotile == R300_BUFFER_SELECT_LAYOUT) { r300_setup_tiling(rscreen, desc); @@ -428,7 +440,6 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, assert(desc->b.b.last_level == 0); } - r300_setup_flags(desc); r300_setup_cbzb_flags(rscreen, desc); /* Setup the miptree description. */ @@ -444,10 +455,10 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, if (max_buffer_size) { /* Make sure the buffer we got is large enough. */ if (desc->size_in_bytes > max_buffer_size) { - fprintf(stderr, "r300: texture_from_handle: The buffer is not " + fprintf(stderr, "r300: texture_desc_init: The buffer is not " "large enough. Got: %i, Need: %i, Info:\n", max_buffer_size, desc->size_in_bytes); - r300_tex_print_info(rscreen, desc, "texture_from_handle"); + r300_tex_print_info(rscreen, desc, "texture_desc_init"); return FALSE; } @@ -457,7 +468,7 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, } if (SCREEN_DBG_ON(rscreen, DBG_TEX)) - r300_tex_print_info(rscreen, desc, "texture_from_handle"); + r300_tex_print_info(rscreen, desc, "texture_desc_init"); return TRUE; } diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile index 3cdb963f97..ede0bb2ec4 100644 --- a/src/gallium/drivers/r600/Makefile +++ b/src/gallium/drivers/r600/Makefile @@ -7,22 +7,18 @@ LIBRARY_INCLUDES = \ $(shell pkg-config libdrm --cflags-only-I) C_SOURCES = \ - r600_buffer.c \ - r600_state2.c \ - r600_context.c \ - r600_shader.c \ - r600_draw.c \ + r600_asm.c \ r600_blit.c \ + r600_buffer.c \ r600_helper.c \ + r600_pipe.c \ r600_query.c \ r600_resource.c \ - r600_screen.c \ + r600_shader.c \ r600_state.c \ r600_texture.c \ - r600_asm.c \ r700_asm.c \ - r600_hw_states.c \ - eg_asm.c \ - eg_hw_states.c + evergreen_state.c \ + eg_asm.c include ../../Makefile.template diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index 99c8644e02..bf0ad8571b 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -16,19 +16,19 @@ env.Append(CPPPATH = [ r600 = env.ConvenienceLibrary( target = 'r600', source = [ + 'r600_asm.c', 'r600_buffer.c', - 'r600_context.c', - 'r600_draw.c', 'r600_blit.c', 'r600_helper.c', + 'r600_pipe.c', 'r600_query.c', 'r600_resource.c', - 'r600_screen.c', + 'r600_shader.c', 'r600_state.c', 'r600_texture.c', - 'r600_shader.c', - 'r600_asm.c', 'r700_asm.c', + 'evergreen_state.c', + 'eg_asm.c', ]) Export('r600') diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 769f550874..52b7189e9e 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -20,14 +20,13 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "radeon.h" -#include "r600_asm.h" -#include "r600_context.h" +#include <stdio.h> +#include <errno.h> #include "util/u_memory.h" +#include "r600_pipe.h" +#include "r600_asm.h" #include "eg_sq.h" #include "r600_opcodes.h" -#include <stdio.h> -#include <errno.h> int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) { @@ -73,8 +72,8 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COND(cf->cond) | - S_SQ_CF_WORD1_POP_COUNT(cf->pop_count); + S_SQ_CF_WORD1_COND(cf->cond) | + S_SQ_CF_WORD1_POP_COUNT(cf->pop_count); break; default: diff --git a/src/gallium/drivers/r600/eg_hw_states.c b/src/gallium/drivers/r600/eg_hw_states.c deleted file mode 100644 index d6f417e1e3..0000000000 --- a/src/gallium/drivers/r600/eg_hw_states.c +++ /dev/null @@ -1,1215 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * 2010 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Dave Airlie - */ -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include <util/u_blitter.h> -#include "util/u_pack_color.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_resource.h" -#include "eg_state_inlines.h" -#include "evergreend.h" - -#include "eg_states_inc.h" - -static void eg_blend(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_blend_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - int i; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_BLEND, 0, 0); - rstate->states[EG_BLEND__CB_BLEND_RED] = fui(rctx->blend_color.color[0]); - rstate->states[EG_BLEND__CB_BLEND_GREEN] = fui(rctx->blend_color.color[1]); - rstate->states[EG_BLEND__CB_BLEND_BLUE] = fui(rctx->blend_color.color[2]); - rstate->states[EG_BLEND__CB_BLEND_ALPHA] = fui(rctx->blend_color.color[3]); - rstate->states[EG_BLEND__CB_BLEND0_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND1_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND2_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND3_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND4_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND5_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND6_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND7_CONTROL] = 0x00000000; - - for (i = 0; i < 8; i++) { - unsigned eqRGB = state->rt[i].rgb_func; - unsigned srcRGB = state->rt[i].rgb_src_factor; - unsigned dstRGB = state->rt[i].rgb_dst_factor; - - unsigned eqA = state->rt[i].alpha_func; - unsigned srcA = state->rt[i].alpha_src_factor; - unsigned dstA = state->rt[i].alpha_dst_factor; - uint32_t bc = 0; - - if (!state->rt[i].blend_enable) - continue; - - bc |= S_028780_BLEND_CONTROL_ENABLE(1); - - bc |= S_028780_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); - bc |= S_028780_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); - bc |= S_028780_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); - - if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { - bc |= S_028780_SEPARATE_ALPHA_BLEND(1); - bc |= S_028780_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); - bc |= S_028780_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); - bc |= S_028780_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); - } - - rstate->states[EG_BLEND__CB_BLEND0_CONTROL + i] = bc; - } - - radeon_state_pm4(rstate); -} - -static void eg_ucp(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_clip_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_UCP, 0, 0); - - for (int i = 0; i < state->nr; i++) { - rstate->states[i * 4 + 0] = fui(state->ucp[i][0]); - rstate->states[i * 4 + 1] = fui(state->ucp[i][1]); - rstate->states[i * 4 + 2] = fui(state->ucp[i][2]); - rstate->states[i * 4 + 3] = fui(state->ucp[i][3]); - } - radeon_state_pm4(rstate); -} - -static void eg_cb(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state, int cb) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level = state->cbufs[cb]->level; - unsigned pitch, slice; - unsigned color_info; - unsigned format, swap, ntype; - const struct util_format_description *desc; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0, cb, 0); - rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; - rbuffer = &rtex->resource; - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; - rstate->nbo = 1; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; - - ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = V_028C70_NUMBER_SRGB; - - format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = r600_translate_colorswap(rtex->resource.base.b.format); - - color_info = S_028C70_FORMAT(format) | - S_028C70_COMP_SWAP(swap) | - S_028C70_BLEND_CLAMP(1) | - S_028C70_SOURCE_FORMAT(1) | - S_028C70_NUMBER_TYPE(ntype); - - rstate->states[EG_CB__CB_COLOR0_BASE] = state->cbufs[cb]->offset >> 8; - rstate->states[EG_CB__CB_COLOR0_INFO] = color_info; - rstate->states[EG_CB__CB_COLOR0_PITCH] = S_028C64_PITCH_TILE_MAX(pitch); - rstate->states[EG_CB__CB_COLOR0_SLICE] = S_028C68_SLICE_TILE_MAX(slice); - rstate->states[EG_CB__CB_COLOR0_VIEW] = 0x00000000; - rstate->states[EG_CB__CB_COLOR0_ATTRIB] = S_028C74_NON_DISP_TILING_ORDER(1); - - radeon_state_pm4(rstate); -} - -static void eg_db(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level; - unsigned pitch, slice, format; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0); - if (state->zsbuf == NULL) - return; - - rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tilled = 1; - rtex->array_mode = 2; - rtex->tile_type = 1; - rtex->depth = 1; - rbuffer = &rtex->resource; - - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; - level = state->zsbuf->level; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; - format = r600_translate_dbformat(state->zsbuf->texture->format); - rstate->states[EG_DB__DB_HTILE_DATA_BASE] = state->zsbuf->offset >> 8; - rstate->states[EG_DB__DB_Z_READ_BASE] = state->zsbuf->offset >> 8; - rstate->states[EG_DB__DB_Z_WRITE_BASE] = state->zsbuf->offset >> 8; - rstate->states[EG_DB__DB_STENCIL_READ_BASE] = state->zsbuf->offset >> 8; - rstate->states[EG_DB__DB_STENCIL_WRITE_BASE] = state->zsbuf->offset >> 8; - rstate->states[EG_DB__DB_Z_INFO] = S_028040_ARRAY_MODE(rtex->array_mode) | S_028040_FORMAT(format); - rstate->states[EG_DB__DB_DEPTH_VIEW] = 0x00000000; - rstate->states[EG_DB__DB_DEPTH_SIZE] = S_028058_PITCH_TILE_MAX(pitch); - rstate->states[EG_DB__DB_DEPTH_SLICE] = S_02805C_SLICE_TILE_MAX(slice); - radeon_state_pm4(rstate); -} - -static void eg_rasterizer(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer; - const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - const struct pipe_clip_state *clip = NULL; - struct r600_screen *rscreen = rctx->screen; - float offset_units = 0, offset_scale = 0; - char depth = 0; - unsigned offset_db_fmt_cntl = 0; - unsigned tmp; - unsigned prov_vtx = 1; - - if (rctx->clip) - clip = &rctx->clip->state.clip; - if (fb->zsbuf) { - offset_units = state->offset_units; - offset_scale = state->offset_scale * 12.0f; - switch (fb->zsbuf->texture->format) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - depth = -24; - offset_units *= 2.0f; - break; - case PIPE_FORMAT_Z32_FLOAT: - depth = -23; - offset_units *= 1.0f; - offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); - break; - case PIPE_FORMAT_Z16_UNORM: - depth = -16; - offset_units *= 4.0f; - break; - default: - R600_ERR("unsupported %d\n", fb->zsbuf->texture->format); - return; - } - } - offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); - - if (state->flatshade_first) - prov_vtx = 0; - - rctx->flat_shade = state->flatshade; - radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0); - rstate->states[EG_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000000; - if (rctx->flat_shade) - rstate->states[EG_RASTERIZER__SPI_INTERP_CONTROL_0] |= S_0286D4_FLAT_SHADE_ENA(1); - if (state->sprite_coord_enable) { - rstate->states[EG_RASTERIZER__SPI_INTERP_CONTROL_0] |= - S_0286D4_PNT_SPRITE_ENA(1) | - S_0286D4_PNT_SPRITE_OVRD_X(2) | - S_0286D4_PNT_SPRITE_OVRD_Y(3) | - S_0286D4_PNT_SPRITE_OVRD_Z(0) | - S_0286D4_PNT_SPRITE_OVRD_W(1); - if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { - rstate->states[EG_RASTERIZER__SPI_INTERP_CONTROL_0] |= - S_0286D4_PNT_SPRITE_TOP_1(1); - } - } - rstate->states[EG_RASTERIZER__PA_CL_CLIP_CNTL] = 0; - if (clip) { - rstate->states[EG_RASTERIZER__PA_CL_CLIP_CNTL] = S_028810_PS_UCP_MODE(3) | ((1 << clip->nr) - 1); - rstate->states[EG_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_NEAR_DISABLE(clip->depth_clamp); - rstate->states[EG_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_FAR_DISABLE(clip->depth_clamp); - } - rstate->states[EG_RASTERIZER__PA_SU_SC_MODE_CNTL] = - S_028814_PROVOKING_VTX_LAST(prov_vtx) | - S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | - S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | - S_028814_FACE(!state->front_ccw) | - S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri); - rstate->states[EG_RASTERIZER__PA_CL_VS_OUT_CNTL] = - S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | - S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex); - rstate->states[EG_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000; - /* point size 12.4 fixed point */ - tmp = (unsigned)(state->point_size * 8.0); - rstate->states[EG_RASTERIZER__PA_SU_POINT_SIZE] = S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp); - rstate->states[EG_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000; - rstate->states[EG_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008; - rstate->states[EG_RASTERIZER__PA_SU_VTX_CNTL] = 0x00000005; - - rstate->states[EG_RASTERIZER__PA_SC_MPASS_PS_CNTL] = 0x00000000; - rstate->states[EG_RASTERIZER__PA_SC_LINE_CNTL] = 0x00000400; - rstate->states[EG_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = 0x3F800000; - rstate->states[EG_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = 0x3F800000; - rstate->states[EG_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = 0x3F800000; - rstate->states[EG_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = 0x3F800000; - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL] = offset_db_fmt_cntl; - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP] = 0x00000000; - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE] = fui(offset_scale); - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = fui(offset_units); - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = fui(offset_scale); - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = fui(offset_units); - radeon_state_pm4(rstate); -} - -static void eg_scissor(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_scissor_state *state = &rctx->scissor->state.scissor; - const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - struct r600_screen *rscreen = rctx->screen; - unsigned minx, maxx, miny, maxy; - u32 tl, br; - - if (state == NULL) { - minx = 0; - miny = 0; - maxx = fb->cbufs[0]->width; - maxy = fb->cbufs[0]->height; - } else { - minx = state->minx; - miny = state->miny; - maxx = state->maxx; - maxy = state->maxy; - } - tl = S_028240_TL_X(minx) | S_028240_TL_Y(miny); - br = S_028244_BR_X(maxx) | S_028244_BR_Y(maxy); - radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0); - /* screen scissor has no WINDOW OFFSET */ - rstate->states[EG_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl; - rstate->states[EG_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; - rstate->states[EG_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = tl | S_028204_WINDOW_OFFSET_DISABLE(1); - rstate->states[EG_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_0_TL] = tl; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_0_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_1_TL] = tl; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_1_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_2_TL] = tl; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_2_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_3_TL] = tl; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_3_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA; - rstate->states[EG_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = tl | S_028240_WINDOW_OFFSET_DISABLE(1); - rstate->states[EG_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl | S_028240_WINDOW_OFFSET_DISABLE(1); - rstate->states[EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br; - radeon_state_pm4(rstate); -} - -static void eg_viewport(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0); - rstate->states[EG_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; - rstate->states[EG_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; - rstate->states[EG_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(state->scale[1]); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(state->scale[2]); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(state->translate[0]); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); - rstate->states[EG_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - radeon_state_pm4(rstate); -} - -static void eg_dsa(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; - const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref; - struct r600_screen *rscreen = rctx->screen; - unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; - unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control; - struct r600_shader *rshader; - struct r600_query *rquery = NULL; - boolean query_running; - int i; - - if (rctx->ps_shader == NULL) { - return; - } - radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0); - - db_shader_control = 0; - db_shader_control |= S_02880C_DUAL_EXPORT_ENABLE(1); - db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); - - rshader = &rctx->ps_shader->shader; - if (rshader->uses_kill) - db_shader_control |= S_02880C_KILL_ENABLE(1); - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1); - } - stencil_ref_mask = 0; - stencil_ref_mask_bf = 0; - db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | - S_028800_Z_WRITE_ENABLE(state->depth.writemask) | - S_028800_ZFUNC(state->depth.func); - /* set stencil enable */ - - if (state->stencil[0].enabled) { - db_depth_control |= S_028800_STENCIL_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); - db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); - db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); - - stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | - S_028430_STENCILWRITEMASK(state->stencil[0].writemask); - stencil_ref_mask |= S_028430_STENCILREF(stencil_ref->ref_value[0]); - if (state->stencil[1].enabled) { - db_depth_control |= S_028800_BACKFACE_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); - db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); - db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); - stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | - S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); - stencil_ref_mask_bf |= S_028430_STENCILREF(stencil_ref->ref_value[1]); - } - } - - alpha_test_control = 0; - alpha_ref = 0; - if (state->alpha.enabled) { - alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); - alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); - alpha_ref = fui(state->alpha.ref_value); - } - - db_render_control = 0; -/// db_render_control = S_028D0C_STENCIL_COMPRESS_DISABLE(1) | -/// S_028D0C_DEPTH_COMPRESS_DISABLE(1); - db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); - - query_running = false; - - LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { - if (rquery->state & R600_QUERY_STATE_STARTED) { - query_running = true; - } - } - - if (query_running) { - db_render_override |= S_028D10_NOOP_CULL_DISABLE(1); - db_render_control |= S_028D0C_PERFECT_ZPASS_COUNTS(1); - } - - rstate->states[EG_DSA__DB_STENCIL_CLEAR] = 0x00000000; - rstate->states[EG_DSA__DB_DEPTH_CLEAR] = 0x3F800000; - rstate->states[EG_DSA__SX_ALPHA_TEST_CONTROL] = alpha_test_control; - rstate->states[EG_DSA__DB_STENCILREFMASK] = stencil_ref_mask; - rstate->states[EG_DSA__DB_STENCILREFMASK_BF] = stencil_ref_mask_bf; - rstate->states[EG_DSA__SX_ALPHA_REF] = alpha_ref; - // rstate->states[EG_DSA__SPI_FOG_FUNC_SCALE] = 0x00000000; - // rstate->states[EG_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000; - rstate->states[EG_DSA__SPI_FOG_CNTL] = 0x00000000; - rstate->states[EG_DSA__DB_DEPTH_CONTROL] = db_depth_control; - rstate->states[EG_DSA__DB_SHADER_CONTROL] = db_shader_control; - rstate->states[EG_DSA__DB_RENDER_CONTROL] = db_render_control; - rstate->states[EG_DSA__DB_RENDER_OVERRIDE] = db_render_override; - - rstate->states[EG_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; - rstate->states[EG_DSA__DB_PRELOAD_CONTROL] = 0x00000000; - rstate->states[EG_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; - radeon_state_pm4(rstate); -} - - -static INLINE u32 S_FIXED(float value, u32 frac_bits) -{ - return value * (1 << frac_bits); -} - -static void eg_sampler_border(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id) -{ - struct r600_screen *rscreen = rctx->screen; - union util_color uc; - - util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER_BORDER, id, R600_SHADER_PS); - if (uc.ui) { - rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED] = fui(state->border_color[0]); - rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN] = fui(state->border_color[1]); - rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE] = fui(state->border_color[2]); - rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA] = fui(state->border_color[3]); - } - radeon_state_pm4(rstate); -} - -static void eg_sampler(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id) -{ - struct r600_screen *rscreen = rctx->screen; - union util_color uc; - - util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER, id, R600_SHADER_PS); - rstate->states[EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] = - S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | - S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | - S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | - S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) | - S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) | - S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | - S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | - S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0); - /* FIXME LOD it depends on texture base level ... */ - rstate->states[EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0] = - S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | - S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)); - - rstate->states[EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = - S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)) | -S_03C008_TYPE(1); - radeon_state_pm4(rstate); - -} - - -static void eg_resource(struct pipe_context *ctx, struct radeon_state *rstate, - const struct pipe_sampler_view *view, unsigned id) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_screen *rscreen = rctx->screen; - const struct util_format_description *desc; - struct r600_resource_texture *tmp; - struct r600_resource *rbuffer; - unsigned format; - uint32_t word4 = 0, yuv_format = 0, pitch = 0; - unsigned char swizzle[4]; - int r; - - rstate->cpm4 = 0; - swizzle[0] = view->swizzle_r; - swizzle[1] = view->swizzle_g; - swizzle[2] = view->swizzle_b; - swizzle[3] = view->swizzle_a; - format = r600_translate_texformat(view->texture->format, - swizzle, - &word4, &yuv_format); - if (format == ~0) { - return; - } - desc = util_format_description(view->texture->format); - if (desc == NULL) { - R600_ERR("unknow format %d\n", view->texture->format); - return; - } - radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_PS); - tmp = (struct r600_resource_texture*)view->texture; - rbuffer = &tmp->resource; - if (tmp->depth) { - r = r600_texture_from_depth(ctx, tmp, view->first_level); - if (r) { - return; - } - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], tmp->uncompressed); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], tmp->uncompressed); - } else { - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], rbuffer->bo); - } - rstate->nbo = 2; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[1] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[3] = RADEON_GEM_DOMAIN_GTT; - - pitch = (tmp->pitch[0] / tmp->bpt); - pitch = (pitch + 0x7) & ~0x7; - - /* FIXME properly handle first level != 0 */ - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD0] = - S_030000_DIM(r600_tex_dim(view->texture->target)) | - S_030000_PITCH((pitch / 8) - 1) | - S_030000_TEX_WIDTH(view->texture->width0 - 1); - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD1] = - S_030004_TEX_HEIGHT(view->texture->height0 - 1) | - S_030004_TEX_DEPTH(view->texture->depth0 - 1); - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD2] = tmp->offset[0] >> 8; - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8; - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD4] = - word4 | - S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) | - S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) | - S_030010_REQUEST_SIZE(1) | - S_030010_BASE_LEVEL(view->first_level); - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD5] = - S_030014_LAST_LEVEL(view->last_level) | - S_030014_BASE_ARRAY(0) | - S_030014_LAST_ARRAY(0); - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD6] = 0; - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD7] = - S_03001C_DATA_FORMAT(format) | - S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE); - radeon_state_pm4(rstate); -} - -static void eg_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) -{ - struct r600_screen *rscreen = rctx->screen; - const struct pipe_blend_state *pbs = &rctx->blend->state.blend; - int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; - uint32_t color_control, target_mask, shader_mask; - int i; - - target_mask = 0; - shader_mask = 0; - color_control = S_028808_MODE(1); - - for (i = 0; i < nr_cbufs; i++) { - shader_mask |= 0xf << (i * 4); - } - - if (pbs->logicop_enable) { - color_control |= (pbs->logicop_func << 16) | (pbs->logicop_func << 20); - } else { - color_control |= (0xcc << 16); - } - - if (pbs->independent_blend_enable) { - for (i = 0; i < nr_cbufs; i++) { - target_mask |= (pbs->rt[i].colormask << (4 * i)); - } - } else { - for (i = 0; i < nr_cbufs; i++) { - target_mask |= (pbs->rt[0].colormask << (4 * i)); - } - } - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB_CNTL, 0, 0); - rstate->states[EG_CB_CNTL__CB_SHADER_MASK] = shader_mask; - rstate->states[EG_CB_CNTL__CB_TARGET_MASK] = target_mask; - rstate->states[EG_CB_CNTL__CB_COLOR_CONTROL] = color_control; - rstate->states[EG_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; - rstate->states[EG_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000; - rstate->states[EG_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; - radeon_state_pm4(rstate); -} - - -static void eg_init_config(struct r600_context *rctx) -{ - int ps_prio; - int vs_prio; - int gs_prio; - int es_prio; - int hs_prio, cs_prio, ls_prio; - int num_ps_gprs; - int num_vs_gprs; - int num_gs_gprs; - int num_es_gprs; - int num_hs_gprs; - int num_ls_gprs; - int num_temp_gprs; - int num_ps_threads; - int num_vs_threads; - int num_gs_threads; - int num_es_threads; - int num_hs_threads; - int num_ls_threads; - int num_ps_stack_entries; - int num_vs_stack_entries; - int num_gs_stack_entries; - int num_es_stack_entries; - int num_hs_stack_entries; - int num_ls_stack_entries; - enum radeon_family family; - - family = radeon_get_family(rctx->rw); - ps_prio = 0; - vs_prio = 1; - gs_prio = 2; - es_prio = 3; - hs_prio = 0; - ls_prio = 0; - cs_prio = 0; - - switch (family) { - case CHIP_CEDAR: - default: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 96; - num_vs_threads = 16; - num_gs_threads = 16; - num_es_threads = 16; - num_hs_threads = 16; - num_ls_threads = 16; - num_ps_stack_entries = 42; - num_vs_stack_entries = 42; - num_gs_stack_entries = 42; - num_es_stack_entries = 42; - num_hs_stack_entries = 42; - num_ls_stack_entries = 42; - break; - case CHIP_REDWOOD: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 42; - num_vs_stack_entries = 42; - num_gs_stack_entries = 42; - num_es_stack_entries = 42; - num_hs_stack_entries = 42; - num_ls_stack_entries = 42; - break; - case CHIP_JUNIPER: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 85; - num_vs_stack_entries = 85; - num_gs_stack_entries = 85; - num_es_stack_entries = 85; - num_hs_stack_entries = 85; - num_ls_stack_entries = 85; - break; - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 85; - num_vs_stack_entries = 85; - num_gs_stack_entries = 85; - num_es_stack_entries = 85; - num_hs_stack_entries = 85; - num_ls_stack_entries = 85; - break; - } - - radeon_state_init(&rctx->config, rctx->rw, R600_STATE_CONFIG, 0, 0); - - rctx->config.states[EG_CONFIG__SQ_CONFIG] = 0x00000000; - switch (family) { - case CHIP_CEDAR: - break; - default: - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); - break; - } - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_EXPORT_SRC_C(1); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_CS_PRIO(cs_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_LS_PRIO(ls_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_HS_PRIO(hs_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); - - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_ES_GPRS(num_es_gprs); - - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_3] = 0; - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_3] |= S_008C0C_NUM_HS_GPRS(num_hs_gprs); - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_3] |= S_008C0C_NUM_LS_GPRS(num_ls_gprs); - - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] = 0; - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] |= S_008C18_NUM_PS_THREADS(num_ps_threads); - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] |= S_008C18_NUM_VS_THREADS(num_vs_threads); - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] |= S_008C18_NUM_GS_THREADS(num_gs_threads); - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] |= S_008C18_NUM_ES_THREADS(num_es_threads); - - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_2] = 0; - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_2] |= S_008C1C_NUM_HS_THREADS(num_hs_threads); - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_2] |= S_008C1C_NUM_LS_THREADS(num_ls_threads); - - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C20_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C20_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C24_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C24_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_3] = 0; - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_3] |= S_008C28_NUM_HS_STACK_ENTRIES(num_hs_stack_entries); - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_3] |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries); - - rctx->config.states[EG_CONFIG__SPI_CONFIG_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__SPI_CONFIG_CNTL_1] = S_00913C_VTX_DONE_DELAY(4); - - rctx->config.states[EG_CONFIG__SX_MISC] = 0x00000000; - - rctx->config.states[EG_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00000000; - rctx->config.states[EG_CONFIG__PA_SC_MODE_CNTL_0] = 0x0; - rctx->config.states[EG_CONFIG__PA_SC_MODE_CNTL_1] = 0x0; - - rctx->config.states[EG_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; - - rctx->config.states[EG_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_GS_VERT_ITEMSIZE_1] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_GS_VERT_ITEMSIZE_2] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_GS_VERT_ITEMSIZE_3] = 0x00000000; - - rctx->config.states[EG_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_HOS_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_DECR] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GS_MODE] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_STRMOUT_CONFIG] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_STRMOUT_BUFFER_CONFIG] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_REUSE_OFF] = 0x00000001; - rctx->config.states[EG_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; -// rctx->config.states[EG_CONFIG__VGT_CACHE_INVALIDATION] = 0x2; -// rctx->config.states[EG_CONFIG__VGT_GS_VERTEX_REUSE] = 0x16; - rctx->config.states[EG_CONFIG__PA_CL_ENHANCE] = (3 << 1) | 1; - - radeon_state_pm4(&rctx->config); -} - -static int eg_vs_resource(struct r600_context *rctx, int id, struct r600_resource *rbuffer, uint32_t offset, - uint32_t stride, uint32_t format) -{ - struct radeon_state *vs_resource = &rctx->vs_resource[id]; - struct r600_screen *rscreen = rctx->screen; - - radeon_state_init(vs_resource, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_VS); - - radeon_ws_bo_reference(rscreen->rw, &vs_resource->bo[0], rbuffer->bo); - vs_resource->nbo = 1; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD0] = offset; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->size - offset - 1; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD2] = S_030008_STRIDE(stride) | - S_030008_DATA_FORMAT(format); - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD3] = S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | - S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | - S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | - S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W); - - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD5] = 0x00000000; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD6] = 0x00000000; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD7] = 0xC0000000; - vs_resource->placement[0] = RADEON_GEM_DOMAIN_GTT; - vs_resource->placement[1] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(vs_resource); -} - -static int eg_draw_vgt_init(struct r600_draw *draw, - int vgt_draw_initiator) -{ - struct r600_context *rctx = r600_context(draw->ctx); - struct r600_screen *rscreen = rctx->screen; - struct r600_resource *rbuffer = (struct r600_resource *)draw->index_buffer; - radeon_state_init(&draw->draw, rscreen->rw, R600_STATE_DRAW, 0, 0); - draw->draw.states[EG_DRAW__VGT_NUM_INDICES] = draw->count; - draw->draw.states[EG_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; - draw->draw.states[EG_DRAW__VGT_DMA_BASE] = draw->index_buffer_offset; - if (rbuffer) { - radeon_ws_bo_reference(rscreen->rw, &draw->draw.bo[0], rbuffer->bo); - draw->draw.placement[0] = RADEON_GEM_DOMAIN_GTT; - draw->draw.placement[1] = RADEON_GEM_DOMAIN_GTT; - draw->draw.nbo = 1; - } - return radeon_state_pm4(&draw->draw); -} - -static int eg_draw_vgt_prim(struct r600_draw *draw, - uint32_t prim, uint32_t vgt_dma_index_type) -{ - struct r600_context *rctx = r600_context(draw->ctx); - struct r600_screen *rscreen = rctx->screen; - radeon_state_init(&draw->vgt, rscreen->rw, R600_STATE_VGT, 0, 0); - draw->vgt.states[EG_VGT__VGT_PRIMITIVE_TYPE] = prim; - draw->vgt.states[EG_VGT__VGT_MAX_VTX_INDX] = draw->max_index; - draw->vgt.states[EG_VGT__VGT_MIN_VTX_INDX] = draw->min_index; - draw->vgt.states[EG_VGT__VGT_INDX_OFFSET] = draw->start; - draw->vgt.states[EG_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type; - draw->vgt.states[EG_VGT__VGT_PRIMITIVEID_EN] = 0x00000000; - draw->vgt.states[EG_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; - draw->vgt.states[EG_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000; - draw->vgt.states[EG_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000; - draw->vgt.states[EG_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000; - return radeon_state_pm4(&draw->vgt); -} - - -static int eg_ps_shader(struct r600_context *rctx, struct r600_context_state *rpshader, - struct radeon_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - const struct pipe_rasterizer_state *rasterizer; - struct r600_shader *rshader = &rpshader->shader; - unsigned i, tmp, exports_ps, num_cout; - boolean have_pos = FALSE; - - rasterizer = &rctx->rasterizer->state.rasterizer; - - radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS); - for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(i); - tmp |= S_028644_SEL_CENTROID(1); - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - have_pos = TRUE; - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); - } - if (rasterizer->sprite_coord_enable & (1 << i)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - state->states[EG_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp; - } - - exports_ps = 0; - num_cout = 0; - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - exports_ps |= 1; - else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - exports_ps |= (1 << (num_cout+1)); - num_cout++; - } - } - if (!exports_ps) { - /* always at least export 1 component per pixel */ - exports_ps = 2; - } - state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); - if (have_pos) { - state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_0] |= S_0286CC_POSITION_ENA(1); - state->states[EG_PS_SHADER__SPI_INPUT_Z] |= 1; - } - state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; - state->states[EG_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028844_NUM_GPRS(rshader->bc.ngpr) | S_028844_PRIME_CACHE_ON_DRAW(1) | - S_028844_STACK_SIZE(rshader->bc.nstack); - state->states[EG_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; - state->states[EG_PS_SHADER__SPI_BARYC_CNTL] = S_0286E0_PERSP_CENTROID_ENA(1) | - S_0286E0_LINEAR_CENTROID_ENA(1); - radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo); - state->nbo = 1; - state->placement[0] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(state); -} - -static int eg_vs_shader(struct r600_context *rctx, struct r600_context_state *rpshader, - struct radeon_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_shader *rshader = &rpshader->shader; - unsigned i, tmp; - - radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS); - for (i = 0; i < 10; i++) { - state->states[EG_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; - } - /* so far never got proper semantic id from tgsi */ - for (i = 0; i < 32; i++) { - tmp = i << ((i & 3) * 8); - state->states[EG_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp; - } - state->states[EG_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); - state->states[EG_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028860_NUM_GPRS(rshader->bc.ngpr) | - S_028860_STACK_SIZE(rshader->bc.nstack); - radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo); - radeon_ws_bo_reference(rscreen->rw, &state->bo[1], rpshader->bo); - state->nbo = 2; - state->placement[0] = RADEON_GEM_DOMAIN_GTT; - state->placement[2] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(state); -} - -static void eg_texture_state_scissor(struct r600_screen *rscreen, - struct r600_resource_texture *rtexture, - unsigned level) -{ - struct radeon_state *rstate = &rtexture->scissor[level]; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0); - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_0_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_0_TL] = 0x80000000; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_1_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_1_TL] = 0x80000000; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_2_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_2_TL] = 0x80000000; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_3_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_3_TL] = 0x80000000; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF; - rstate->states[EG_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA; - rstate->states[EG_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[EG_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = 0x80000000; - rstate->states[EG_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[EG_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = 0x80000000; - rstate->states[EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = 0x80000000; - rstate->states[EG_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[EG_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = 0x80000000; - - radeon_state_pm4(rstate); -} - -static void eg_texture_state_cb(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned cb, unsigned level) -{ - struct radeon_state *rstate; - struct r600_resource *rbuffer; - unsigned pitch, slice; - unsigned color_info; - unsigned format, swap, ntype, attrib; - const struct util_format_description *desc; - - rstate = &rtexture->cb[cb][level]; - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0, cb, 0); - rbuffer = &rtexture->resource; - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - pitch = (rtexture->pitch[level] / rtexture->bpt) / 8 - 1; - slice = (rtexture->pitch[level] / rtexture->bpt) * rtexture->height[level] / 64 - 1; - ntype = 0; - attrib = 0; - desc = util_format_description(rbuffer->base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = V_028C70_NUMBER_SRGB; - format = r600_translate_colorformat(rtexture->resource.base.b.format); - swap = r600_translate_colorswap(rtexture->resource.base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rtexture->uncompressed); - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->nbo = 1; - color_info = 0; - } else { - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->nbo = 1; - color_info = S_028C70_SOURCE_FORMAT(1); - } - color_info |= S_028C70_FORMAT(format) | - S_028C70_COMP_SWAP(swap) | - S_028C70_BLEND_CLAMP(1) | - S_028C70_NUMBER_TYPE(ntype); - rstate->states[EG_CB__CB_COLOR0_BASE] = rtexture->offset[level] >> 8; - rstate->states[EG_CB__CB_COLOR0_INFO] = color_info; - rstate->states[EG_CB__CB_COLOR0_PITCH] = S_028C64_PITCH_TILE_MAX(pitch); - rstate->states[EG_CB__CB_COLOR0_SLICE] = S_028C68_SLICE_TILE_MAX(slice); - rstate->states[EG_CB__CB_COLOR0_VIEW] = 0x00000000; - rstate->states[EG_CB__CB_COLOR0_ATTRIB] = S_028C74_NON_DISP_TILING_ORDER(1); - - radeon_state_pm4(rstate); -} - -static void eg_texture_state_db(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned level) -{ - struct radeon_state *rstate = &rtexture->db[level]; - struct r600_resource *rbuffer; - unsigned pitch, slice, format; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0); - rbuffer = &rtexture->resource; - rtexture->tilled = 1; - rtexture->array_mode = 2; - rtexture->tile_type = 1; - rtexture->depth = 1; - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - pitch = (rtexture->pitch[level] / rtexture->bpt) / 8 - 1; - slice = (rtexture->pitch[level] / rtexture->bpt) * rtexture->height[level] / 64 - 1; - format = r600_translate_dbformat(rbuffer->base.b.format); - rstate->states[EG_DB__DB_Z_READ_BASE] = rtexture->offset[level] >> 8; - rstate->states[EG_DB__DB_Z_WRITE_BASE] = rtexture->offset[level] >> 8; - rstate->states[EG_DB__DB_Z_INFO] = S_028040_ARRAY_MODE(rtexture->array_mode) | S_028040_FORMAT(format); - rstate->states[EG_DB__DB_DEPTH_VIEW] = 0x00000000; - rstate->states[EG_DB__DB_DEPTH_SIZE] = S_028058_PITCH_TILE_MAX(pitch); - rstate->states[EG_DB__DB_DEPTH_SLICE] = S_02805C_SLICE_TILE_MAX(slice); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->nbo = 1; - - radeon_state_pm4(rstate); -} - -static void eg_texture_state_viewport(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned level) -{ - struct radeon_state *rstate = &rtexture->viewport[level]; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[EG_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui((float)rtexture->width[level]/2.0); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui((float)rtexture->width[level]/2.0); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui((float)rtexture->height[level]/2.0); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui((float)-rtexture->height[level]/2.0); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = 0x3F000000; - rstate->states[EG_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = 0x3F000000; - rstate->states[EG_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - rstate->states[EG_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; - - radeon_state_pm4(rstate); -} - -struct r600_context_hw_state_vtbl eg_hw_state_vtbl = { - .blend = eg_blend, - .ucp = eg_ucp, - .cb = eg_cb, - .db = eg_db, - .rasterizer = eg_rasterizer, - .scissor = eg_scissor, - .viewport = eg_viewport, - .dsa = eg_dsa, - .sampler_border = eg_sampler_border, - .sampler = eg_sampler, - .resource = eg_resource, - .cb_cntl = eg_cb_cntl, - .vs_resource = eg_vs_resource, - .vgt_init = eg_draw_vgt_init, - .vgt_prim = eg_draw_vgt_prim, - .vs_shader = eg_vs_shader, - .ps_shader = eg_ps_shader, - .init_config = eg_init_config, - .texture_state_viewport = eg_texture_state_viewport, - .texture_state_db = eg_texture_state_db, - .texture_state_cb = eg_texture_state_cb, - .texture_state_scissor = eg_texture_state_scissor, -}; - -void eg_set_constant_buffer(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - unsigned nconstant = 0, type, shader_class, size; - struct radeon_state *rstate, *rstates; - struct r600_resource *rbuffer = (struct r600_resource*)buffer; - - type = R600_STATE_CBUF; - - switch (shader) { - case PIPE_SHADER_VERTEX: - shader_class = R600_SHADER_VS; - rstates = rctx->vs_constant; - break; - case PIPE_SHADER_FRAGMENT: - shader_class = R600_SHADER_PS; - rstates = rctx->ps_constant; - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } - - rstate = &rstates[0]; - -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - nconstant = buffer->width0 / 16; - size = ALIGN_DIVUP(nconstant, 16); - - radeon_state_init(rstate, rscreen->rw, type, 0, shader_class); - rstate->states[EG_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0] = size; - rstate->states[EG_VS_CBUF__ALU_CONST_CACHE_VS_0] = 0; - - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; - if (radeon_state_pm4(rstate)) - return; - radeon_draw_bind(&rctx->draw, rstate); -} diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index 4e3514638b..be81c28b43 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "evergreend.h" +#include "r600_formats.h" static INLINE uint32_t r600_translate_blend_function(int blend_func) { @@ -123,6 +124,21 @@ static INLINE uint32_t r600_translate_stencil_op(int s_op) return 0; } +static INLINE uint32_t r600_translate_fill(uint32_t func) +{ + switch(func) { + case PIPE_POLYGON_MODE_FILL: + return 2; + case PIPE_POLYGON_MODE_LINE: + return 1; + case PIPE_POLYGON_MODE_POINT: + return 0; + default: + assert(0); + return 0; + } +} + /* translates straight */ static INLINE uint32_t r600_translate_ds_func(int func) { @@ -136,17 +152,17 @@ static inline unsigned r600_tex_wrap(unsigned wrap) case PIPE_TEX_WRAP_REPEAT: return V_03C000_SQ_TEX_WRAP; case PIPE_TEX_WRAP_CLAMP: - return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return V_03C000_SQ_TEX_CLAMP_HALF_BORDER; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return V_03C000_SQ_TEX_CLAMP_BORDER; case PIPE_TEX_WRAP_MIRROR_REPEAT: return V_03C000_SQ_TEX_MIRROR; case PIPE_TEX_WRAP_MIRROR_CLAMP: - return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return V_03C000_SQ_TEX_MIRROR_ONCE_BORDER; } @@ -261,6 +277,14 @@ static inline uint32_t r600_translate_dbformat(enum pipe_format format) } } +static inline uint32_t r600_translate_stencilformat(enum pipe_format format) +{ + if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) + return 1; + else + return 0; +} + static inline uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { @@ -283,6 +307,15 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_B4G4R4A4_UNORM: case PIPE_FORMAT_B4G4R4X4_UNORM: return V_028C70_SWAP_ALT; + + case PIPE_FORMAT_Z16_UNORM: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_R8G8_UNORM: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_R16_UNORM: + return V_028C70_SWAP_STD; /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: @@ -310,12 +343,19 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_028C70_SWAP_STD; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_028C70_SWAP_STD; + case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: return V_028C70_SWAP_STD_REV; + case PIPE_FORMAT_R16G16_UNORM: + return V_028C70_SWAP_STD; + /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: @@ -357,6 +397,15 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_B4G4R4X4_UNORM: return V_028C70_COLOR_4_4_4_4; + case PIPE_FORMAT_Z16_UNORM: + return V_028C70_COLOR_16; + + case PIPE_FORMAT_R8G8_UNORM: + return V_028C70_COLOR_8_8; + + case PIPE_FORMAT_R16_UNORM: + return V_028C70_COLOR_16; + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: @@ -383,18 +432,37 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_028C70_COLOR_8_24; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_028C70_COLOR_24_8; + case PIPE_FORMAT_R32_FLOAT: return V_028C70_COLOR_32_FLOAT; + case PIPE_FORMAT_R16G16_FLOAT: + return V_028C70_COLOR_16_16_FLOAT; + + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16_UNORM: + return V_028C70_COLOR_16_16; + /* 64-bit buffers. */ + case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: return V_028C70_COLOR_16_16_16_16; + + case PIPE_FORMAT_R16G16B16_FLOAT: case PIPE_FORMAT_R16G16B16A16_FLOAT: return V_028C70_COLOR_16_16_16_16_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: return V_028C70_COLOR_32_32_FLOAT; + case PIPE_FORMAT_R32G32_SSCALED: + return V_028C70_COLOR_32_32; + /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32_FLOAT: return V_028C70_COLOR_32_32_32_FLOAT; @@ -431,4 +499,173 @@ static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) return r600_translate_colorformat(format) != ~0; } +static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) +{ + uint32_t result = 0; + const struct util_format_description *desc; + unsigned i; + + desc = util_format_description(format); + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { + goto out_unknown; + } + + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + switch (desc->channel[i].type) { + /* Half-floats, floats, doubles */ + case UTIL_FORMAT_TYPE_FLOAT: + switch (desc->channel[i].size) { + case 16: + switch (desc->nr_channels) { + case 1: + result = FMT_16_FLOAT; + break; + case 2: + result = FMT_16_16_FLOAT; + break; + case 3: + result = FMT_16_16_16_FLOAT; + break; + case 4: + result = FMT_16_16_16_16_FLOAT; + break; + } + break; + case 32: + switch (desc->nr_channels) { + case 1: + result = FMT_32_FLOAT; + break; + case 2: + result = FMT_32_32_FLOAT; + break; + case 3: + result = FMT_32_32_32_FLOAT; + break; + case 4: + result = FMT_32_32_32_32_FLOAT; + break; + } + break; + default: + goto out_unknown; + } + break; + /* Unsigned ints */ + case UTIL_FORMAT_TYPE_UNSIGNED: + /* Signed ints */ + case UTIL_FORMAT_TYPE_SIGNED: + switch (desc->channel[i].size) { + case 8: + switch (desc->nr_channels) { + case 1: + result = FMT_8; + break; + case 2: + result = FMT_8_8; + break; + case 3: +// result = V_038008_FMT_8_8_8; /* fails piglit draw-vertices test */ +// break; + case 4: + result = FMT_8_8_8_8; + break; + } + break; + case 16: + switch (desc->nr_channels) { + case 1: + result = FMT_16; + break; + case 2: + result = FMT_16_16; + break; + case 3: +// result = V_038008_FMT_16_16_16; /* fails piglit draw-vertices test */ +// break; + case 4: + result = FMT_16_16_16_16; + break; + } + break; + case 32: + switch (desc->nr_channels) { + case 1: + result = FMT_32; + break; + case 2: + result = FMT_32_32; + break; + case 3: + result = FMT_32_32_32; + break; + case 4: + result = FMT_32_32_32_32; + break; + } + break; + default: + goto out_unknown; + } + break; + default: + goto out_unknown; + } + + result = S_030008_DATA_FORMAT(result); + + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { + result |= S_030008_FORMAT_COMP_ALL(1); + } + if (desc->channel[i].normalized) { + result |= S_030008_NUM_FORMAT_ALL(0); + } else { + result |= S_030008_NUM_FORMAT_ALL(2); + } + return result; +out_unknown: + R600_ERR("unsupported vertex format %s\n", util_format_name(format)); + return ~0; +} + +static INLINE uint32_t r600_translate_vertex_data_swizzle(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + unsigned i; + uint32_t word3; + + assert(format); + + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { + fprintf(stderr, "r600: Bad format %s in %s:%d\n", + util_format_short_name(format), __FUNCTION__, __LINE__); + return 0; + } + + word3 = 0; + for (i = 0; i < desc->nr_channels; i++) { + switch (i) { + case 0: + word3 |= S_03000C_DST_SEL_X(desc->swizzle[0]); + break; + case 1: + word3 |= S_03000C_DST_SEL_Y(desc->swizzle[1]); + break; + case 2: + word3 |= S_03000C_DST_SEL_Z(desc->swizzle[2]); + break; + case 3: + word3 |= S_03000C_DST_SEL_W(desc->swizzle[3]); + break; + } + } + return word3; +} + #endif diff --git a/src/gallium/drivers/r600/eg_states_inc.h b/src/gallium/drivers/r600/eg_states_inc.h index 462f31cc79..1379c11291 100644 --- a/src/gallium/drivers/r600/eg_states_inc.h +++ b/src/gallium/drivers/r600/eg_states_inc.h @@ -150,13 +150,14 @@ #define EG_DSA__DB_DEPTH_CONTROL 7 #define EG_DSA__DB_SHADER_CONTROL 8 #define EG_DSA__DB_RENDER_CONTROL 9 -#define EG_DSA__DB_RENDER_OVERRIDE 10 -#define EG_DSA__DB_RENDER_OVERRIDE2 11 -#define EG_DSA__DB_SRESULTS_COMPARE_STATE0 12 -#define EG_DSA__DB_SRESULTS_COMPARE_STATE1 13 -#define EG_DSA__DB_PRELOAD_CONTROL 14 -#define EG_DSA__DB_ALPHA_TO_MASK 15 -#define EG_DSA_SIZE 16 +#define EG_DSA__DB_COUNT_CONTROL 10 +#define EG_DSA__DB_RENDER_OVERRIDE 11 +#define EG_DSA__DB_RENDER_OVERRIDE2 12 +#define EG_DSA__DB_SRESULTS_COMPARE_STATE0 13 +#define EG_DSA__DB_SRESULTS_COMPARE_STATE1 14 +#define EG_DSA__DB_PRELOAD_CONTROL 15 +#define EG_DSA__DB_ALPHA_TO_MASK 16 +#define EG_DSA_SIZE 17 #define EG_DSA_PM4 128 /* EG_VS_SHADER */ @@ -368,27 +369,30 @@ #define EG_GS_SAMPLER_PM4 128 /* EG_PS_SAMPLER_BORDER */ -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED 0 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN 1 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE 2 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA 3 -#define EG_PS_SAMPLER_BORDER_SIZE 4 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_INDEX 0 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED 1 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN 2 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE 3 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA 4 +#define EG_PS_SAMPLER_BORDER_SIZE 5 #define EG_PS_SAMPLER_BORDER_PM4 128 /* EG_VS_SAMPLER_BORDER */ -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_RED 0 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_GREEN 1 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_BLUE 2 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_ALPHA 3 -#define EG_VS_SAMPLER_BORDER_SIZE 4 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_INDEX 0 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_RED 1 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_GREEN 2 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_BLUE 3 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_ALPHA 4 +#define EG_VS_SAMPLER_BORDER_SIZE 5 #define EG_VS_SAMPLER_BORDER_PM4 128 /* EG_GS_SAMPLER_BORDER */ -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_RED 0 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_GREEN 1 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_BLUE 2 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_ALPHA 3 -#define EG_GS_SAMPLER_BORDER_SIZE 4 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_INDEX 0 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_RED 1 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_GREEN 2 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_BLUE 3 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_ALPHA 4 +#define EG_GS_SAMPLER_BORDER_SIZE 5 #define EG_GS_SAMPLER_BORDER_PM4 128 /* EG_CB */ diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c new file mode 100644 index 0000000000..935496c04a --- /dev/null +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -0,0 +1,1743 @@ +/* + * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* TODO: + * - fix mask for depth control & cull for query + */ +#include <stdio.h> +#include <errno.h> +#include <pipe/p_defines.h> +#include <pipe/p_state.h> +#include <pipe/p_context.h> +#include <tgsi/tgsi_scan.h> +#include <tgsi/tgsi_parse.h> +#include <tgsi/tgsi_util.h> +#include <util/u_blitter.h> +#include <util/u_double_list.h> +#include <util/u_transfer.h> +#include <util/u_surface.h> +#include <util/u_pack_color.h> +#include <util/u_memory.h> +#include <util/u_inlines.h> +#include <util/u_framebuffer.h> +#include <pipebuffer/pb_buffer.h> +#include "r600.h" +#include "evergreend.h" +#include "r600_resource.h" +#include "r600_shader.h" +#include "r600_pipe.h" +#include "eg_state_inlines.h" + +static void evergreen_set_blend_color(struct pipe_context *ctx, + const struct pipe_blend_color *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + + if (rstate == NULL) + return; + + rstate->id = R600_PIPE_STATE_BLEND_COLOR; + r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]); + rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void *evergreen_create_blend_state(struct pipe_context *ctx, + const struct pipe_blend_state *state) +{ + struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend); + struct r600_pipe_state *rstate; + u32 color_control, target_mask; + /* FIXME there is more then 8 framebuffer */ + unsigned blend_cntl[8]; + + if (blend == NULL) { + return NULL; + } + rstate = &blend->rstate; + + rstate->id = R600_PIPE_STATE_BLEND; + + target_mask = 0; + color_control = S_028808_MODE(1); + if (state->logicop_enable) { + color_control |= (state->logicop_func << 16) | (state->logicop_func << 20); + } else { + color_control |= (0xcc << 16); + } + /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ + if (state->independent_blend_enable) { + for (int i = 0; i < 8; i++) { + target_mask |= (state->rt[i].colormask << (4 * i)); + } + } else { + for (int i = 0; i < 8; i++) { + target_mask |= (state->rt[0].colormask << (4 * i)); + } + } + blend->cb_target_mask = target_mask; + r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, + color_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); + + for (int i = 0; i < 8; i++) { + unsigned eqRGB = state->rt[i].rgb_func; + unsigned srcRGB = state->rt[i].rgb_src_factor; + unsigned dstRGB = state->rt[i].rgb_dst_factor; + unsigned eqA = state->rt[i].alpha_func; + unsigned srcA = state->rt[i].alpha_src_factor; + unsigned dstA = state->rt[i].alpha_dst_factor; + + blend_cntl[i] = 0; + if (!state->rt[i].blend_enable) + continue; + + blend_cntl[i] |= S_028780_BLEND_CONTROL_ENABLE(1); + blend_cntl[i] |= S_028780_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); + blend_cntl[i] |= S_028780_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); + blend_cntl[i] |= S_028780_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); + + if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { + blend_cntl[i] |= S_028780_SEPARATE_ALPHA_BLEND(1); + blend_cntl[i] |= S_028780_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); + blend_cntl[i] |= S_028780_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); + blend_cntl[i] |= S_028780_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); + } + } + for (int i = 0; i < 8; i++) { + r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl[i], 0xFFFFFFFF, NULL); + } + + return rstate; +} + +static void evergreen_bind_blend_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state; + struct r600_pipe_state *rstate; + + if (state == NULL) + return; + rstate = &blend->rstate; + rctx->states[rstate->id] = rstate; + rctx->cb_target_mask = blend->cb_target_mask; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void *evergreen_create_dsa_state(struct pipe_context *ctx, + const struct pipe_depth_stencil_alpha_state *state) +{ + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; + unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control; + + if (rstate == NULL) { + return NULL; + } + + rstate->id = R600_PIPE_STATE_DSA; + /* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */ + /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be + * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will + * be set if shader use texkill instruction + */ + db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); + stencil_ref_mask = 0; + stencil_ref_mask_bf = 0; + db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | + S_028800_Z_WRITE_ENABLE(state->depth.writemask) | + S_028800_ZFUNC(state->depth.func); + + /* stencil */ + if (state->stencil[0].enabled) { + db_depth_control |= S_028800_STENCIL_ENABLE(1); + db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); + db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); + db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); + db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); + + + stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | + S_028430_STENCILWRITEMASK(state->stencil[0].writemask); + if (state->stencil[1].enabled) { + db_depth_control |= S_028800_BACKFACE_ENABLE(1); + db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); + db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); + db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); + db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); + stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | + S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); + } + } + + /* alpha */ + alpha_test_control = 0; + alpha_ref = 0; + if (state->alpha.enabled) { + alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); + alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); + alpha_ref = fui(state->alpha.ref_value); + } + + /* misc */ + db_render_control = 0; + db_render_override = S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) | + S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | + S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); + /* TODO db_render_override depends on query */ + r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028430_DB_STENCILREFMASK, stencil_ref_mask, + 0xFFFFFFFF & C_028430_STENCILREF, NULL); + r600_pipe_state_add_reg(rstate, + R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf, + 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL); + r600_pipe_state_add_reg(rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL); + r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL); + + return rstate; +} + +static void *evergreen_create_rs_state(struct pipe_context *ctx, + const struct pipe_rasterizer_state *state) +{ + struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer); + struct r600_pipe_state *rstate; + unsigned tmp; + unsigned prov_vtx = 1, polygon_dual_mode; + unsigned clip_rule; + + if (rs == NULL) { + return NULL; + } + + rstate = &rs->rstate; + rs->flatshade = state->flatshade; + rs->sprite_coord_enable = state->sprite_coord_enable; + + clip_rule = state->scissor ? 0xAAAA : 0xFFFF; + + /* offset */ + rs->offset_units = state->offset_units; + rs->offset_scale = state->offset_scale * 12.0f; + + rstate->id = R600_PIPE_STATE_RASTERIZER; + if (state->flatshade_first) + prov_vtx = 0; + tmp = 0x00000001; + if (state->sprite_coord_enable) { + tmp |= S_0286D4_PNT_SPRITE_ENA(1) | + S_0286D4_PNT_SPRITE_OVRD_X(2) | + S_0286D4_PNT_SPRITE_OVRD_Y(3) | + S_0286D4_PNT_SPRITE_OVRD_Z(0) | + S_0286D4_PNT_SPRITE_OVRD_W(1); + if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { + tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); + } + } + r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL); + + polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || + state->fill_back != PIPE_POLYGON_MODE_FILL); + r600_pipe_state_add_reg(rstate, R_028814_PA_SU_SC_MODE_CNTL, + S_028814_PROVOKING_VTX_LAST(prov_vtx) | + S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | + S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | + S_028814_FACE(!state->front_ccw) | + S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | + S_028814_POLY_MODE(polygon_dual_mode) | + S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | + S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL, + S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | + S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + /* point size 12.4 fixed point */ + tmp = (unsigned)(state->point_size * 8.0); + r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, 0x00000008, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, 0x00000005, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); + return rstate; +} + +static void evergreen_bind_rs_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + if (state == NULL) + return; + + rctx->flatshade = rs->flatshade; + rctx->sprite_coord_enable = rs->sprite_coord_enable; + rctx->rasterizer = rs; + + rctx->states[rs->rstate.id] = &rs->rstate; + r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); +} + +static void evergreen_delete_rs_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; + + if (rctx->rasterizer == rs) { + rctx->rasterizer = NULL; + } + if (rctx->states[rs->rstate.id] == &rs->rstate) { + rctx->states[rs->rstate.id] = NULL; + } + free(rs); +} + +static void *evergreen_create_sampler_state(struct pipe_context *ctx, + const struct pipe_sampler_state *state) +{ + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + union util_color uc; + + if (rstate == NULL) { + return NULL; + } + + rstate->id = R600_PIPE_STATE_SAMPLER; + util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, + S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | + S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | + S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | + S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) | + S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) | + S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | + S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | + S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); + /* FIXME LOD it depends on texture base level ... */ + r600_pipe_state_add_reg(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, + S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | + S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, + S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)) | + S_03C008_TYPE(1), + 0xFFFFFFFF, NULL); + + if (uc.ui) { + r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); + } + return rstate; +} + +static void *evergreen_create_vertex_elements(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_element *elements) +{ + struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); + + assert(count < 32); + v->count = count; + v->refcount = 1; + memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); + return v; +} + +static void evergreen_sampler_view_destroy(struct pipe_context *ctx, + struct pipe_sampler_view *state) +{ + struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state; + + pipe_resource_reference(&state->texture, NULL); + FREE(resource); +} + +static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_context *ctx, + struct pipe_resource *texture, + const struct pipe_sampler_view *state) +{ + struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); + struct r600_pipe_state *rstate; + const struct util_format_description *desc; + struct r600_resource_texture *tmp; + struct r600_resource *rbuffer; + unsigned format; + uint32_t word4 = 0, yuv_format = 0, pitch = 0; + unsigned char swizzle[4]; + struct r600_bo *bo[2]; + + if (resource == NULL) + return NULL; + rstate = &resource->state; + + /* initialize base object */ + resource->base = *state; + resource->base.texture = NULL; + pipe_reference(NULL, &texture->reference); + resource->base.texture = texture; + resource->base.reference.count = 1; + resource->base.context = ctx; + + swizzle[0] = state->swizzle_r; + swizzle[1] = state->swizzle_g; + swizzle[2] = state->swizzle_b; + swizzle[3] = state->swizzle_a; + format = r600_translate_texformat(state->format, + swizzle, + &word4, &yuv_format); + if (format == ~0) { + format = 0; + } + desc = util_format_description(state->format); + if (desc == NULL) { + R600_ERR("unknow format %d\n", state->format); + } + tmp = (struct r600_resource_texture*)texture; + rbuffer = &tmp->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; + /* FIXME depth texture decompression */ + if (tmp->depth) { + r600_texture_depth_flush(ctx, texture); + tmp = (struct r600_resource_texture*)texture; + rbuffer = &tmp->flushed_depth_texture->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; + } + pitch = align(tmp->pitch_in_pixels[0], 8); + + /* FIXME properly handle first level != 0 */ + r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, + S_030000_DIM(r600_tex_dim(texture->target)) | + S_030000_PITCH((pitch / 8) - 1) | + S_030000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, + S_030004_TEX_HEIGHT(texture->height0 - 1) | + S_030004_TEX_DEPTH(texture->depth0 - 1), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, + (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, + (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); + r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, + word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) | + S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) | + S_030010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, + S_030014_LAST_LEVEL(state->last_level) | + S_030014_BASE_ARRAY(0) | + S_030014_LAST_ARRAY(0), 0xffffffff, NULL); + r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, + S_03001C_DATA_FORMAT(format) | + S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL); + + return &resource->base; +} + +static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, + struct pipe_sampler_view **views) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; + + for (int i = 0; i < count; i++) { + if (resource[i]) { + evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS); + } + } +} + +static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, + struct pipe_sampler_view **views) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; + + rctx->ps_samplers.views = resource; + rctx->ps_samplers.n_views = count; + + for (int i = 0; i < count; i++) { + if (resource[i]) { + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + } + } +} + +static void evergreen_bind_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; + + if (state == NULL) + return; + rctx->states[rstate->id] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void evergreen_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + + rctx->ps_samplers.samplers = states; + rctx->ps_samplers.n_samplers = count; + + for (int i = 0; i < count; i++) { + evergreen_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i); + } +} + +static void evergreen_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void **states) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + + for (int i = 0; i < count; i++) { + evergreen_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i); + } +} + +static void evergreen_delete_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; + + if (rctx->states[rstate->id] == rstate) { + rctx->states[rstate->id] = NULL; + } + for (int i = 0; i < rstate->nregs; i++) { + r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); + } + free(rstate); +} + +static void evergreen_delete_vertex_element(struct pipe_context *ctx, void *state) +{ + struct r600_vertex_element *v = (struct r600_vertex_element*)state; + + if (v == NULL) + return; + if (--v->refcount) + return; + free(v); +} + +static void evergreen_set_clip_state(struct pipe_context *ctx, + const struct pipe_clip_state *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + + if (rstate == NULL) + return; + + rctx->clip = *state; + rstate->id = R600_PIPE_STATE_CLIP; + for (int i = 0; i < state->nr; i++) { + r600_pipe_state_add_reg(rstate, + R_0285BC_PA_CL_UCP0_X + i * 4, + fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0285C0_PA_CL_UCP0_Y + i * 4, + fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0285C4_PA_CL_UCP0_Z + i * 4, + fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0285C8_PA_CL_UCP0_W + i * 4, + fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); + } + r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, + S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) | + S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) | + S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_CLIP]); + rctx->states[R600_PIPE_STATE_CLIP] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void evergreen_bind_vertex_elements(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_vertex_element *v = (struct r600_vertex_element*)state; + + evergreen_delete_vertex_element(ctx, rctx->vertex_elements); + rctx->vertex_elements = v; + if (v) { + v->refcount++; +// rctx->vs_rebuild = TRUE; + } +} + +static void evergreen_set_polygon_stipple(struct pipe_context *ctx, + const struct pipe_poly_stipple *state) +{ +} + +static void evergreen_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) +{ +} + +static void evergreen_set_scissor_state(struct pipe_context *ctx, + const struct pipe_scissor_state *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + u32 tl, br; + + if (rstate == NULL) + return; + + rstate->id = R600_PIPE_STATE_SCISSOR; + tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny); + br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); + r600_pipe_state_add_reg(rstate, + R_028210_PA_SC_CLIPRECT_0_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028214_PA_SC_CLIPRECT_0_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028218_PA_SC_CLIPRECT_1_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02821C_PA_SC_CLIPRECT_1_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028220_PA_SC_CLIPRECT_2_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028224_PA_SC_CLIPRECT_2_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028228_PA_SC_CLIPRECT_3_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02822C_PA_SC_CLIPRECT_3_BR, br, + 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_SCISSOR]); + rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void evergreen_set_stencil_ref(struct pipe_context *ctx, + const struct pipe_stencil_ref *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + u32 tmp; + + if (rstate == NULL) + return; + + rctx->stencil_ref = *state; + rstate->id = R600_PIPE_STATE_STENCIL_REF; + tmp = S_028430_STENCILREF(state->ref_value[0]); + r600_pipe_state_add_reg(rstate, + R_028430_DB_STENCILREFMASK, tmp, + ~C_028430_STENCILREF, NULL); + tmp = S_028434_STENCILREF_BF(state->ref_value[1]); + r600_pipe_state_add_reg(rstate, + R_028434_DB_STENCILREFMASK_BF, tmp, + ~C_028434_STENCILREF_BF, NULL); + + free(rctx->states[R600_PIPE_STATE_STENCIL_REF]); + rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void evergreen_set_viewport_state(struct pipe_context *ctx, + const struct pipe_viewport_state *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + + if (rstate == NULL) + return; + + rctx->viewport = *state; + rstate->id = R600_PIPE_STATE_VIEWPORT; + r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_VIEWPORT]); + rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, + const struct pipe_framebuffer_state *state, int cb) +{ + struct r600_resource_texture *rtex; + struct r600_resource *rbuffer; + unsigned level = state->cbufs[cb]->level; + unsigned pitch, slice; + unsigned color_info; + unsigned format, swap, ntype; + const struct util_format_description *desc; + struct r600_bo *bo[3]; + + rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; + rbuffer = &rtex->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; + bo[2] = rbuffer->bo; + + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * state->cbufs[cb]->height / 64 - 1; + ntype = 0; + desc = util_format_description(rtex->resource.base.b.format); + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + ntype = V_028C70_NUMBER_SRGB; + + format = r600_translate_colorformat(rtex->resource.base.b.format); + swap = r600_translate_colorswap(rtex->resource.base.b.format); + color_info = S_028C70_FORMAT(format) | + S_028C70_COMP_SWAP(swap) | + S_028C70_BLEND_CLAMP(1) | + S_028C70_NUMBER_TYPE(ntype); + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + color_info |= S_028C70_SOURCE_FORMAT(1); + + /* FIXME handle enabling of CB beyond BASE8 which has different offset */ + r600_pipe_state_add_reg(rstate, + R_028C60_CB_COLOR0_BASE + cb * 0x3C, + (state->cbufs[cb]->offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + r600_pipe_state_add_reg(rstate, + R_028C78_CB_COLOR0_DIM + cb * 0x3C, + 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028C70_CB_COLOR0_INFO + cb * 0x3C, + color_info, 0xFFFFFFFF, bo[0]); + r600_pipe_state_add_reg(rstate, + R_028C64_CB_COLOR0_PITCH + cb * 0x3C, + S_028C64_PITCH_TILE_MAX(pitch), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028C68_CB_COLOR0_SLICE + cb * 0x3C, + S_028C68_SLICE_TILE_MAX(slice), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028C6C_CB_COLOR0_VIEW + cb * 0x3C, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, + S_028C74_NON_DISP_TILING_ORDER(1), + 0xFFFFFFFF, bo[0]); +} + +static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, + const struct pipe_framebuffer_state *state) +{ + struct r600_resource_texture *rtex; + struct r600_resource *rbuffer; + unsigned level; + unsigned pitch, slice, format, stencil_format; + + if (state->zsbuf == NULL) + return; + + rtex = (struct r600_resource_texture*)state->zsbuf->texture; + rtex->tiled = 1; + rtex->array_mode = 2; + rtex->tile_type = 1; + rtex->depth = 1; + rbuffer = &rtex->resource; + + level = state->zsbuf->level; + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * state->zsbuf->height / 64 - 1; + format = r600_translate_dbformat(state->zsbuf->texture->format); + stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format); + + r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE, + (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE, + (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + + if (stencil_format) { + uint32_t stencil_offset; + + stencil_offset = ((state->zsbuf->height * rtex->pitch_in_bytes[level]) + 255) & ~255; + r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE, + (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE, + (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + } + + r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO, + S_028044_FORMAT(stencil_format), 0xFFFFFFFF, rbuffer->bo); + + r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO, + S_028040_ARRAY_MODE(rtex->array_mode) | S_028040_FORMAT(format), + 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE, + S_028058_PITCH_TILE_MAX(pitch), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02805C_DB_DEPTH_SLICE, + S_02805C_SLICE_TILE_MAX(slice), + 0xFFFFFFFF, NULL); +} + +static void evergreen_set_framebuffer_state(struct pipe_context *ctx, + const struct pipe_framebuffer_state *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + u32 shader_mask, tl, br, target_mask; + + if (rstate == NULL) + return; + + /* unreference old buffer and reference new one */ + rstate->id = R600_PIPE_STATE_FRAMEBUFFER; + + util_copy_framebuffer_state(&rctx->framebuffer, state); + + rctx->pframebuffer = &rctx->framebuffer; + + /* build states */ + for (int i = 0; i < state->nr_cbufs; i++) { + evergreen_cb(rctx, rstate, state, i); + } + if (state->zsbuf) { + evergreen_db(rctx, rstate, state); + } + + target_mask = 0x00000000; + target_mask = 0xFFFFFFFF; + shader_mask = 0; + for (int i = 0; i < state->nr_cbufs; i++) { + target_mask ^= 0xf << (i * 4); + shader_mask |= 0xf << (i * 4); + } + tl = S_028240_TL_X(0) | S_028240_TL_Y(0); + br = S_028244_BR_X(state->width) | S_028244_BR_Y(state->height); + + r600_pipe_state_add_reg(rstate, + R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028244_PA_SC_GENERIC_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028034_PA_SC_SCREEN_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028208_PA_SC_WINDOW_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, + 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK, + 0x00000000, target_mask, NULL); + r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK, + shader_mask, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, + 0x00000000, 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); + rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void evergreen_set_index_buffer(struct pipe_context *ctx, + const struct pipe_index_buffer *ib) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + if (ib) { + pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); + memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); + } else { + pipe_resource_reference(&rctx->index_buffer.buffer, NULL); + memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); + } + + /* TODO make this more like a state */ +} + +static void evergreen_set_vertex_buffers(struct pipe_context *ctx, unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + for (int i = 0; i < rctx->nvertex_buffer; i++) { + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); + } + memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); + for (int i = 0; i < count; i++) { + rctx->vertex_buffer[i].buffer = NULL; + if (r600_buffer_is_user_buffer(buffers[i].buffer)) + rctx->any_user_vbs = TRUE; + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + } + rctx->nvertex_buffer = count; +} + +static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, + struct pipe_resource *buffer) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_resource *rbuffer = (struct r600_resource*)buffer; + + switch (shader) { + case PIPE_SHADER_VERTEX: + rctx->vs_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028180_ALU_CONST_BUFFER_SIZE_VS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028980_ALU_CONST_CACHE_VS_0, + (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); + break; + case PIPE_SHADER_FRAGMENT: + rctx->ps_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028140_ALU_CONST_BUFFER_SIZE_PS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028940_ALU_CONST_CACHE_PS_0, + (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); + break; + default: + R600_ERR("unsupported %d\n", shader); + return; + } +} + +static void *evergreen_create_shader_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) +{ + struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); + int r; + + r = r600_pipe_shader_create(ctx, shader, state->tokens); + if (r) { + return NULL; + } + return shader; +} + +static void evergreen_bind_ps_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + /* TODO delete old shader */ + rctx->ps_shader = (struct r600_pipe_shader *)state; +} + +static void evergreen_bind_vs_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + /* TODO delete old shader */ + rctx->vs_shader = (struct r600_pipe_shader *)state; +} + +static void evergreen_delete_ps_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; + + if (rctx->ps_shader == shader) { + rctx->ps_shader = NULL; + } + /* TODO proper delete */ + free(shader); +} + +static void evergreen_delete_vs_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; + + if (rctx->vs_shader == shader) { + rctx->vs_shader = NULL; + } + /* TODO proper delete */ + free(shader); +} + +void evergreen_init_state_functions(struct r600_pipe_context *rctx) +{ + rctx->context.create_blend_state = evergreen_create_blend_state; + rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state; + rctx->context.create_fs_state = evergreen_create_shader_state; + rctx->context.create_rasterizer_state = evergreen_create_rs_state; + rctx->context.create_sampler_state = evergreen_create_sampler_state; + rctx->context.create_sampler_view = evergreen_create_sampler_view; + rctx->context.create_vertex_elements_state = evergreen_create_vertex_elements; + rctx->context.create_vs_state = evergreen_create_shader_state; + rctx->context.bind_blend_state = evergreen_bind_blend_state; + rctx->context.bind_depth_stencil_alpha_state = evergreen_bind_state; + rctx->context.bind_fragment_sampler_states = evergreen_bind_ps_sampler; + rctx->context.bind_fs_state = evergreen_bind_ps_shader; + rctx->context.bind_rasterizer_state = evergreen_bind_rs_state; + rctx->context.bind_vertex_elements_state = evergreen_bind_vertex_elements; + rctx->context.bind_vertex_sampler_states = evergreen_bind_vs_sampler; + rctx->context.bind_vs_state = evergreen_bind_vs_shader; + rctx->context.delete_blend_state = evergreen_delete_state; + rctx->context.delete_depth_stencil_alpha_state = evergreen_delete_state; + rctx->context.delete_fs_state = evergreen_delete_ps_shader; + rctx->context.delete_rasterizer_state = evergreen_delete_rs_state; + rctx->context.delete_sampler_state = evergreen_delete_state; + rctx->context.delete_vertex_elements_state = evergreen_delete_vertex_element; + rctx->context.delete_vs_state = evergreen_delete_vs_shader; + rctx->context.set_blend_color = evergreen_set_blend_color; + rctx->context.set_clip_state = evergreen_set_clip_state; + rctx->context.set_constant_buffer = evergreen_set_constant_buffer; + rctx->context.set_fragment_sampler_views = evergreen_set_ps_sampler_view; + rctx->context.set_framebuffer_state = evergreen_set_framebuffer_state; + rctx->context.set_polygon_stipple = evergreen_set_polygon_stipple; + rctx->context.set_sample_mask = evergreen_set_sample_mask; + rctx->context.set_scissor_state = evergreen_set_scissor_state; + rctx->context.set_stencil_ref = evergreen_set_stencil_ref; + rctx->context.set_vertex_buffers = evergreen_set_vertex_buffers; + rctx->context.set_index_buffer = evergreen_set_index_buffer; + rctx->context.set_vertex_sampler_views = evergreen_set_vs_sampler_view; + rctx->context.set_viewport_state = evergreen_set_viewport_state; + rctx->context.sampler_view_destroy = evergreen_sampler_view_destroy; +} + +void evergreen_init_config(struct r600_pipe_context *rctx) +{ + struct r600_pipe_state *rstate = &rctx->config; + int ps_prio; + int vs_prio; + int gs_prio; + int es_prio; + int hs_prio, cs_prio, ls_prio; + int num_ps_gprs; + int num_vs_gprs; + int num_gs_gprs; + int num_es_gprs; + int num_hs_gprs; + int num_ls_gprs; + int num_temp_gprs; + int num_ps_threads; + int num_vs_threads; + int num_gs_threads; + int num_es_threads; + int num_hs_threads; + int num_ls_threads; + int num_ps_stack_entries; + int num_vs_stack_entries; + int num_gs_stack_entries; + int num_es_stack_entries; + int num_hs_stack_entries; + int num_ls_stack_entries; + enum radeon_family family; + unsigned tmp; + + family = r600_get_family(rctx->radeon); + ps_prio = 0; + vs_prio = 1; + gs_prio = 2; + es_prio = 3; + hs_prio = 0; + ls_prio = 0; + cs_prio = 0; + + switch (family) { + case CHIP_CEDAR: + default: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 96; + num_vs_threads = 16; + num_gs_threads = 16; + num_es_threads = 16; + num_hs_threads = 16; + num_ls_threads = 16; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_REDWOOD: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_JUNIPER: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; + } + + tmp = 0x00000000; + switch (family) { + case CHIP_CEDAR: + break; + default: + tmp |= S_008C00_VC_ENABLE(1); + break; + } + tmp |= S_008C00_EXPORT_SRC_C(1); + tmp |= S_008C00_CS_PRIO(cs_prio); + tmp |= S_008C00_LS_PRIO(ls_prio); + tmp |= S_008C00_HS_PRIO(hs_prio); + tmp |= S_008C00_PS_PRIO(ps_prio); + tmp |= S_008C00_VS_PRIO(vs_prio); + tmp |= S_008C00_GS_PRIO(gs_prio); + tmp |= S_008C00_ES_PRIO(es_prio); + r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); + tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); + tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); + tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs); + r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C0C_NUM_HS_GPRS(num_hs_gprs); + tmp |= S_008C0C_NUM_LS_GPRS(num_ls_gprs); + r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C18_NUM_PS_THREADS(num_ps_threads); + tmp |= S_008C18_NUM_VS_THREADS(num_vs_threads); + tmp |= S_008C18_NUM_GS_THREADS(num_gs_threads); + tmp |= S_008C18_NUM_ES_THREADS(num_es_threads); + r600_pipe_state_add_reg(rstate, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C1C_NUM_HS_THREADS(num_hs_threads); + tmp |= S_008C1C_NUM_LS_THREADS(num_ls_threads); + r600_pipe_state_add_reg(rstate, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C20_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); + tmp |= S_008C20_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); + r600_pipe_state_add_reg(rstate, R_008C20_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C24_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); + tmp |= S_008C24_NUM_ES_STACK_ENTRIES(num_es_stack_entries); + r600_pipe_state_add_reg(rstate, R_008C24_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C28_NUM_HS_STACK_ENTRIES(num_hs_stack_entries); + tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries); + r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL); + +// r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x0, 0xFFFFFFFF, NULL); + +// r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028900_SQ_ESGS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028904_SQ_GSVS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_02891C_SQ_GS_VERT_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0x0, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL); + +r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, + 0x0, 0xFFFFFFFF, NULL); + + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); +void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate; + struct r600_resource *rbuffer; + unsigned i, j, offset, prim; + u32 vgt_dma_index_type, vgt_draw_initiator, mask; + struct pipe_vertex_buffer *vertex_buffer; + struct r600_draw rdraw; + struct r600_pipe_state vgt; + struct r600_drawl draw; + + if (rctx->any_user_vbs) { + r600_upload_user_buffers(rctx); + rctx->any_user_vbs = FALSE; + } + + memset(&draw, 0, sizeof(struct r600_drawl)); + draw.ctx = ctx; + draw.mode = info->mode; + draw.start = info->start; + draw.count = info->count; + if (info->indexed && rctx->index_buffer.buffer) { + draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; + draw.min_index = info->min_index; + draw.max_index = info->max_index; + draw.index_bias = info->index_bias; + + r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, + &rctx->index_buffer.index_size, + &draw.start, + info->count); + + draw.index_size = rctx->index_buffer.index_size; + pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); + draw.index_buffer_offset = draw.start * draw.index_size; + draw.start = 0; + r600_upload_index_buffer(rctx, &draw); + } else { + draw.index_size = 0; + draw.index_buffer = NULL; + draw.min_index = info->min_index; + draw.max_index = info->max_index; + draw.index_bias = info->start; + } + + switch (draw.index_size) { + case 2: + vgt_draw_initiator = 0; + vgt_dma_index_type = 0; + break; + case 4: + vgt_draw_initiator = 0; + vgt_dma_index_type = 1; + break; + case 0: + vgt_draw_initiator = 2; + vgt_dma_index_type = 0; + break; + default: + R600_ERR("unsupported index size %d\n", draw.index_size); + return; + } + if (r600_conv_pipe_prim(draw.mode, &prim)) + return; + + /* rebuild vertex shader if input format changed */ + if (r600_pipe_shader_update(&rctx->context, rctx->vs_shader)) + return; + if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader)) + return; + + for (i = 0 ; i < rctx->vertex_elements->count; i++) { + uint32_t word3, word2; + uint32_t format; + rstate = &rctx->vs_resource[i]; + + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + + j = rctx->vertex_elements->elements[i].vertex_buffer_index; + vertex_buffer = &rctx->vertex_buffer[j]; + rbuffer = (struct r600_resource*)vertex_buffer->buffer; + offset = rctx->vertex_elements->elements[i].src_offset + + vertex_buffer->buffer_offset + + r600_bo_offset(rbuffer->bo); + + format = r600_translate_vertex_data_type(rctx->vertex_elements->elements[i].src_format); + + word2 = format | S_030008_STRIDE(vertex_buffer->stride); + + word3 = r600_translate_vertex_data_swizzle(rctx->vertex_elements->elements[i].src_format); + + r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, word2, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, word3, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, 0xC0000000, 0xFFFFFFFF, NULL); + evergreen_vs_resource_set(&rctx->ctx, rstate, i); + } + + mask = 0; + for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { + mask |= (0xF << (i * 4)); + } + + vgt.id = R600_PIPE_STATE_VGT; + vgt.nregs = 0; + r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.index_bias, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.max_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.min_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); + + if (rctx->rasterizer && rctx->framebuffer.zsbuf) { + float offset_units = rctx->rasterizer->offset_units; + unsigned offset_db_fmt_cntl = 0, depth; + + switch (rctx->framebuffer.zsbuf->texture->format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + depth = -24; + offset_units *= 2.0f; + break; + case PIPE_FORMAT_Z32_FLOAT: + depth = -23; + offset_units *= 1.0f; + offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); + break; + case PIPE_FORMAT_Z16_UNORM: + depth = -16; + offset_units *= 4.0f; + break; + default: + return; + } + offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth); + r600_pipe_state_add_reg(&vgt, + R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, + offset_db_fmt_cntl, 0xFFFFFFFF, NULL); + } + r600_context_pipe_state_set(&rctx->ctx, &vgt); + + rdraw.vgt_num_indices = draw.count; + rdraw.vgt_num_instances = 1; + rdraw.vgt_index_type = vgt_dma_index_type; + rdraw.vgt_draw_initiator = vgt_draw_initiator; + rdraw.indices = NULL; + if (draw.index_buffer) { + rbuffer = (struct r600_resource*)draw.index_buffer; + rdraw.indices = rbuffer->bo; + rdraw.indices_bo_offset = draw.index_buffer_offset; + } + evergreen_context_draw(&rctx->ctx, &rdraw); + + pipe_resource_reference(&draw.index_buffer, NULL); +} + +void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = &shader->rstate; + struct r600_shader *rshader = &shader->shader; + unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; + int pos_index = -1, face_index = -1; + int ninterp = 0; + boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE; + unsigned spi_baryc_cntl; + + /* clear previous register */ + rstate->nregs = 0; + + for (i = 0; i < rshader->ninput; i++) { + tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + /* evergreen NUM_INTERP only contains values interpolated into the LDS, + POSITION goes via GPRs from the SC so isn't counted */ + if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) + pos_index = i; + else if (rshader->input[i].name == TGSI_SEMANTIC_FACE) + face_index = i; + else { + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR || + rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + ninterp++; + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + have_linear = TRUE; + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + have_perspective = TRUE; + if (rshader->input[i].centroid) + have_centroid = TRUE; + } + if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || + rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || + rshader->input[i].name == TGSI_SEMANTIC_POSITION) { + tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); + } + if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && + rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { + tmp |= S_028644_PT_SPRITE_TEX(1); + } + r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); + } + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_Z_EXPORT_ENABLE(1), + S_02880C_Z_EXPORT_ENABLE(1), NULL); + if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_STENCIL_EXPORT_ENABLE(1), + S_02880C_STENCIL_EXPORT_ENABLE(1), NULL); + } + + exports_ps = 0; + num_cout = 0; + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || + rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + exports_ps |= 1; + else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { + num_cout++; + } + } + exports_ps |= S_02884C_EXPORT_COLORS(num_cout); + if (!exports_ps) { + /* always at least export 1 component per pixel */ + exports_ps = 2; + } + + if (ninterp == 0) { + ninterp = 1; + have_perspective = TRUE; + } + + spi_ps_in_control_0 = S_0286CC_NUM_INTERP(ninterp) | + S_0286CC_PERSP_GRADIENT_ENA(have_perspective) | + S_0286CC_LINEAR_GRADIENT_ENA(have_linear); + spi_input_z = 0; + if (pos_index != -1) { + spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) | + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | + S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr); + spi_input_z |= 1; + } + + spi_ps_in_control_1 = 0; + if (face_index != -1) { + spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | + S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); + } + + spi_baryc_cntl = 0; + if (have_perspective) + spi_baryc_cntl |= S_0286E0_PERSP_CENTER_ENA(1) | + S_0286E0_PERSP_CENTROID_ENA(have_centroid); + if (have_linear) + spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) | + S_0286E0_LINEAR_CENTROID_ENA(have_centroid); + + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, + spi_ps_in_control_0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, + spi_ps_in_control_1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286E4_SPI_PS_IN_CONTROL_2, + 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0286E0_SPI_BARYC_CNTL, + spi_baryc_cntl, + 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, + R_028840_SQ_PGM_START_PS, + (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + r600_pipe_state_add_reg(rstate, + R_028844_SQ_PGM_RESOURCES_PS, + S_028844_NUM_GPRS(rshader->bc.ngpr) | + S_028844_PRIME_CACHE_ON_DRAW(1) | + S_028844_STACK_SIZE(rshader->bc.nstack), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028848_SQ_PGM_RESOURCES_2_PS, + 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02884C_SQ_PGM_EXPORTS_PS, + exports_ps, 0xFFFFFFFF, NULL); + + if (rshader->uses_kill) { + /* only set some bits here, the other bits are set in the dsa state */ + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_KILL_ENABLE(1), + S_02880C_KILL_ENABLE(1), NULL); + } + + r600_pipe_state_add_reg(rstate, + R_03A200_SQ_LOOP_CONST_0, 0x01000FFF, + 0xFFFFFFFF, NULL); +} + +void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_state *rstate = &shader->rstate; + struct r600_shader *rshader = &shader->shader; + unsigned spi_vs_out_id[10]; + unsigned i, tmp; + + /* clear previous register */ + rstate->nregs = 0; + + /* so far never got proper semantic id from tgsi */ + for (i = 0; i < 10; i++) { + spi_vs_out_id[i] = 0; + } + for (i = 0; i < 32; i++) { + tmp = i << ((i & 3) * 8); + spi_vs_out_id[i / 4] |= tmp; + } + for (i = 0; i < 10; i++) { + r600_pipe_state_add_reg(rstate, + R_02861C_SPI_VS_OUT_ID_0 + i * 4, + spi_vs_out_id[i], 0xFFFFFFFF, NULL); + } + + r600_pipe_state_add_reg(rstate, + R_0286C4_SPI_VS_OUT_CONFIG, + S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028860_SQ_PGM_RESOURCES_VS, + S_028860_NUM_GPRS(rshader->bc.ngpr) | + S_028860_STACK_SIZE(rshader->bc.nstack), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028864_SQ_PGM_RESOURCES_2_VS, + 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288A8_SQ_PGM_RESOURCES_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02885C_SQ_PGM_START_VS, + (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + r600_pipe_state_add_reg(rstate, + R_0288A4_SQ_PGM_START_FS, + (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + + r600_pipe_state_add_reg(rstate, + R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, + 0xFFFFFFFF, NULL); +} + +void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) +{ + struct pipe_depth_stencil_alpha_state dsa; + struct r600_pipe_state *rstate; + + memset(&dsa, 0, sizeof(dsa)); + + rstate = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa); + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + 0x0, + S_02880C_DUAL_EXPORT_ENABLE(1), NULL); + r600_pipe_state_add_reg(rstate, + R_028000_DB_RENDER_CONTROL, + S_028000_DEPTH_COPY_ENABLE(1) | + S_028000_STENCIL_COPY_ENABLE(1) | + S_028000_COPY_CENTROID(1), + S_028000_DEPTH_COPY_ENABLE(1) | + S_028000_STENCIL_COPY_ENABLE(1) | + S_028000_COPY_CENTROID(1), NULL); + return rstate; +} diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 77cd8f1588..8e96f9355e 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -26,6 +26,26 @@ #ifndef EVERGREEND_H #define EVERGREEND_H +/* evergreen values */ +#define EVERGREEN_CONFIG_REG_OFFSET 0X00008000 +#define EVERGREEN_CONFIG_REG_END 0X0000AC00 +#define EVERGREEN_CONTEXT_REG_OFFSET 0X00028000 +#define EVERGREEN_CONTEXT_REG_END 0X00029000 +#define EVERGREEN_RESOURCE_OFFSET 0x00030000 +#define EVERGREEN_RESOURCE_END 0x00034000 +#define EVERGREEN_LOOP_CONST_OFFSET 0x0003A200 +#define EVERGREEN_LOOP_CONST_END 0x0003A26C +#define EVERGREEN_BOOL_CONST_OFFSET 0x0003A500 +#define EVERGREEN_BOOL_CONST_END 0x0003A506 +#define EVERGREEN_SAMPLER_OFFSET 0X0003C000 +#define EVERGREEN_SAMPLER_END 0X0003CFF0 + +#define EVERGREEN_CTL_CONST_OFFSET 0x0003CFF0 +#define EVERGREEN_CTL_CONST_END 0x0003E200 + +#define EVENT_TYPE_ZPASS_DONE 0x15 +#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 + #define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7 #define PKT3_NOP 0x10 @@ -309,7 +329,7 @@ #define S_028C74_NON_DISP_TILING_ORDER(x) (((x) & 0x1) << 4) #define G_028C74_NON_DISP_TILING_ORDER(x) (((x) >> 4) & 0x1) #define C_028C74_NON_DISP_TILING_ORDER 0xFFFFFFEF - + #define R_028C78_CB_COLOR0_DIM 0x028C78 #define S_028C78_WIDTH_MAX(x) (((x) & 0xFFFF) << 0) #define G_028C78_WIDTH_MAX(x) (((x) >> 0) & 0xFFFF) @@ -662,60 +682,13 @@ #define S_028D34_DEPTH_HEIGHT_TILE_MAX(x) (((x) & 0x3FF) << 0) #define G_028D34_DEPTH_HEIGHT_TILE_MAX(x) (((x) >> 0) & 0x3FF) #define C_028D34_DEPTH_HEIGHT_TILE_MAX 0xFFFFFC00 -#define R_028D0C_DB_RENDER_CONTROL 0x028D0C -#define S_028D0C_STENCIL_COMPRESS_DISABLE(x) (((x) & 0x1) << 5) -#define S_028D0C_DEPTH_COMPRESS_DISABLE(x) (((x) & 0x1) << 6) -#define S_028D0C_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 15) -#define R_028D10_DB_RENDER_OVERRIDE 0x028D10 -#define V_028D10_FORCE_OFF 0 -#define V_028D10_FORCE_ENABLE 1 -#define V_028D10_FORCE_DISABLE 2 -#define S_028D10_FORCE_HIZ_ENABLE(x) (((x) & 0x3) << 0) -#define G_028D10_FORCE_HIZ_ENABLE(x) (((x) >> 0) & 0x3) -#define C_028D10_FORCE_HIZ_ENABLE 0xFFFFFFFC -#define S_028D10_FORCE_HIS_ENABLE0(x) (((x) & 0x3) << 2) -#define G_028D10_FORCE_HIS_ENABLE0(x) (((x) >> 2) & 0x3) -#define C_028D10_FORCE_HIS_ENABLE0 0xFFFFFFF3 -#define S_028D10_FORCE_HIS_ENABLE1(x) (((x) & 0x3) << 4) -#define G_028D10_FORCE_HIS_ENABLE1(x) (((x) >> 4) & 0x3) -#define C_028D10_FORCE_HIS_ENABLE1 0xFFFFFFCF -#define S_028D10_FORCE_SHADER_Z_ORDER(x) (((x) & 0x1) << 6) -#define G_028D10_FORCE_SHADER_Z_ORDER(x) (((x) >> 6) & 0x1) -#define C_028D10_FORCE_SHADER_Z_ORDER 0xFFFFFFBF -#define S_028D10_FAST_Z_DISABLE(x) (((x) & 0x1) << 7) -#define G_028D10_FAST_Z_DISABLE(x) (((x) >> 7) & 0x1) -#define C_028D10_FAST_Z_DISABLE 0xFFFFFF7F -#define S_028D10_FAST_STENCIL_DISABLE(x) (((x) & 0x1) << 8) -#define G_028D10_FAST_STENCIL_DISABLE(x) (((x) >> 8) & 0x1) -#define C_028D10_FAST_STENCIL_DISABLE 0xFFFFFEFF -#define S_028D10_NOOP_CULL_DISABLE(x) (((x) & 0x1) << 9) -#define G_028D10_NOOP_CULL_DISABLE(x) (((x) >> 9) & 0x1) -#define C_028D10_NOOP_CULL_DISABLE 0xFFFFFDFF -#define S_028D10_FORCE_COLOR_KILL(x) (((x) & 0x1) << 10) -#define G_028D10_FORCE_COLOR_KILL(x) (((x) >> 10) & 0x1) -#define C_028D10_FORCE_COLOR_KILL 0xFFFFFBFF -#define S_028D10_FORCE_Z_READ(x) (((x) & 0x1) << 11) -#define G_028D10_FORCE_Z_READ(x) (((x) >> 11) & 0x1) -#define C_028D10_FORCE_Z_READ 0xFFFFF7FF -#define S_028D10_FORCE_STENCIL_READ(x) (((x) & 0x1) << 12) -#define G_028D10_FORCE_STENCIL_READ(x) (((x) >> 12) & 0x1) -#define C_028D10_FORCE_STENCIL_READ 0xFFFFEFFF -#define S_028D10_FORCE_FULL_Z_RANGE(x) (((x) & 0x3) << 13) -#define G_028D10_FORCE_FULL_Z_RANGE(x) (((x) >> 13) & 0x3) -#define C_028D10_FORCE_FULL_Z_RANGE 0xFFFF9FFF -#define S_028D10_FORCE_QC_SMASK_CONFLICT(x) (((x) & 0x1) << 15) -#define G_028D10_FORCE_QC_SMASK_CONFLICT(x) (((x) >> 15) & 0x1) -#define C_028D10_FORCE_QC_SMASK_CONFLICT 0xFFFF7FFF -#define S_028D10_DISABLE_VIEWPORT_CLAMP(x) (((x) & 0x1) << 16) -#define G_028D10_DISABLE_VIEWPORT_CLAMP(x) (((x) >> 16) & 0x1) -#define C_028D10_DISABLE_VIEWPORT_CLAMP 0xFFFEFFFF -#define S_028D10_IGNORE_SC_ZRANGE(x) (((x) & 0x1) << 17) -#define G_028D10_IGNORE_SC_ZRANGE(x) (((x) >> 17) & 0x1) -#define C_028D10_IGNORE_SC_ZRANGE 0xFFFDFFFF #define R_02880C_DB_SHADER_CONTROL 0x02880C #define S_02880C_Z_EXPORT_ENABLE(x) (((x) & 0x1) << 0) #define G_02880C_Z_EXPORT_ENABLE(x) (((x) >> 0) & 0x1) #define C_02880C_Z_EXPORT_ENABLE 0xFFFFFFFE +#define S_02880C_STENCIL_EXPORT_ENABLE(x) (((x) & 0x1) << 1) +#define G_02880C_STENCIL_EXPORT_ENABLE(x) (((x) >> 1) & 0x1) +#define C_02880C_STENCIL_EXPORT_ENABLE 0xFFFFFFFD #define S_02880C_Z_ORDER(x) (((x) & 0x3) << 4) #define G_02880C_Z_ORDER(x) (((x) >> 4) & 0x3) #define C_02880C_Z_ORDER 0xFFFFFCFF @@ -729,29 +702,6 @@ #define S_02880C_DUAL_EXPORT_ENABLE(x) (((x) & 0x1) << 9) #define G_02880C_DUAL_EXPORT_ENABLE(x) (((x) >> 9) & 0x1) #define C_02880C_DUAL_EXPORT_ENABLE 0xFFFFFDFF -#define R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL 0x028DF8 -#define S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(x) (((x) & 0xFF) << 0) -#define G_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(x) (((x) >> 0) & 0xFF) -#define C_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS 0xFFFFFF00 -#define S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(x) (((x) & 0x1) << 8) -#define G_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(x) (((x) >> 8) & 0x1) -#define C_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT 0xFFFFFEFF -#define R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE 0x028E00 -#define S_028E00_SCALE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028E00_SCALE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028E00_SCALE 0x00000000 -#define R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET 0x028E04 -#define S_028E04_OFFSET(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028E04_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028E04_OFFSET 0x00000000 -#define R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE 0x028E08 -#define S_028E08_SCALE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028E08_SCALE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028E08_SCALE 0x00000000 -#define R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET 0x028E0C -#define S_028E0C_OFFSET(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028E0C_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028E0C_OFFSET 0x00000000 #define R_028A00_PA_SU_POINT_SIZE 0x028A00 #define S_028A00_HEIGHT(x) (((x) & 0xFFFF) << 0) #define G_028A00_HEIGHT(x) (((x) >> 0) & 0xFFFF) @@ -1037,9 +987,6 @@ #define S_030010_ENDIAN_SWAP(x) (((x) & 0x3) << 12) #define G_030010_ENDIAN_SWAP(x) (((x) >> 12) & 0x3) #define C_030010_ENDIAN_SWAP 0xFFFFCFFF -#define S_030010_REQUEST_SIZE(x) (((x) & 0x3) << 14) -#define G_030010_REQUEST_SIZE(x) (((x) >> 14) & 0x3) -#define C_030010_REQUEST_SIZE 0xFFFF3FFF #define S_030010_DST_SEL_X(x) (((x) & 0x7) << 16) #define G_030010_DST_SEL_X(x) (((x) >> 16) & 0x7) #define C_030010_DST_SEL_X 0xFFF8FFFF @@ -1103,43 +1050,12 @@ #define S_030008_DATA_FORMAT(x) (((x) & 0x3F) << 20) #define G_030008_DATA_FORMAT(x) (((x) >> 20) & 0x3F) #define C_030008_DATA_FORMAT 0xFC0FFFFF -#define V_030008_COLOR_INVALID 0x00000000 -#define V_030008_COLOR_8 0x00000001 -#define V_030008_COLOR_4_4 0x00000002 -#define V_030008_COLOR_3_3_2 0x00000003 -#define V_030008_COLOR_16 0x00000005 -#define V_030008_COLOR_16_FLOAT 0x00000006 -#define V_030008_COLOR_8_8 0x00000007 -#define V_030008_COLOR_5_6_5 0x00000008 -#define V_030008_COLOR_6_5_5 0x00000009 -#define V_030008_COLOR_1_5_5_5 0x0000000A -#define V_030008_COLOR_4_4_4_4 0x0000000B -#define V_030008_COLOR_5_5_5_1 0x0000000C -#define V_030008_COLOR_32 0x0000000D -#define V_030008_COLOR_32_FLOAT 0x0000000E -#define V_030008_COLOR_16_16 0x0000000F -#define V_030008_COLOR_16_16_FLOAT 0x00000010 -#define V_030008_COLOR_8_24 0x00000011 -#define V_030008_COLOR_8_24_FLOAT 0x00000012 -#define V_030008_COLOR_24_8 0x00000013 -#define V_030008_COLOR_24_8_FLOAT 0x00000014 -#define V_030008_COLOR_10_11_11 0x00000015 -#define V_030008_COLOR_10_11_11_FLOAT 0x00000016 -#define V_030008_COLOR_11_11_10 0x00000017 -#define V_030008_COLOR_11_11_10_FLOAT 0x00000018 -#define V_030008_COLOR_2_10_10_10 0x00000019 -#define V_030008_COLOR_8_8_8_8 0x0000001A -#define V_030008_COLOR_10_10_10_2 0x0000001B -#define V_030008_COLOR_X24_8_32_FLOAT 0x0000001C -#define V_030008_COLOR_32_32 0x0000001D -#define V_030008_COLOR_32_32_FLOAT 0x0000001E -#define V_030008_COLOR_16_16_16_16 0x0000001F -#define V_030008_COLOR_16_16_16_16_FLOAT 0x00000020 -#define V_030008_COLOR_32_32_32_32 0x00000022 -#define V_030008_COLOR_32_32_32_32_FLOAT 0x00000023 #define S_030008_NUM_FORMAT_ALL(x) (((x) & 0x3) << 26) #define G_030008_NUM_FORMAT_ALL(x) (((x) >> 26) & 0x3) #define C_030008_NUM_FORMAT_ALL 0xF3FFFFFF +#define V_030008_SQ_NUM_FORMAT_NORM 0x00000000 +#define V_030008_SQ_NUM_FORMAT_INT 0x00000001 +#define V_030008_SQ_NUM_FORMAT_SCALED 0x00000002 #define S_030008_FORMAT_COMP_ALL(x) (((x) & 0x1) << 28) #define G_030008_FORMAT_COMP_ALL(x) (((x) >> 28) & 0x1) #define C_030008_FORMAT_COMP_ALL 0xEFFFFFFF @@ -1166,6 +1082,22 @@ #define S_03000C_DST_SEL_W(x) (((x) & 0x7) << 12) #define G_03000C_DST_SEL_W(x) (((x) >> 12) & 0x7) +#define R_00A400_TD_PS_SAMPLER0_BORDER_INDEX 0x00A400 +#define R_00A404_TD_PS_SAMPLER0_BORDER_RED 0x00A404 +#define R_00A408_TD_PS_SAMPLER0_BORDER_GREEN 0x00A408 +#define R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE 0x00A40C +#define R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA 0x00A410 +#define R_00A414_TD_VS_SAMPLER0_BORDER_INDEX 0x00A414 +#define R_00A418_TD_VS_SAMPLER0_BORDER_RED 0x00A418 +#define R_00A41C_TD_VS_SAMPLER0_BORDER_GREEN 0x00A41C +#define R_00A420_TD_VS_SAMPLER0_BORDER_BLUE 0x00A420 +#define R_00A424_TD_VS_SAMPLER0_BORDER_ALPHA 0x00A424 +#define R_00A428_TD_GS_SAMPLER0_BORDER_INDEX 0x00A428 +#define R_00A42C_TD_GS_SAMPLER0_BORDER_RED 0x00A42C +#define R_00A430_TD_GS_SAMPLER0_BORDER_GREEN 0x00A430 +#define R_00A434_TD_GS_SAMPLER0_BORDER_BLUE 0x00A434 +#define R_00A438_TD_GS_SAMPLER0_BORDER_ALPHA 0x00A438 + #define R_03C000_SQ_TEX_SAMPLER_WORD0_0 0x03C000 #define S_03C000_CLAMP_X(x) (((x) & 0x7) << 0) #define G_03C000_CLAMP_X(x) (((x) >> 0) & 0x7) @@ -1456,4 +1388,488 @@ #define SQ_TEX_INST_SAMPLE 0x10 #define SQ_TEX_INST_SAMPLE_L 0x11 #define SQ_TEX_INST_SAMPLE_C 0x18 + +#define R_008A14_PA_CL_ENHANCE 0x00008A14 +#define R_008C0C_SQ_THREAD_RESOURCE_MGMT 0x00008C0C +#define R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 0x00008D8C +#define R_028000_DB_RENDER_CONTROL 0x00028000 +#define S_028000_DEPTH_CLEAR_ENABLE(x) (((x) & 0x1) << 0) +#define S_028000_STENCIL_CLEAR_ENABLE(x) (((x) & 0x1) << 1) +#define S_028000_DEPTH_COPY_ENABLE(x) (((x) & 0x1) << 2) +#define S_028000_STENCIL_COPY_ENABLE(x) (((x) & 0x1) << 3) +#define S_028000_RESUMMARIZE_ENABLE(x) (((x) & 0x1) << 4) +#define S_028000_STENCIL_COMPRESS_DISABLE(x) (((x) & 0x1) << 5) +#define S_028000_DEPTH_COMPRESS_DISABLE(x) (((x) & 0x1) << 6) +#define S_028000_COPY_CENTROID(x) (((x) & 0x1) << 7) +#define S_028000_COPY_SAMPLE(x) (((x) & 0x7) << 8) +#define S_028000_COLOR_DISABLE(x) (((x) & 0x1) << 12) +#define R_028004_DB_COUNT_CONTROL 0x00028004 +#define S_028004_ZPASS_INCREMENT_DISABLE (((x) & 0x1) << 0) +#define S_028004_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 1) +#define R_028008_DB_DEPTH_VIEW 0x00028008 +#define R_02800C_DB_RENDER_OVERRIDE 0x0002800C +#define V_02800C_FORCE_OFF 0 +#define V_02800C_FORCE_ENABLE 1 +#define V_02800C_FORCE_DISABLE 2 +#define S_02800C_FORCE_HIZ_ENABLE(x) (((x) & 0x3) << 0) +#define G_02800C_FORCE_HIZ_ENABLE(x) (((x) >> 0) & 0x3) +#define C_02800C_FORCE_HIZ_ENABLE 0xFFFFFFFC +#define S_02800C_FORCE_HIS_ENABLE0(x) (((x) & 0x3) << 2) +#define G_02800C_FORCE_HIS_ENABLE0(x) (((x) >> 2) & 0x3) +#define C_02800C_FORCE_HIS_ENABLE0 0xFFFFFFF3 +#define S_02800C_FORCE_HIS_ENABLE1(x) (((x) & 0x3) << 4) +#define G_02800C_FORCE_HIS_ENABLE1(x) (((x) >> 4) & 0x3) +#define C_02800C_FORCE_HIS_ENABLE1 0xFFFFFFCF +#define S_02800C_FORCE_SHADER_Z_ORDER(x) (((x) & 0x1) << 6) +#define G_02800C_FORCE_SHADER_Z_ORDER(x) (((x) >> 6) & 0x1) +#define C_02800C_FORCE_SHADER_Z_ORDER 0xFFFFFFBF +#define S_02800C_FAST_Z_DISABLE(x) (((x) & 0x1) << 7) +#define G_02800C_FAST_Z_DISABLE(x) (((x) >> 7) & 0x1) +#define C_02800C_FAST_Z_DISABLE 0xFFFFFF7F +#define S_02800C_FAST_STENCIL_DISABLE(x) (((x) & 0x1) << 8) +#define G_02800C_FAST_STENCIL_DISABLE(x) (((x) >> 8) & 0x1) +#define C_02800C_FAST_STENCIL_DISABLE 0xFFFFFEFF +#define S_02800C_NOOP_CULL_DISABLE(x) (((x) & 0x1) << 9) +#define G_02800C_NOOP_CULL_DISABLE(x) (((x) >> 9) & 0x1) +#define C_02800C_NOOP_CULL_DISABLE 0xFFFFFDFF +#define S_02800C_FORCE_COLOR_KILL(x) (((x) & 0x1) << 10) +#define G_02800C_FORCE_COLOR_KILL(x) (((x) >> 10) & 0x1) +#define C_02800C_FORCE_COLOR_KILL 0xFFFFFBFF +#define S_02800C_FORCE_Z_READ(x) (((x) & 0x1) << 11) +#define G_02800C_FORCE_Z_READ(x) (((x) >> 11) & 0x1) +#define C_02800C_FORCE_Z_READ 0xFFFFF7FF +#define S_02800C_FORCE_STENCIL_READ(x) (((x) & 0x1) << 12) +#define G_02800C_FORCE_STENCIL_READ(x) (((x) >> 12) & 0x1) +#define C_02800C_FORCE_STENCIL_READ 0xFFFFEFFF +#define S_02800C_FORCE_FULL_Z_RANGE(x) (((x) & 0x3) << 13) +#define G_02800C_FORCE_FULL_Z_RANGE(x) (((x) >> 13) & 0x3) +#define C_02800C_FORCE_FULL_Z_RANGE 0xFFFF9FFF +#define S_02800C_FORCE_QC_SMASK_CONFLICT(x) (((x) & 0x1) << 15) +#define G_02800C_FORCE_QC_SMASK_CONFLICT(x) (((x) >> 15) & 0x1) +#define C_02800C_FORCE_QC_SMASK_CONFLICT 0xFFFF7FFF +#define S_02800C_DISABLE_VIEWPORT_CLAMP(x) (((x) & 0x1) << 16) +#define G_02800C_DISABLE_VIEWPORT_CLAMP(x) (((x) >> 16) & 0x1) +#define C_02800C_DISABLE_VIEWPORT_CLAMP 0xFFFEFFFF +#define S_02800C_IGNORE_SC_ZRANGE(x) (((x) & 0x1) << 17) +#define G_02800C_IGNORE_SC_ZRANGE(x) (((x) >> 17) & 0x1) +#define C_02800C_IGNORE_SC_ZRANGE 0xFFFDFFFF +#define R_028010_DB_RENDER_OVERRIDE2 0x00028010 +#define R_028014_DB_HTILE_DATA_BASE 0x00028014 +#define R_028028_DB_STENCIL_CLEAR 0x00028028 +#define R_02802C_DB_DEPTH_CLEAR 0x0002802C +#define R_028048_DB_Z_READ_BASE 0x00028048 +#define R_02804C_DB_STENCIL_READ_BASE 0x0002804C +#define R_028050_DB_Z_WRITE_BASE 0x00028050 +#define R_028054_DB_STENCIL_WRITE_BASE 0x00028054 +#define R_028140_ALU_CONST_BUFFER_SIZE_PS_0 0x00028140 +#define R_028180_ALU_CONST_BUFFER_SIZE_VS_0 0x00028180 +#define R_028200_PA_SC_WINDOW_OFFSET 0x00028200 +#define R_02820C_PA_SC_CLIPRECT_RULE 0x0002820C +#define R_028210_PA_SC_CLIPRECT_0_TL 0x00028210 +#define R_028214_PA_SC_CLIPRECT_0_BR 0x00028214 +#define R_028218_PA_SC_CLIPRECT_1_TL 0x00028218 +#define R_02821C_PA_SC_CLIPRECT_1_BR 0x0002821C +#define R_028220_PA_SC_CLIPRECT_2_TL 0x00028220 +#define R_028224_PA_SC_CLIPRECT_2_BR 0x00028224 +#define R_028228_PA_SC_CLIPRECT_3_TL 0x00028228 +#define R_02822C_PA_SC_CLIPRECT_3_BR 0x0002822C +#define R_028230_PA_SC_EDGERULE 0x00028230 +#define R_028234_PA_SU_HARDWARE_SCREEN_OFFSET 0x00028234 +#define R_028238_CB_TARGET_MASK 0x00028238 +#define R_02823C_CB_SHADER_MASK 0x0002823C +#define R_028250_PA_SC_VPORT_SCISSOR_0_TL 0x00028250 +#define R_028254_PA_SC_VPORT_SCISSOR_0_BR 0x00028254 +#define R_028350_SX_MISC 0x00028350 +#define R_028380_SQ_VTX_SEMANTIC_0 0x00028380 +#define R_028384_SQ_VTX_SEMANTIC_1 0x00028384 +#define R_028388_SQ_VTX_SEMANTIC_2 0x00028388 +#define R_02838C_SQ_VTX_SEMANTIC_3 0x0002838C +#define R_028390_SQ_VTX_SEMANTIC_4 0x00028390 +#define R_028394_SQ_VTX_SEMANTIC_5 0x00028394 +#define R_028398_SQ_VTX_SEMANTIC_6 0x00028398 +#define R_02839C_SQ_VTX_SEMANTIC_7 0x0002839C +#define R_0283A0_SQ_VTX_SEMANTIC_8 0x000283A0 +#define R_0283A4_SQ_VTX_SEMANTIC_9 0x000283A4 +#define R_0283A8_SQ_VTX_SEMANTIC_10 0x000283A8 +#define R_0283AC_SQ_VTX_SEMANTIC_11 0x000283AC +#define R_0283B0_SQ_VTX_SEMANTIC_12 0x000283B0 +#define R_0283B4_SQ_VTX_SEMANTIC_13 0x000283B4 +#define R_0283B8_SQ_VTX_SEMANTIC_14 0x000283B8 +#define R_0283BC_SQ_VTX_SEMANTIC_15 0x000283BC +#define R_0283C0_SQ_VTX_SEMANTIC_16 0x000283C0 +#define R_0283C4_SQ_VTX_SEMANTIC_17 0x000283C4 +#define R_0283C8_SQ_VTX_SEMANTIC_18 0x000283C8 +#define R_0283CC_SQ_VTX_SEMANTIC_19 0x000283CC +#define R_0283D0_SQ_VTX_SEMANTIC_20 0x000283D0 +#define R_0283D4_SQ_VTX_SEMANTIC_21 0x000283D4 +#define R_0283D8_SQ_VTX_SEMANTIC_22 0x000283D8 +#define R_0283DC_SQ_VTX_SEMANTIC_23 0x000283DC +#define R_0283E0_SQ_VTX_SEMANTIC_24 0x000283E0 +#define R_0283E4_SQ_VTX_SEMANTIC_25 0x000283E4 +#define R_0283E8_SQ_VTX_SEMANTIC_26 0x000283E8 +#define R_0283EC_SQ_VTX_SEMANTIC_27 0x000283EC +#define R_0283F0_SQ_VTX_SEMANTIC_28 0x000283F0 +#define R_0283F4_SQ_VTX_SEMANTIC_29 0x000283F4 +#define R_0283F8_SQ_VTX_SEMANTIC_30 0x000283F8 +#define R_0283FC_SQ_VTX_SEMANTIC_31 0x000283FC +#define R_0282D0_PA_SC_VPORT_ZMIN_0 0x000282D0 +#define R_0282D4_PA_SC_VPORT_ZMAX_0 0x000282D4 +#define R_028400_VGT_MAX_VTX_INDX 0x00028400 +#define R_028404_VGT_MIN_VTX_INDX 0x00028404 +#define R_028408_VGT_INDX_OFFSET 0x00028408 +#define R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX 0x0002840C +#define R_028414_CB_BLEND_RED 0x00028414 +#define R_028418_CB_BLEND_GREEN 0x00028418 +#define R_02841C_CB_BLEND_BLUE 0x0002841C +#define R_028420_CB_BLEND_ALPHA 0x00028420 +#define R_028438_SX_ALPHA_REF 0x00028438 +#define R_02843C_PA_CL_VPORT_XSCALE_0 0x0002843C +#define R_028440_PA_CL_VPORT_XOFFSET_0 0x00028440 +#define R_028444_PA_CL_VPORT_YSCALE_0 0x00028444 +#define R_028448_PA_CL_VPORT_YOFFSET_0 0x00028448 +#define R_02844C_PA_CL_VPORT_ZSCALE_0 0x0002844C +#define R_028450_PA_CL_VPORT_ZOFFSET_0 0x00028450 +#define R_0285BC_PA_CL_UCP0_X 0x000285BC +#define R_0285C0_PA_CL_UCP0_Y 0x000285C0 +#define R_0285C4_PA_CL_UCP0_Z 0x000285C4 +#define R_0285C8_PA_CL_UCP0_W 0x000285C8 +#define R_0285CC_PA_CL_UCP1_X 0x000285CC +#define R_0285D0_PA_CL_UCP1_Y 0x000285D0 +#define R_0285D4_PA_CL_UCP1_Z 0x000285D4 +#define R_0285D8_PA_CL_UCP1_W 0x000285D8 +#define R_0285DC_PA_CL_UCP2_X 0x000285DC +#define R_0285E0_PA_CL_UCP2_Y 0x000285E0 +#define R_0285E4_PA_CL_UCP2_Z 0x000285E4 +#define R_0285E8_PA_CL_UCP2_W 0x000285E8 +#define R_0285EC_PA_CL_UCP3_X 0x000285EC +#define R_0285F0_PA_CL_UCP3_Y 0x000285F0 +#define R_0285F4_PA_CL_UCP3_Z 0x000285F4 +#define R_0285F8_PA_CL_UCP3_W 0x000285F8 +#define R_0285FC_PA_CL_UCP4_X 0x000285FC +#define R_028600_PA_CL_UCP4_Y 0x00028600 +#define R_028604_PA_CL_UCP4_Z 0x00028604 +#define R_028608_PA_CL_UCP4_W 0x00028608 +#define R_02860C_PA_CL_UCP5_X 0x0002860C +#define R_028610_PA_CL_UCP5_Y 0x00028610 +#define R_028614_PA_CL_UCP5_Z 0x00028614 +#define R_028618_PA_CL_UCP5_W 0x00028618 +#define R_02861C_SPI_VS_OUT_ID_0 0x0002861C +#define R_028620_SPI_VS_OUT_ID_1 0x00028620 +#define R_028624_SPI_VS_OUT_ID_2 0x00028624 +#define R_028628_SPI_VS_OUT_ID_3 0x00028628 +#define R_02862C_SPI_VS_OUT_ID_4 0x0002862C +#define R_028630_SPI_VS_OUT_ID_5 0x00028630 +#define R_028634_SPI_VS_OUT_ID_6 0x00028634 +#define R_028638_SPI_VS_OUT_ID_7 0x00028638 +#define R_02863C_SPI_VS_OUT_ID_8 0x0002863C +#define R_028640_SPI_VS_OUT_ID_9 0x00028640 +#define R_028648_SPI_PS_INPUT_CNTL_1 0x00028648 +#define R_02864C_SPI_PS_INPUT_CNTL_2 0x0002864C +#define R_028650_SPI_PS_INPUT_CNTL_3 0x00028650 +#define R_028654_SPI_PS_INPUT_CNTL_4 0x00028654 +#define R_028658_SPI_PS_INPUT_CNTL_5 0x00028658 +#define R_02865C_SPI_PS_INPUT_CNTL_6 0x0002865C +#define R_028660_SPI_PS_INPUT_CNTL_7 0x00028660 +#define R_028664_SPI_PS_INPUT_CNTL_8 0x00028664 +#define R_028668_SPI_PS_INPUT_CNTL_9 0x00028668 +#define R_02866C_SPI_PS_INPUT_CNTL_10 0x0002866C +#define R_028670_SPI_PS_INPUT_CNTL_11 0x00028670 +#define R_028674_SPI_PS_INPUT_CNTL_12 0x00028674 +#define R_028678_SPI_PS_INPUT_CNTL_13 0x00028678 +#define R_02867C_SPI_PS_INPUT_CNTL_14 0x0002867C +#define R_028680_SPI_PS_INPUT_CNTL_15 0x00028680 +#define R_028684_SPI_PS_INPUT_CNTL_16 0x00028684 +#define R_028688_SPI_PS_INPUT_CNTL_17 0x00028688 +#define R_02868C_SPI_PS_INPUT_CNTL_18 0x0002868C +#define R_028690_SPI_PS_INPUT_CNTL_19 0x00028690 +#define R_028694_SPI_PS_INPUT_CNTL_20 0x00028694 +#define R_028698_SPI_PS_INPUT_CNTL_21 0x00028698 +#define R_02869C_SPI_PS_INPUT_CNTL_22 0x0002869C +#define R_0286A0_SPI_PS_INPUT_CNTL_23 0x000286A0 +#define R_0286A4_SPI_PS_INPUT_CNTL_24 0x000286A4 +#define R_0286A8_SPI_PS_INPUT_CNTL_25 0x000286A8 +#define R_0286AC_SPI_PS_INPUT_CNTL_26 0x000286AC +#define R_0286B0_SPI_PS_INPUT_CNTL_27 0x000286B0 +#define R_0286B4_SPI_PS_INPUT_CNTL_28 0x000286B4 +#define R_0286B8_SPI_PS_INPUT_CNTL_29 0x000286B8 +#define R_0286BC_SPI_PS_INPUT_CNTL_30 0x000286BC +#define R_0286C0_SPI_PS_INPUT_CNTL_31 0x000286C0 +#define R_0286C8_SPI_THREAD_GROUPING 0x000286C8 +#define R_0286D8_SPI_INPUT_Z 0x000286D8 +#define R_0286DC_SPI_FOG_CNTL 0x000286DC +#define R_0286E4_SPI_PS_IN_CONTROL_2 0x000286E4 +#define R_0286E8_SPI_COMPUTE_INPUT_CNTL 0x000286E8 +#define R_028780_CB_BLEND0_CONTROL 0x00028780 +#define R_028784_CB_BLEND1_CONTROL 0x00028784 +#define R_028788_CB_BLEND2_CONTROL 0x00028788 +#define R_02878C_CB_BLEND3_CONTROL 0x0002878C +#define R_028790_CB_BLEND4_CONTROL 0x00028790 +#define R_028794_CB_BLEND5_CONTROL 0x00028794 +#define R_028798_CB_BLEND6_CONTROL 0x00028798 +#define R_02879C_CB_BLEND7_CONTROL 0x0002879C +#define R_028818_PA_CL_VTE_CNTL 0x00028818 +#define R_028820_PA_CL_NANINF_CNTL 0x00028820 +#define R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1 0x00028838 +#define R_028840_SQ_PGM_START_PS 0x00028840 +#define R_02884C_SQ_PGM_EXPORTS_PS 0x0002884C +#define S_02884C_EXPORT_COLORS(x) (((x) & 0xF) << 1) +#define G_02884C_EXPORT_COLORS(x) (((x) >> 1) & 0xF) +#define C_02884C_EXPORT_COLORS 0xFFFFFFE1 +#define S_02884C_EXPORT_Z(x) (((x) & 0x1) << 0) +#define G_02884C_EXPORT_Z(x) (((x) >> 0) & 0x1) +#define C_02884C_EXPORT_Z 0xFFFFFFFE +#define R_02885C_SQ_PGM_START_VS 0x0002885C +#define R_0288A4_SQ_PGM_START_FS 0x000288A4 +#define R_0288A8_SQ_PGM_RESOURCES_FS 0x000288A8 +#define R_0288EC_SQ_LDS_ALLOC_PS 0x000288EC +#define R_028900_SQ_ESGS_RING_ITEMSIZE 0x00028900 +#define R_028904_SQ_GSVS_RING_ITEMSIZE 0x00028904 +#define R_028908_SQ_ESTMP_RING_ITEMSIZE 0x00028908 +#define R_02890C_SQ_GSTMP_RING_ITEMSIZE 0x0002890C +#define R_028910_SQ_VSTMP_RING_ITEMSIZE 0x00028910 +#define R_028914_SQ_PSTMP_RING_ITEMSIZE 0x00028914 +#define R_02891C_SQ_GS_VERT_ITEMSIZE 0x0002891C +#define R_028920_SQ_GS_VERT_ITEMSIZE_1 0x00028920 +#define R_028924_SQ_GS_VERT_ITEMSIZE_2 0x00028924 +#define R_028928_SQ_GS_VERT_ITEMSIZE_3 0x00028928 +#define R_028940_ALU_CONST_CACHE_PS_0 0x00028940 +#define R_028980_ALU_CONST_CACHE_VS_0 0x00028980 +#define R_028A04_PA_SU_POINT_MINMAX 0x00028A04 +#define R_028A08_PA_SU_LINE_CNTL 0x00028A08 +#define R_028A10_VGT_OUTPUT_PATH_CNTL 0x00028A10 +#define R_028A14_VGT_HOS_CNTL 0x00028A14 +#define R_028A18_VGT_HOS_MAX_TESS_LEVEL 0x00028A18 +#define R_028A1C_VGT_HOS_MIN_TESS_LEVEL 0x00028A1C +#define R_028A20_VGT_HOS_REUSE_DEPTH 0x00028A20 +#define R_028A24_VGT_GROUP_PRIM_TYPE 0x00028A24 +#define R_028A28_VGT_GROUP_FIRST_DECR 0x00028A28 +#define R_028A2C_VGT_GROUP_DECR 0x00028A2C +#define R_028A30_VGT_GROUP_VECT_0_CNTL 0x00028A30 +#define R_028A34_VGT_GROUP_VECT_1_CNTL 0x00028A34 +#define R_028A38_VGT_GROUP_VECT_0_FMT_CNTL 0x00028A38 +#define R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL 0x00028A3C +#define R_028A48_PA_SC_MODE_CNTL_0 0x00028A48 +#define R_028A4C_PA_SC_MODE_CNTL_1 0x00028A4C +#define R_028AB4_VGT_REUSE_OFF 0x00028AB4 +#define R_028AB8_VGT_VTX_CNT_EN 0x00028AB8 +#define R_028ABC_DB_HTILE_SURFACE 0x00028ABC +#define R_028AC0_DB_SRESULTS_COMPARE_STATE0 0x00028AC0 +#define R_028AC4_DB_SRESULTS_COMPARE_STATE1 0x00028AC4 +#define R_028AC8_DB_PRELOAD_CONTROL 0x00028AC8 +#define R_028B54_VGT_SHADER_STAGES_EN 0x00028B54 +#define R_028B70_DB_ALPHA_TO_MASK 0x00028B70 +#define R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL 0x00028B78 +#define S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(x) (((x) & 0xFF) << 0) +#define G_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(x) (((x) >> 0) & 0xFF) +#define C_028B78_POLY_OFFSET_NEG_NUM_DB_BITS 0xFFFFFF00 +#define S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(x) (((x) & 0x1) << 8) +#define G_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(x) (((x) >> 8) & 0x1) +#define C_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT 0xFFFFFEFF +#define R_028B7C_PA_SU_POLY_OFFSET_CLAMP 0x00028B7C +#define R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE 0x00028B80 +#define S_028B80_SCALE(x) (((x) & 0xFFFFFFFF) << 0) +#define G_028B80_SCALE(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_028B80_SCALE 0x00000000 +#define R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET 0x00028B84 +#define S_028B84_OFFSET(x) (((x) & 0xFFFFFFFF) << 0) +#define G_028B84_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_028B84_OFFSET 0x00000000 +#define R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE 0x00028B88 +#define S_028B88_SCALE(x) (((x) & 0xFFFFFFFF) << 0) +#define G_028B88_SCALE(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_028B88_SCALE 0x00000000 +#define R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET 0x00028B8C +#define S_028B8C_OFFSET(x) (((x) & 0xFFFFFFFF) << 0) +#define G_028B8C_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_028B8C_OFFSET 0x00000000 +#define R_028B94_VGT_STRMOUT_CONFIG 0x00028B94 +#define R_028B98_VGT_STRMOUT_BUFFER_CONFIG 0x00028B98 +#define R_028C00_PA_SC_LINE_CNTL 0x00028C00 +#define R_028C04_PA_SC_AA_CONFIG 0x00028C04 +#define R_028C08_PA_SU_VTX_CNTL 0x00028C08 +#define R_028C0C_PA_CL_GB_VERT_CLIP_ADJ 0x00028C0C +#define R_028C10_PA_CL_GB_VERT_DISC_ADJ 0x00028C10 +#define R_028C14_PA_CL_GB_HORZ_CLIP_ADJ 0x00028C14 +#define R_028C18_PA_CL_GB_HORZ_DISC_ADJ 0x00028C18 +#define R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX 0x00028C1C +#define R_028C3C_PA_SC_AA_MASK 0x00028C3C +#define R_028C60_CB_COLOR0_BASE 0x00028C60 +#define R_028C6C_CB_COLOR0_VIEW 0x00028C6C +#define R_028C9C_CB_COLOR1_BASE 0x00028C9C +#define R_028CA0_CB_COLOR1_PITCH 0x00028CA0 +#define R_028CA4_CB_COLOR1_SLICE 0x00028CA4 +#define R_028CA8_CB_COLOR1_VIEW 0x00028CA8 +#define R_028CAC_CB_COLOR1_INFO 0x00028CAC +#define R_028CB0_CB_COLOR1_ATTRIB 0x00028CB0 +#define R_028CB4_CB_COLOR1_DIM 0x00028CB4 +#define R_028CD8_CB_COLOR2_BASE 0x00028CD8 +#define R_028CDC_CB_COLOR2_PITCH 0x00028CDC +#define R_028CE0_CB_COLOR2_SLICE 0x00028CE0 +#define R_028CE4_CB_COLOR2_VIEW 0x00028CE4 +#define R_028CE8_CB_COLOR2_INFO 0x00028CE8 +#define R_028CEC_CB_COLOR2_ATTRIB 0x00028CEC +#define R_028CF0_CB_COLOR2_DIM 0x00028CF0 +#define R_028D14_CB_COLOR3_BASE 0x00028D14 +#define R_028D18_CB_COLOR3_PITCH 0x00028D18 +#define R_028D1C_CB_COLOR3_SLICE 0x00028D1C +#define R_028D20_CB_COLOR3_VIEW 0x00028D20 +#define R_028D24_CB_COLOR3_INFO 0x00028D24 +#define R_028D28_CB_COLOR3_ATTRIB 0x00028D28 +#define R_028D2C_CB_COLOR3_DIM 0x00028D2C +#define R_028D50_CB_COLOR4_BASE 0x00028D50 +#define R_028D54_CB_COLOR4_PITCH 0x00028D54 +#define R_028D58_CB_COLOR4_SLICE 0x00028D58 +#define R_028D5C_CB_COLOR4_VIEW 0x00028D5C +#define R_028D60_CB_COLOR4_INFO 0x00028D60 +#define R_028D64_CB_COLOR4_ATTRIB 0x00028D64 +#define R_028D68_CB_COLOR4_DIM 0x00028D68 +#define R_028D8C_CB_COLOR5_BASE 0x00028D8C +#define R_028D90_CB_COLOR5_PITCH 0x00028D90 +#define R_028D94_CB_COLOR5_SLICE 0x00028D94 +#define R_028D98_CB_COLOR5_VIEW 0x00028D98 +#define R_028D9C_CB_COLOR5_INFO 0x00028D9C +#define R_028DA0_CB_COLOR5_ATTRIB 0x00028DA0 +#define R_028DA4_CB_COLOR5_DIM 0x00028DA4 +#define R_028DC8_CB_COLOR6_BASE 0x00028DC8 +#define R_028DCC_CB_COLOR6_PITCH 0x00028DCC +#define R_028DD0_CB_COLOR6_SLICE 0x00028DD0 +#define R_028DD4_CB_COLOR6_VIEW 0x00028DD4 +#define R_028DD8_CB_COLOR6_INFO 0x00028DD8 +#define R_028DDC_CB_COLOR6_ATTRIB 0x00028DDC +#define R_028DE0_CB_COLOR6_DIM 0x00028DE0 +#define R_028E04_CB_COLOR7_BASE 0x00028E04 +#define R_028E08_CB_COLOR7_PITCH 0x00028E08 +#define R_028E0C_CB_COLOR7_SLICE 0x00028E0C +#define R_028E10_CB_COLOR7_VIEW 0x00028E10 +#define R_028E14_CB_COLOR7_INFO 0x00028E14 +#define R_028E18_CB_COLOR7_ATTRIB 0x00028E18 +#define R_028E1C_CB_COLOR7_DIM 0x00028E1C +#define R_028E40_CB_COLOR8_BASE 0x00028E40 +#define R_028E44_CB_COLOR8_PITCH 0x00028E44 +#define R_028E48_CB_COLOR8_SLICE 0x00028E48 +#define R_028E4C_CB_COLOR8_VIEW 0x00028E4C +#define R_028E50_CB_COLOR8_INFO 0x00028E50 +#define R_028E54_CB_COLOR8_ATTRIB 0x00028E54 +#define R_028E58_CB_COLOR8_DIM 0x00028E58 +#define R_028E5C_CB_COLOR9_BASE 0x00028E5C +#define R_028E60_CB_COLOR9_PITCH 0x00028E60 +#define R_028E64_CB_COLOR9_SLICE 0x00028E64 +#define R_028E68_CB_COLOR9_VIEW 0x00028E68 +#define R_028E6C_CB_COLOR9_INFO 0x00028E6C +#define R_028E70_CB_COLOR9_ATTRIB 0x00028E70 +#define R_028E74_CB_COLOR9_DIM 0x00028E74 +#define R_028E78_CB_COLOR10_BASE 0x00028E78 +#define R_028E7C_CB_COLOR10_PITCH 0x00028E7C +#define R_028E80_CB_COLOR10_SLICE 0x00028E80 +#define R_028E84_CB_COLOR10_VIEW 0x00028E84 +#define R_028E88_CB_COLOR10_INFO 0x00028E88 +#define R_028E8C_CB_COLOR10_ATTRIB 0x00028E8C +#define R_028E90_CB_COLOR10_DIM 0x00028E90 +#define R_028E94_CB_COLOR11_BASE 0x00028E94 +#define R_028E98_CB_COLOR11_PITCH 0x00028E98 +#define R_028E9C_CB_COLOR11_SLICE 0x00028E9C +#define R_028EA0_CB_COLOR11_VIEW 0x00028EA0 +#define R_028EA4_CB_COLOR11_INFO 0x00028EA4 +#define R_028EA8_CB_COLOR11_ATTRIB 0x00028EA8 +#define R_028EAC_CB_COLOR11_DIM 0x00028EAC +#define R_030000_RESOURCE0_WORD0 0x00030000 +#define R_030004_RESOURCE0_WORD1 0x00030004 +#define R_030008_RESOURCE0_WORD2 0x00030008 +#define R_03000C_RESOURCE0_WORD3 0x0003000C +#define R_030010_RESOURCE0_WORD4 0x00030010 +#define R_030014_RESOURCE0_WORD5 0x00030014 +#define R_030018_RESOURCE0_WORD6 0x00030018 +#define R_03001C_RESOURCE0_WORD7 0x0003001C +#define R_0085F0_CP_COHER_CNTL 0x0085F0 +#define S_0085F0_DEST_BASE_0_ENA(x) (((x) & 0x1) << 0) +#define G_0085F0_DEST_BASE_0_ENA(x) (((x) >> 0) & 0x1) +#define C_0085F0_DEST_BASE_0_ENA 0xFFFFFFFE +#define S_0085F0_DEST_BASE_1_ENA(x) (((x) & 0x1) << 1) +#define G_0085F0_DEST_BASE_1_ENA(x) (((x) >> 1) & 0x1) +#define C_0085F0_DEST_BASE_1_ENA 0xFFFFFFFD +#define S_0085F0_SO0_DEST_BASE_ENA(x) (((x) & 0x1) << 2) +#define G_0085F0_SO0_DEST_BASE_ENA(x) (((x) >> 2) & 0x1) +#define C_0085F0_SO0_DEST_BASE_ENA 0xFFFFFFFB +#define S_0085F0_SO1_DEST_BASE_ENA(x) (((x) & 0x1) << 3) +#define G_0085F0_SO1_DEST_BASE_ENA(x) (((x) >> 3) & 0x1) +#define C_0085F0_SO1_DEST_BASE_ENA 0xFFFFFFF7 +#define S_0085F0_SO2_DEST_BASE_ENA(x) (((x) & 0x1) << 4) +#define G_0085F0_SO2_DEST_BASE_ENA(x) (((x) >> 4) & 0x1) +#define C_0085F0_SO2_DEST_BASE_ENA 0xFFFFFFEF +#define S_0085F0_SO3_DEST_BASE_ENA(x) (((x) & 0x1) << 5) +#define G_0085F0_SO3_DEST_BASE_ENA(x) (((x) >> 5) & 0x1) +#define C_0085F0_SO3_DEST_BASE_ENA 0xFFFFFFDF +#define S_0085F0_CB0_DEST_BASE_ENA(x) (((x) & 0x1) << 6) +#define G_0085F0_CB0_DEST_BASE_ENA(x) (((x) >> 6) & 0x1) +#define C_0085F0_CB0_DEST_BASE_ENA 0xFFFFFFBF +#define S_0085F0_CB1_DEST_BASE_ENA(x) (((x) & 0x1) << 7) +#define G_0085F0_CB1_DEST_BASE_ENA(x) (((x) >> 7) & 0x1) +#define C_0085F0_CB1_DEST_BASE_ENA 0xFFFFFF7F +#define S_0085F0_CB2_DEST_BASE_ENA(x) (((x) & 0x1) << 8) +#define G_0085F0_CB2_DEST_BASE_ENA(x) (((x) >> 8) & 0x1) +#define C_0085F0_CB2_DEST_BASE_ENA 0xFFFFFEFF +#define S_0085F0_CB3_DEST_BASE_ENA(x) (((x) & 0x1) << 9) +#define G_0085F0_CB3_DEST_BASE_ENA(x) (((x) >> 9) & 0x1) +#define C_0085F0_CB3_DEST_BASE_ENA 0xFFFFFDFF +#define S_0085F0_CB4_DEST_BASE_ENA(x) (((x) & 0x1) << 10) +#define G_0085F0_CB4_DEST_BASE_ENA(x) (((x) >> 10) & 0x1) +#define C_0085F0_CB4_DEST_BASE_ENA 0xFFFFFBFF +#define S_0085F0_CB5_DEST_BASE_ENA(x) (((x) & 0x1) << 11) +#define G_0085F0_CB5_DEST_BASE_ENA(x) (((x) >> 11) & 0x1) +#define C_0085F0_CB5_DEST_BASE_ENA 0xFFFFF7FF +#define S_0085F0_CB6_DEST_BASE_ENA(x) (((x) & 0x1) << 12) +#define G_0085F0_CB6_DEST_BASE_ENA(x) (((x) >> 12) & 0x1) +#define C_0085F0_CB6_DEST_BASE_ENA 0xFFFFEFFF +#define S_0085F0_CB7_DEST_BASE_ENA(x) (((x) & 0x1) << 13) +#define G_0085F0_CB7_DEST_BASE_ENA(x) (((x) >> 13) & 0x1) +#define C_0085F0_CB7_DEST_BASE_ENA 0xFFFFDFFF +#define S_0085F0_DB_DEST_BASE_ENA(x) (((x) & 0x1) << 14) +#define G_0085F0_DB_DEST_BASE_ENA(x) (((x) >> 14) & 0x1) +#define C_0085F0_DB_DEST_BASE_ENA 0xFFFFBFFF +#define S_0085F0_CB8_DEST_BASE_ENA(x) (((x) & 0x1) << 15) +#define G_0085F0_CB8_DEST_BASE_ENA(x) (((x) >> 15) & 0x1) + +#define S_0085F0_CB9_DEST_BASE_ENA(x) (((x) & 0x1) << 16) +#define G_0085F0_CB9_DEST_BASE_ENA(x) (((x) >> 16) & 0x1) + +#define S_0085F0_CB10_DEST_BASE_ENA(x) (((x) & 0x1) << 17) +#define G_0085F0_CB10_DEST_BASE_ENA(x) (((x) >> 17) & 0x1) + +#define S_0085F0_CB11_DEST_BASE_ENA(x) (((x) & 0x1) << 18) +#define G_0085F0_CB11_DEST_BASE_ENA(x) (((x) >> 18) & 0x1) + +#define S_0085F0_TC_ACTION_ENA(x) (((x) & 0x1) << 23) +#define G_0085F0_TC_ACTION_ENA(x) (((x) >> 23) & 0x1) +#define C_0085F0_TC_ACTION_ENA 0xFF7FFFFF +#define S_0085F0_VC_ACTION_ENA(x) (((x) & 0x1) << 24) +#define G_0085F0_VC_ACTION_ENA(x) (((x) >> 24) & 0x1) +#define C_0085F0_VC_ACTION_ENA 0xFEFFFFFF +#define S_0085F0_CB_ACTION_ENA(x) (((x) & 0x1) << 25) +#define G_0085F0_CB_ACTION_ENA(x) (((x) >> 25) & 0x1) +#define C_0085F0_CB_ACTION_ENA 0xFDFFFFFF +#define S_0085F0_DB_ACTION_ENA(x) (((x) & 0x1) << 26) +#define G_0085F0_DB_ACTION_ENA(x) (((x) >> 26) & 0x1) +#define C_0085F0_DB_ACTION_ENA 0xFBFFFFFF +#define S_0085F0_SH_ACTION_ENA(x) (((x) & 0x1) << 27) +#define G_0085F0_SH_ACTION_ENA(x) (((x) >> 27) & 0x1) +#define C_0085F0_SH_ACTION_ENA 0xF7FFFFFF +#define S_0085F0_SMX_ACTION_ENA(x) (((x) & 0x1) << 28) +#define G_0085F0_SMX_ACTION_ENA(x) (((x) >> 28) & 0x1) +#define C_0085F0_SMX_ACTION_ENA 0xEFFFFFFF +#define S_0085F0_CR0_ACTION_ENA(x) (((x) & 0x1) << 29) +#define G_0085F0_CR0_ACTION_ENA(x) (((x) >> 29) & 0x1) +#define C_0085F0_CR0_ACTION_ENA 0xDFFFFFFF +#define S_0085F0_CR1_ACTION_ENA(x) (((x) & 0x1) << 30) +#define G_0085F0_CR1_ACTION_ENA(x) (((x) >> 30) & 0x1) +#define C_0085F0_CR1_ACTION_ENA 0xBFFFFFFF +#define S_0085F0_CR2_ACTION_ENA(x) (((x) & 0x1) << 31) +#define G_0085F0_CR2_ACTION_ENA(x) (((x) >> 31) & 0x1) +#define C_0085F0_CR2_ACTION_ENA 0x7FFFFFFF +#define R_008970_VGT_NUM_INDICES 0x008970 +#define R_0287F0_VGT_DRAW_INITIATOR 0x0287F0 + +#define R_03CFF0_SQ_VTX_BASE_VTX_LOC 0x03CFF0 +#define R_03CFF4_SQ_VTX_START_INST_LOC 0x03CFF4 + +#define R_03A200_SQ_LOOP_CONST_0 0x3A200 #endif diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index bce2707e77..15ee001106 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -26,8 +26,11 @@ #ifndef R600_H #define R600_H +#include <assert.h> #include <stdint.h> #include <stdio.h> +#include <util/u_double_list.h> +#include <pipe/p_compiler.h> #define RADEON_CTX_MAX_PM4 (64 * 1024 / 4) @@ -90,63 +93,48 @@ enum radeon_family { CHIP_LAST, }; -enum radeon_family r600_get_family(struct radeon *rw); +enum chip_class { + R600, + R700, + EVERGREEN, +}; -/* - * radeon object functions - */ -#if 0 -struct radeon_bo { - unsigned refcount; - unsigned handle; - unsigned size; - unsigned alignment; - unsigned map_count; - void *data; +struct r600_tiling_info { + unsigned num_channels; + unsigned num_banks; + unsigned group_bytes; }; -struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, - unsigned size, unsigned alignment, void *ptr); -int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo); -void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo); -struct radeon_bo *radeon_bo_incref(struct radeon *radeon, struct radeon_bo *bo); -struct radeon_bo *radeon_bo_decref(struct radeon *radeon, struct radeon_bo *bo); -int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); -#endif -/* lowlevel WS bo */ -struct radeon_ws_bo; -struct radeon_ws_bo *radeon_ws_bo(struct radeon *radeon, + +enum radeon_family r600_get_family(struct radeon *rw); +enum chip_class r600_get_family_class(struct radeon *radeon); +struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon); + +/* r600_bo.c */ +struct r600_bo; +struct r600_bo *r600_bo(struct radeon *radeon, unsigned size, unsigned alignment, unsigned usage); -struct radeon_ws_bo *radeon_ws_bo_handle(struct radeon *radeon, +struct r600_bo *r600_bo_handle(struct radeon *radeon, unsigned handle); -void *radeon_ws_bo_map(struct radeon *radeon, struct radeon_ws_bo *bo, unsigned usage, void *ctx); -void radeon_ws_bo_unmap(struct radeon *radeon, struct radeon_ws_bo *bo); -void radeon_ws_bo_reference(struct radeon *radeon, struct radeon_ws_bo **dst, - struct radeon_ws_bo *src); -int radeon_ws_bo_wait(struct radeon *radeon, struct radeon_ws_bo *bo); +void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); +void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); +void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, + struct r600_bo *src); +static INLINE unsigned r600_bo_offset(struct r600_bo *bo) +{ + return 0; +} + /* R600/R700 STATES */ #define R600_GROUP_MAX 16 #define R600_BLOCK_MAX_BO 32 #define R600_BLOCK_MAX_REG 128 -enum r600_group_id { - R600_GROUP_CONFIG = 0, - R600_GROUP_CONTEXT, - R600_GROUP_ALU_CONST, - R600_GROUP_RESOURCE, - R600_GROUP_SAMPLER, - R600_GROUP_CTL_CONST, - R600_GROUP_LOOP_CONST, - R600_GROUP_BOOL_CONST, - R600_NGROUPS -}; - struct r600_pipe_reg { - unsigned group_id; u32 offset; u32 mask; u32 value; - struct radeon_ws_bo *bo; + struct r600_bo *bo; }; struct r600_pipe_state { @@ -156,11 +144,9 @@ struct r600_pipe_state { }; static inline void r600_pipe_state_add_reg(struct r600_pipe_state *state, - unsigned group_id, u32 offset, - u32 value, u32 mask, - struct radeon_ws_bo *bo) + u32 offset, u32 value, u32 mask, + struct r600_bo *bo) { - state->regs[state->nregs].group_id = group_id; state->regs[state->nregs].offset = offset; state->regs[state->nregs].value = value; state->regs[state->nregs].mask = mask; @@ -173,30 +159,35 @@ static inline void r600_pipe_state_add_reg(struct r600_pipe_state *state, #define R600_BLOCK_STATUS_DIRTY (1 << 1) struct r600_block_reloc { - struct radeon_ws_bo *bo; - unsigned nreloc; - unsigned bo_pm4_index[R600_BLOCK_MAX_BO]; + struct r600_bo *bo; + unsigned flush_flags; + unsigned flush_mask; + unsigned bo_pm4_index; }; -struct r600_group_block { +struct r600_block { + struct list_head list; unsigned status; unsigned start_offset; unsigned pm4_ndwords; + unsigned pm4_flush_ndwords; unsigned nbo; unsigned nreg; + u32 *reg; u32 pm4[R600_BLOCK_MAX_REG]; unsigned pm4_bo_index[R600_BLOCK_MAX_REG]; struct r600_block_reloc reloc[R600_BLOCK_MAX_BO]; }; -struct r600_group { +struct r600_range { unsigned start_offset; unsigned end_offset; - unsigned nblocks; - struct r600_group_block *blocks; - unsigned *offset_block_id; + struct r600_block **blocks; }; +/* + * relocation + */ #pragma pack(1) struct r600_reloc { uint32_t handle; @@ -206,10 +197,38 @@ struct r600_reloc { }; #pragma pack() +/* + * query + */ +struct r600_query { + u64 result; + /* The kind of query. Currently only OQ is supported. */ + unsigned type; + /* How many results have been written, in dwords. It's incremented + * after end_query and flush. */ + unsigned num_results; + /* if we've flushed the query */ + unsigned state; + /* The buffer where query results are stored. */ + struct r600_bo *buffer; + unsigned buffer_size; + /* linked list of queries */ + struct list_head list; +}; + +#define R600_QUERY_STATE_STARTED (1 << 0) +#define R600_QUERY_STATE_ENDED (1 << 1) +#define R600_QUERY_STATE_SUSPENDED (1 << 2) + + struct r600_context { struct radeon *radeon; - unsigned ngroups; - struct r600_group groups[R600_GROUP_MAX]; + unsigned hash_size; + unsigned hash_shift; + struct r600_range range[256]; + unsigned nblocks; + struct r600_block **blocks; + struct list_head dirty; unsigned pm4_ndwords; unsigned pm4_cdwords; unsigned pm4_dirty_cdwords; @@ -217,8 +236,14 @@ struct r600_context { unsigned nreloc; unsigned creloc; struct r600_reloc *reloc; - struct radeon_ws_bo **bo; + struct radeon_bo **bo; u32 *pm4; + struct list_head query_list; + unsigned num_query_running; + unsigned fence; + struct list_head fenced_bo; + unsigned *cfence; + struct r600_bo *fence_bo; }; struct r600_draw { @@ -227,7 +252,7 @@ struct r600_draw { u32 vgt_index_type; u32 vgt_draw_initiator; u32 indices_bo_offset; - struct radeon_ws_bo *indices; + struct r600_bo *indices; }; int r600_context_init(struct r600_context *ctx, struct radeon *radeon); @@ -241,4 +266,24 @@ void r600_context_flush(struct r600_context *ctx); void r600_context_dump_bof(struct r600_context *ctx, const char *file); void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw); +struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type); +void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query); +boolean r600_context_query_result(struct r600_context *ctx, + struct r600_query *query, + boolean wait, void *vresult); +void r600_query_begin(struct r600_context *ctx, struct r600_query *query); +void r600_query_end(struct r600_context *ctx, struct r600_query *query); +void r600_context_queries_suspend(struct r600_context *ctx); +void r600_context_queries_resume(struct r600_context *ctx); + +int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon); +void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw); +void evergreen_ps_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); +void evergreen_vs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); + +void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); +void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); +void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); +void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); + #endif diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 0d17f75da7..d13da0ef63 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -20,14 +20,13 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "radeon.h" -#include "r600_context.h" +#include <stdio.h> +#include <errno.h> #include "util/u_memory.h" +#include "r600_pipe.h" #include "r600_sq.h" #include "r600_opcodes.h" #include "r600_asm.h" -#include <stdio.h> -#include <errno.h> static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu) { @@ -420,7 +419,6 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) || bc->force_add_cf) { - /* at most 128 slots, one add alu can add 4 slots + 4 constant worst case */ r = r600_bc_add_cf(bc); if (r) { free(nalu); @@ -434,7 +432,9 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int } else { LIST_ADDTAIL(&nalu->bs_list, &bc->cf_last->curr_bs_head->bs_list); } - if (alu->last && (bc->cf_last->ndw >> 1) >= 124) { + /* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots) + * worst case */ + if (alu->last && (bc->cf_last->ndw >> 1) >= 120) { bc->force_add_cf = 1; } /* number of gpr == the last gpr used in any alu */ @@ -465,8 +465,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int bc->cf_last->ndw += 2; bc->ndw += 2; - if (bc->use_mem_constant) - bc->cf_last->kcache0_mode = 2; + bc->cf_last->kcache0_mode = 2; /* process cur ALU instructions for bank swizzle */ if (alu->last) { @@ -531,7 +530,8 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX && - bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC)) { + bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) || + bc->force_add_cf) { r = r600_bc_add_cf(bc); if (r) { free(nvtx); @@ -543,6 +543,8 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* each fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; + if ((bc->ndw / 4) > 7) + bc->force_add_cf = 1; return 0; } @@ -557,7 +559,8 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || - bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX) { + bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX || + bc->force_add_cf) { r = r600_bc_add_cf(bc); if (r) { free(ntex); @@ -569,6 +572,8 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) /* each texture fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; + if ((bc->ndw / 4) > 7) + bc->force_add_cf = 1; return 0; } @@ -595,7 +600,11 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) | S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) | S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) | - S_SQ_VTX_WORD1_USE_CONST_FIELDS(1) | + S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx->use_const_fields) | + S_SQ_VTX_WORD1_DATA_FORMAT(vtx->data_format) | + S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx->num_format_all) | + S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) | + S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1); bc->bytecode[id++] = 0; @@ -696,6 +705,7 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | S_SQ_CF_ALU_WORD1_BARRIER(1) | + S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == 0 ? cf->r6xx_uses_waterfall : 0) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 62a46cb0e1..bebc7c15b0 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -58,7 +58,7 @@ struct r600_bc_alu { unsigned bank_swizzle; unsigned bank_swizzle_force; u32 value[4]; - int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; + int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; }; struct r600_bc_tex { @@ -101,6 +101,11 @@ struct r600_bc_vtx { unsigned dst_sel_y; unsigned dst_sel_z; unsigned dst_sel_w; + unsigned use_const_fields; + unsigned data_format; + unsigned num_format_all; + unsigned format_comp_all; + unsigned srf_mode_all; }; struct r600_bc_output { @@ -127,6 +132,7 @@ struct r600_bc_cf { unsigned pop_count; unsigned cf_addr; /* control flow addr */ unsigned kcache0_mode; + unsigned r6xx_uses_waterfall; struct list_head alu; struct list_head tex; struct list_head vtx; @@ -159,7 +165,6 @@ struct r600_cf_callstack { struct r600_bc { enum radeon_family family; int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */ - unsigned use_mem_constant; struct list_head cf; struct r600_bc_cf *cf_last; unsigned ndw; @@ -175,6 +180,10 @@ struct r600_bc { struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH]; }; +/* eg_asm.c */ +int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); + +/* r600_asm.c */ int r600_bc_init(struct r600_bc *bc, enum radeon_family family); int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); int r600_bc_add_literal(struct r600_bc *bc, const u32 *value); @@ -185,4 +194,7 @@ int r600_bc_build(struct r600_bc *bc); int r600_bc_add_cfinst(struct r600_bc *bc, int inst); int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type); +/* r700_asm.c */ +int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); + #endif diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 54fbc50bbc..cae05aab28 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Marek Olšák <maraeo@gmail.com> + * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -19,69 +19,118 @@ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Marek Olšák */ -#include <errno.h> -#include <pipe/p_screen.h> +#include <util/u_surface.h> #include <util/u_blitter.h> -#include <util/u_inlines.h> -#include <util/u_memory.h> -#include "util/u_surface.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600d.h" +#include <util/u_format.h> +#include "r600_pipe.h" -static void r600_blitter_save_states(struct pipe_context *ctx) +enum r600_blitter_op /* bitmask */ { - struct r600_context *rctx = r600_context(ctx); + R600_CLEAR = 1, + R600_CLEAR_SURFACE = 2, + R600_COPY = 4 +}; + +static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + r600_context_queries_suspend(&rctx->ctx); - util_blitter_save_blend(rctx->blitter, rctx->blend); - util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->dsa); - if (rctx->stencil_ref) { - util_blitter_save_stencil_ref(rctx->blitter, - &rctx->stencil_ref->state.stencil_ref); + util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]); + util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->states[R600_PIPE_STATE_DSA]); + if (rctx->states[R600_PIPE_STATE_STENCIL_REF]) { + util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref); } - util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer); + util_blitter_save_rasterizer(rctx->blitter, rctx->states[R600_PIPE_STATE_RASTERIZER]); util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader); util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader); util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_elements); - if (rctx->viewport) { - util_blitter_save_viewport(rctx->blitter, &rctx->viewport->state.viewport); + if (rctx->states[R600_PIPE_STATE_VIEWPORT]) { + util_blitter_save_viewport(rctx->blitter, &rctx->viewport); } - if (rctx->clip) { - util_blitter_save_clip(rctx->blitter, &rctx->clip->state.clip); + if (rctx->states[R600_PIPE_STATE_CLIP]) { + util_blitter_save_clip(rctx->blitter, &rctx->clip); } - util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, - rctx->vertex_buffer); + util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, rctx->vertex_buffer); - /* remove ptr so they don't get deleted */ - rctx->blend = NULL; - rctx->clip = NULL; - rctx->vs_shader = NULL; - rctx->ps_shader = NULL; - rctx->rasterizer = NULL; - rctx->dsa = NULL; rctx->vertex_elements = NULL; - /* suspend queries */ - r600_queries_suspend(ctx); + if (op & (R600_CLEAR_SURFACE | R600_COPY)) + util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer); + + if (op & R600_COPY) { + util_blitter_save_fragment_sampler_states( + rctx->blitter, rctx->ps_samplers.n_samplers, + (void**)rctx->ps_samplers.samplers); + + util_blitter_save_fragment_sampler_views( + rctx->blitter, rctx->ps_samplers.n_views, + (struct pipe_sampler_view**)rctx->ps_samplers.views); + } + +} + +static void r600_blitter_end(struct pipe_context *ctx) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + r600_context_queries_resume(&rctx->ctx); +} + +int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct pipe_framebuffer_state fb = *rctx->pframebuffer; + struct pipe_surface *zsurf, *cbsurf; + int level = 0; + float depth = 1.0f; + + r600_context_queries_suspend(&rctx->ctx); + for (int i = 0; i < fb.nr_cbufs; i++) { + fb.cbufs[i] = NULL; + pipe_surface_reference(&fb.cbufs[i], rctx->pframebuffer->cbufs[i]); + } + fb.zsbuf = NULL; + pipe_surface_reference(&fb.zsbuf, rctx->pframebuffer->zsbuf); + + zsurf = ctx->screen->get_tex_surface(ctx->screen, &texture->resource.base.b, 0, level, 0, + PIPE_BIND_DEPTH_STENCIL); + + cbsurf = ctx->screen->get_tex_surface(ctx->screen, + (struct pipe_resource*)texture->flushed_depth_texture, + 0, level, 0, PIPE_BIND_RENDER_TARGET); + + r600_blitter_begin(ctx, R600_CLEAR); + util_blitter_save_framebuffer(rctx->blitter, &fb); + if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 || + rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) + depth = 0.0f; + + util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, rctx->custom_dsa_flush, depth); + + pipe_surface_reference(&zsurf, NULL); + pipe_surface_reference(&cbsurf, NULL); + for (int i = 0; i < fb.nr_cbufs; i++) { + pipe_surface_reference(&fb.cbufs[i], NULL); + } + pipe_surface_reference(&fb.zsbuf, NULL); + r600_context_queries_resume(&rctx->ctx); + + return 0; } static void r600_clear(struct pipe_context *ctx, unsigned buffers, const float *rgba, double depth, unsigned stencil) { - struct r600_context *rctx = r600_context(ctx); - struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct pipe_framebuffer_state *fb = &rctx->framebuffer; - r600_blitter_save_states(ctx); + r600_blitter_begin(ctx, R600_CLEAR); util_blitter_clear(rctx->blitter, fb->width, fb->height, fb->nr_cbufs, buffers, rgba, depth, stencil); - /* resume queries */ - r600_queries_resume(ctx); + r600_blitter_end(ctx); } static void r600_clear_render_target(struct pipe_context *ctx, @@ -90,16 +139,12 @@ static void r600_clear_render_target(struct pipe_context *ctx, unsigned dstx, unsigned dsty, unsigned width, unsigned height) { - struct r600_context *rctx = r600_context(ctx); - struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - - r600_blitter_save_states(ctx); - util_blitter_save_framebuffer(rctx->blitter, fb); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + r600_blitter_begin(ctx, R600_CLEAR_SURFACE); util_blitter_clear_render_target(rctx->blitter, dst, rgba, dstx, dsty, width, height); - /* resume queries */ - r600_queries_resume(ctx); + r600_blitter_end(ctx); } static void r600_clear_depth_stencil(struct pipe_context *ctx, @@ -110,19 +155,35 @@ static void r600_clear_depth_stencil(struct pipe_context *ctx, unsigned dstx, unsigned dsty, unsigned width, unsigned height) { - struct r600_context *rctx = r600_context(ctx); - struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - - r600_blitter_save_states(ctx); - util_blitter_save_framebuffer(rctx->blitter, fb); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + r600_blitter_begin(ctx, R600_CLEAR_SURFACE); util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); - /* resume queries */ - r600_queries_resume(ctx); + r600_blitter_end(ctx); } + +/* Copy a block of pixels from one surface to another using HW. */ +static void r600_hw_copy_region(struct pipe_context *ctx, + struct pipe_resource *dst, + struct pipe_subresource subdst, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + struct pipe_subresource subsrc, + unsigned srcx, unsigned srcy, unsigned srcz, + unsigned width, unsigned height) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + r600_blitter_begin(ctx, R600_COPY); + util_blitter_copy_region(rctx->blitter, dst, subdst, dstx, dsty, dstz, + src, subsrc, srcx, srcy, srcz, width, height, + TRUE); + r600_blitter_end(ctx); +} + static void r600_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, struct pipe_subresource subdst, @@ -132,463 +193,22 @@ static void r600_resource_copy_region(struct pipe_context *ctx, unsigned srcx, unsigned srcy, unsigned srcz, unsigned width, unsigned height) { - util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height); + boolean is_depth; + /* there is something wrong with depth resource copies at the moment so avoid them for now */ + is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; + if (is_depth) + util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz, + src, subsrc, srcx, srcy, srcz, width, height); + else + r600_hw_copy_region(ctx, dst, subdst, dstx, dsty, dstz, + src, subsrc, srcx, srcy, srcz, width, height); + } -void r600_init_blit_functions(struct r600_context *rctx) +void r600_init_blit_functions(struct r600_pipe_context *rctx) { rctx->context.clear = r600_clear; rctx->context.clear_render_target = r600_clear_render_target; rctx->context.clear_depth_stencil = r600_clear_depth_stencil; rctx->context.resource_copy_region = r600_resource_copy_region; } - - -struct r600_blit_states { - struct radeon_state rasterizer; - struct radeon_state dsa; - struct radeon_state blend; - struct radeon_state cb_cntl; - struct radeon_state vgt; - struct radeon_state draw; - struct radeon_state vs_constant0; - struct radeon_state vs_constant1; - struct radeon_state vs_constant2; - struct radeon_state vs_constant3; - struct radeon_state ps_shader; - struct radeon_state vs_shader; - struct radeon_state vs_resource0; - struct radeon_state vs_resource1; -}; - -static int r600_blit_state_vs_resources(struct r600_screen *rscreen, struct r600_blit_states *bstates) -{ - struct radeon_state *rstate; - struct radeon_ws_bo *bo; - void *data; - u32 vbo[] = { - 0xBF800000, 0xBF800000, 0x3F800000, 0x3F800000, - 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000, - 0x3F800000, 0xBF800000, 0x3F800000, 0x3F800000, - 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000, - 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, - 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000, - 0xBF800000, 0x3F800000, 0x3F800000, 0x3F800000, - 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000 - }; - - /* simple shader */ - bo = radeon_ws_bo(rscreen->rw, 128, 4096, 0); - if (bo == NULL) { - return -ENOMEM; - } - data = radeon_ws_bo_map(rscreen->rw, bo, 0, NULL); - if (!data) { - radeon_ws_bo_reference(rscreen->rw, &bo, NULL); - return -ENOMEM; - } - memcpy(data, vbo, 128); - radeon_ws_bo_unmap(rscreen->rw, bo); - - rstate = &bstates->vs_resource0; - radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, 0, R600_SHADER_VS); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD0] = 0x00000000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD1] = 0x00000080; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD2] = 0x02302000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD3] = 0x00000000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD4] = 0x00000000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD5] = 0x00000000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD6] = 0xC0000000; - rstate->bo[0] = bo; - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - if (radeon_state_pm4(rstate)) { - radeon_state_fini(rstate); - return -ENOMEM; - } - - rstate = &bstates->vs_resource1; - radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, 1, R600_SHADER_VS); - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD0] = 0x00000010; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD1] = 0x00000070; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD2] = 0x02302000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD3] = 0x00000000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD4] = 0x00000000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD5] = 0x00000000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD6] = 0xC0000000; - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], bo); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - if (radeon_state_pm4(rstate)) { - radeon_state_fini(rstate); - return -ENOMEM; - } - - return 0; -} - -static void r600_blit_state_vs_shader(struct r600_screen *rscreen, struct radeon_state *rstate) -{ - struct radeon_ws_bo *bo; - void *data; - u32 shader_bc_r600[] = { - 0x00000004, 0x81000400, - 0x00000008, 0xA01C0000, - 0xC001A03C, 0x94000688, - 0xC0024000, 0x94200688, - 0x7C000000, 0x002D1001, - 0x00080000, 0x00000000, - 0x7C000100, 0x002D1002, - 0x00080000, 0x00000000, - 0x00000001, 0x00601910, - 0x00000401, 0x20601910, - 0x00000801, 0x40601910, - 0x80000C01, 0x60601910, - 0x00000002, 0x00801910, - 0x00000402, 0x20801910, - 0x00000802, 0x40801910, - 0x80000C02, 0x60801910 - }; - u32 shader_bc_r700[] = { - 0x00000004, 0x81000400, - 0x00000008, 0xA01C0000, - 0xC001A03C, 0x94000688, - 0xC0024000, 0x94200688, - 0x7C000000, 0x002D1001, - 0x00080000, 0x00000000, - 0x7C000100, 0x002D1002, - 0x00080000, 0x00000000, - 0x00000001, 0x00600C90, - 0x00000401, 0x20600C90, - 0x00000801, 0x40600C90, - 0x80000C01, 0x60600C90, - 0x00000002, 0x00800C90, - 0x00000402, 0x20800C90, - 0x00000802, 0x40800C90, - 0x80000C02, 0x60800C90 - }; - - /* simple shader */ - bo = radeon_ws_bo(rscreen->rw, 128, 4096, 0); - if (bo == NULL) { - return; - } - data = radeon_ws_bo_map(rscreen->rw, bo, 0, NULL); - if (!data) { - radeon_ws_bo_reference(rscreen->rw, &bo, NULL); - return; - } - switch (rscreen->chip_class) { - case R600: - memcpy(data, shader_bc_r600, 128); - break; - case R700: - memcpy(data, shader_bc_r700, 128); - break; - default: - R600_ERR("unsupported chip family\n"); - radeon_ws_bo_unmap(rscreen->rw, bo); - radeon_ws_bo_reference(rscreen->rw, &bo, NULL); - return; - } - radeon_ws_bo_unmap(rscreen->rw, bo); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_VS_SHADER__SPI_VS_OUT_ID_0] = 0x03020100; - rstate->states[R600_VS_SHADER__SPI_VS_OUT_ID_1] = 0x07060504; - rstate->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = 0x00000005; - - rstate->bo[0] = bo; - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], bo); - rstate->nbo = 2; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; - - radeon_state_pm4(rstate); -} - -static void r600_blit_state_ps_shader(struct r600_screen *rscreen, struct radeon_state *rstate) -{ - struct radeon_ws_bo *bo; - void *data; - u32 shader_bc_r600[] = { - 0x00000002, 0xA00C0000, - 0xC0008000, 0x94200688, - 0x00000000, 0x00201910, - 0x00000400, 0x20201910, - 0x00000800, 0x40201910, - 0x80000C00, 0x60201910 - }; - u32 shader_bc_r700[] = { - 0x00000002, 0xA00C0000, - 0xC0008000, 0x94200688, - 0x00000000, 0x00200C90, - 0x00000400, 0x20200C90, - 0x00000800, 0x40200C90, - 0x80000C00, 0x60200C90 - }; - - /* simple shader */ - bo = radeon_ws_bo(rscreen->rw, 128, 4096, 0); - if (bo == NULL) { - return; - } - data = radeon_ws_bo_map(rscreen->rw, bo, 0, NULL); - if (!data) { - radeon_ws_bo_reference(rscreen->rw, &bo, NULL); - return; - } - switch (rscreen->chip_class) { - case R600: - memcpy(data, shader_bc_r600, 48); - break; - case R700: - memcpy(data, shader_bc_r700, 48); - break; - default: - R600_ERR("unsupported chip family\n"); - radeon_ws_bo_unmap(rscreen->rw, bo); - radeon_ws_bo_reference(rscreen->rw, &bo, NULL); - return; - } - radeon_ws_bo_unmap(rscreen->rw, bo); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0] = 0x00000C00; - rstate->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = 0x10000001; - rstate->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002; - rstate->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = 0x00000002; - - rstate->bo[0] = bo; - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - - radeon_state_pm4(rstate); -} - -static void r600_blit_state_vgt(struct r600_screen *rscreen, struct radeon_state *rstate) -{ - radeon_state_init(rstate, rscreen->rw, R600_STATE_VGT, 0, 0); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; - rstate->states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF; - rstate->states[R600_VGT__VGT_PRIMITIVE_TYPE] = 0x00000005; - - radeon_state_pm4(rstate); -} - -static void r600_blit_state_draw(struct r600_screen *rscreen, struct radeon_state *rstate) -{ - radeon_state_init(rstate, rscreen->rw, R600_STATE_DRAW, 0, 0); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_DRAW__VGT_DRAW_INITIATOR] = 0x00000002; - rstate->states[R600_DRAW__VGT_NUM_INDICES] = 0x00000004; - - radeon_state_pm4(rstate); -} - -static void r600_blit_state_vs_constant(struct r600_screen *rscreen, struct radeon_state *rstate, - unsigned id, float c0, float c1, float c2, float c3) -{ - radeon_state_init(rstate, rscreen->rw, R600_STATE_CONSTANT, id, R600_SHADER_VS); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT0_256] = fui(c0); - rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT1_256] = fui(c1); - rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT2_256] = fui(c2); - rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT3_256] = fui(c3); - - radeon_state_pm4(rstate); -} - -static void r600_blit_state_rasterizer(struct r600_screen *rscreen, struct radeon_state *rstate) -{ - radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_SC_LINE_CNTL] = 0x00000400; - rstate->states[R600_RASTERIZER__PA_SC_LINE_STIPPLE] = 0x00000005; - rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008; - rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000; - rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080004; - rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001; - - radeon_state_pm4(rstate); -} - -static void r600_blit_state_dsa(struct r600_screen *rscreen, struct radeon_state *rstate) -{ - radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; - rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000; - rstate->states[R600_DSA__DB_RENDER_CONTROL] = 0x00000060; - rstate->states[R600_DSA__DB_RENDER_OVERRIDE] = 0x0000002A; - rstate->states[R600_DSA__DB_SHADER_CONTROL] = 0x00000210; - - radeon_state_pm4(rstate); -} - -static void r600_blit_state_blend(struct r600_screen *rscreen, struct radeon_state *rstate) -{ - radeon_state_init(rstate, rscreen->rw, R600_STATE_BLEND, 0, 0); - radeon_state_pm4(rstate); -} - -static void r600_blit_state_cb_cntl(struct r600_screen *rscreen, struct radeon_state *rstate) -{ - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB_CNTL, 0, 0); - rstate->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000; - rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; - rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; - rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = 0x00CC0080; - rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; - rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = 0x0000000F; - rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; - radeon_state_pm4(rstate); -} - -static int r600_blit_states_init(struct pipe_context *ctx, struct r600_blit_states *bstates) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - - r600_blit_state_ps_shader(rscreen, &bstates->ps_shader); - r600_blit_state_vs_shader(rscreen, &bstates->vs_shader); - r600_blit_state_vgt(rscreen, &bstates->vgt); - r600_blit_state_draw(rscreen, &bstates->draw); - r600_blit_state_vs_constant(rscreen, &bstates->vs_constant0, 0, 1.0, 0.0, 0.0, 0.0); - r600_blit_state_vs_constant(rscreen, &bstates->vs_constant1, 1, 0.0, 1.0, 0.0, 0.0); - r600_blit_state_vs_constant(rscreen, &bstates->vs_constant2, 2, 0.0, 0.0, -0.00199900055, 0.0); - r600_blit_state_vs_constant(rscreen, &bstates->vs_constant3, 3, 0.0, 0.0, -0.99900049, 1.0); - r600_blit_state_rasterizer(rscreen, &bstates->rasterizer); - r600_blit_state_dsa(rscreen, &bstates->dsa); - r600_blit_state_blend(rscreen, &bstates->blend); - r600_blit_state_cb_cntl(rscreen, &bstates->cb_cntl); - r600_blit_state_vs_resources(rscreen, bstates); - return 0; -} - -static void r600_blit_states_destroy(struct pipe_context *ctx, struct r600_blit_states *bstates) -{ - radeon_state_fini(&bstates->ps_shader); - radeon_state_fini(&bstates->vs_shader); - radeon_state_fini(&bstates->vs_resource0); - radeon_state_fini(&bstates->vs_resource1); -} - -int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - struct radeon_draw draw; - struct r600_blit_states bstates; - int r; - - r = r600_texture_scissor(ctx, rtexture, level); - if (r) { - return r; - } - r = r600_texture_cb(ctx, rtexture, 0, level); - if (r) { - return r; - } - r = r600_texture_db(ctx, rtexture, level); - if (r) { - return r; - } - r = r600_texture_viewport(ctx, rtexture, level); - if (r) { - return r; - } - - r = r600_blit_states_init(ctx, &bstates); - if (r) { - return r; - } - bstates.dsa.states[R600_DSA__DB_RENDER_CONTROL] = 0x0000008C; - bstates.cb_cntl.states[R600_CB_CNTL__CB_TARGET_MASK] = 0x00000001; - /* force rebuild */ - bstates.dsa.cpm4 = bstates.cb_cntl.cpm4 = 0; - if (radeon_state_pm4(&bstates.dsa)) { - goto out; - } - if (radeon_state_pm4(&bstates.cb_cntl)) { - goto out; - } - - r = radeon_draw_init(&draw, rscreen->rw); - if (r) { - R600_ERR("failed creating draw for uncompressing textures\n"); - goto out; - } - - radeon_draw_bind(&draw, &bstates.vs_shader); - radeon_draw_bind(&draw, &bstates.ps_shader); - radeon_draw_bind(&draw, &bstates.rasterizer); - radeon_draw_bind(&draw, &bstates.dsa); - radeon_draw_bind(&draw, &bstates.blend); - radeon_draw_bind(&draw, &bstates.cb_cntl); - radeon_draw_bind(&draw, &rctx->config); - radeon_draw_bind(&draw, &bstates.vgt); - radeon_draw_bind(&draw, &bstates.draw); - radeon_draw_bind(&draw, &bstates.vs_resource0); - radeon_draw_bind(&draw, &bstates.vs_resource1); - radeon_draw_bind(&draw, &bstates.vs_constant0); - radeon_draw_bind(&draw, &bstates.vs_constant1); - radeon_draw_bind(&draw, &bstates.vs_constant2); - radeon_draw_bind(&draw, &bstates.vs_constant3); - radeon_draw_bind(&draw, &rtexture->viewport[level]); - radeon_draw_bind(&draw, &rtexture->scissor[level]); - radeon_draw_bind(&draw, &rtexture->cb[0][level]); - radeon_draw_bind(&draw, &rtexture->db[level]); - - /* suspend queries */ - r600_queries_suspend(ctx); - - /* schedule draw*/ - r = radeon_ctx_set_draw(rctx->ctx, &draw); - if (r == -EBUSY) { - r600_flush(ctx, 0, NULL); - r = radeon_ctx_set_draw(rctx->ctx, &draw); - } - if (r) { - goto out; - } - - /* resume queries */ - r600_queries_resume(ctx); - -out: - r600_blit_states_destroy(ctx, &bstates); - return r; -} diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index dc3fc812e1..2bfa4e22fe 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -31,9 +31,10 @@ #include <util/u_memory.h> #include <util/u_upload_mgr.h> #include "state_tracker/drm_driver.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_resource.h" +#include <xf86drm.h> +#include "radeon_drm.h" +#include "r600.h" +#include "r600_pipe.h" extern struct u_resource_vtbl r600_buffer_vtbl; @@ -42,23 +43,23 @@ u32 r600_domain_from_usage(unsigned usage) u32 domain = RADEON_GEM_DOMAIN_GTT; if (usage & PIPE_BIND_RENDER_TARGET) { - domain |= RADEON_GEM_DOMAIN_VRAM; + domain |= RADEON_GEM_DOMAIN_VRAM; } if (usage & PIPE_BIND_DEPTH_STENCIL) { - domain |= RADEON_GEM_DOMAIN_VRAM; + domain |= RADEON_GEM_DOMAIN_VRAM; } if (usage & PIPE_BIND_SAMPLER_VIEW) { - domain |= RADEON_GEM_DOMAIN_VRAM; + domain |= RADEON_GEM_DOMAIN_VRAM; } /* also need BIND_BLIT_SOURCE/DESTINATION ? */ if (usage & PIPE_BIND_VERTEX_BUFFER) { - domain |= RADEON_GEM_DOMAIN_GTT; + domain |= RADEON_GEM_DOMAIN_GTT; } if (usage & PIPE_BIND_INDEX_BUFFER) { - domain |= RADEON_GEM_DOMAIN_GTT; + domain |= RADEON_GEM_DOMAIN_GTT; } if (usage & PIPE_BIND_CONSTANT_BUFFER) { - domain |= RADEON_GEM_DOMAIN_VRAM; + domain |= RADEON_GEM_DOMAIN_VRAM; } return domain; @@ -67,9 +68,8 @@ u32 r600_domain_from_usage(unsigned usage) struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ) { - struct r600_screen *rscreen = r600_screen(screen); struct r600_resource_buffer *rbuffer; - struct radeon_ws_bo *bo; + struct r600_bo *bo; /* XXX We probably want a different alignment for buffers and textures. */ unsigned alignment = 4096; @@ -86,7 +86,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, rbuffer->r.base.vtbl = &r600_buffer_vtbl; rbuffer->r.size = rbuffer->r.base.b.width0; rbuffer->r.domain = r600_domain_from_usage(rbuffer->r.base.b.bind); - bo = radeon_ws_bo(rscreen->rw, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind); + bo = r600_bo((struct radeon*)screen->winsys, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind); if (bo == NULL) { FREE(rbuffer); return NULL; @@ -127,10 +127,9 @@ static void r600_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf) { struct r600_resource_buffer *rbuffer = r600_buffer(buf); - struct r600_screen *rscreen = r600_screen(screen); if (rbuffer->r.bo) { - radeon_ws_bo_reference(rscreen->rw, &rbuffer->r.bo, NULL); + r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL); } FREE(rbuffer); } @@ -138,9 +137,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen, static void *r600_buffer_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { - struct r600_context *rctx = r600_context(pipe); struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); - struct r600_screen *rscreen = r600_screen(pipe->screen); int write = 0; uint8_t *data; int i; @@ -156,9 +153,9 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, flush = TRUE; if (flush) { - radeon_ws_bo_reference(rscreen->rw, &rbuffer->r.bo, NULL); + r600_bo_reference((struct radeon*)pipe->winsys, &rbuffer->r.bo, NULL); rbuffer->num_ranges = 0; - rbuffer->r.bo = radeon_ws_bo(rscreen->rw, + rbuffer->r.bo = r600_bo((struct radeon*)pipe->winsys, rbuffer->r.base.b.width0, 0, rbuffer->r.base.b.bind); break; @@ -171,7 +168,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, if (transfer->usage & PIPE_TRANSFER_WRITE) { write = 1; } - data = radeon_ws_bo_map(rscreen->rw, rbuffer->r.bo, transfer->usage, rctx); + data = r600_bo_map((struct radeon*)pipe->winsys, rbuffer->r.bo, transfer->usage, pipe); if (!data) return NULL; @@ -182,10 +179,9 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); - struct r600_screen *rscreen = r600_screen(pipe->screen); if (rbuffer->r.bo) - radeon_ws_bo_unmap(rscreen->rw, rbuffer->r.bo); + r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo); } static void r600_buffer_transfer_flush_region(struct pipe_context *pipe, @@ -229,16 +225,16 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, { struct radeon *rw = (struct radeon*)screen->winsys; struct r600_resource *rbuffer; - struct radeon_ws_bo *bo = NULL; + struct r600_bo *bo = NULL; - bo = radeon_ws_bo_handle(rw, whandle->handle); + bo = r600_bo_handle(rw, whandle->handle); if (bo == NULL) { return NULL; } rbuffer = CALLOC_STRUCT(r600_resource); if (rbuffer == NULL) { - radeon_ws_bo_reference(rw, &bo, NULL); + r600_bo_reference(rw, &bo, NULL); return NULL; } @@ -263,8 +259,7 @@ struct u_resource_vtbl r600_buffer_vtbl = u_default_transfer_inline_write /* transfer_inline_write */ }; -int r600_upload_index_buffer(struct r600_context *rctx, - struct r600_draw *draw) +int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw) { struct pipe_resource *upload_buffer = NULL; unsigned index_offset = draw->index_buffer_offset; @@ -281,14 +276,17 @@ int r600_upload_index_buffer(struct r600_context *rctx, goto done; } draw->index_buffer_offset = index_offset; - draw->index_buffer = upload_buffer; + + /* Transfer ownership. */ + pipe_resource_reference(&draw->index_buffer, upload_buffer); + pipe_resource_reference(&upload_buffer, NULL); } done: return ret; } -int r600_upload_user_buffers(struct r600_context *rctx) +int r600_upload_user_buffers(struct r600_pipe_context *rctx) { enum pipe_error ret = PIPE_OK; int i, nr; diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c deleted file mode 100644 index 776dc24569..0000000000 --- a/src/gallium/drivers/r600/r600_context.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Corbin Simpson - */ -#include <stdio.h> -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include <util/u_upload_mgr.h> -#include <util/u_blitter.h> -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_resource.h" - -static void r600_destroy_context(struct pipe_context *context) -{ - struct r600_context *rctx = r600_context(context); - - rctx->rasterizer = r600_context_state_decref(rctx->rasterizer); - rctx->poly_stipple = r600_context_state_decref(rctx->poly_stipple); - rctx->scissor = r600_context_state_decref(rctx->scissor); - rctx->clip = r600_context_state_decref(rctx->clip); - rctx->ps_shader = r600_context_state_decref(rctx->ps_shader); - rctx->vs_shader = r600_context_state_decref(rctx->vs_shader); - rctx->depth = r600_context_state_decref(rctx->depth); - rctx->stencil = r600_context_state_decref(rctx->stencil); - rctx->alpha = r600_context_state_decref(rctx->alpha); - rctx->dsa = r600_context_state_decref(rctx->dsa); - rctx->blend = r600_context_state_decref(rctx->blend); - rctx->stencil_ref = r600_context_state_decref(rctx->stencil_ref); - rctx->viewport = r600_context_state_decref(rctx->viewport); - rctx->framebuffer = r600_context_state_decref(rctx->framebuffer); - - free(rctx->ps_constant); - free(rctx->vs_constant); - free(rctx->vs_resource); - - u_upload_destroy(rctx->upload_vb); - u_upload_destroy(rctx->upload_ib); - - radeon_ctx_fini(rctx->ctx); - FREE(rctx); -} - -void r600_flush(struct pipe_context *ctx, unsigned flags, - struct pipe_fence_handle **fence) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_query *rquery = NULL; - - /* flush upload buffers */ - u_upload_flush(rctx->upload_vb); - u_upload_flush(rctx->upload_ib); - - /* suspend queries */ - r600_queries_suspend(ctx); - - radeon_ctx_submit(rctx->ctx); - - LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { - rquery->flushed = true; - } - - radeon_ctx_clear(rctx->ctx); - /* resume queries */ - r600_queries_resume(ctx); -} - -void r600_flush_ctx(void *data) -{ - struct r600_context *rctx = data; - - rctx->context.flush(&rctx->context, 0, NULL); -} - -struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) -{ - struct r600_context *rctx = CALLOC_STRUCT(r600_context); - struct r600_screen* rscreen = r600_screen(screen); - - if (rctx == NULL) - return NULL; - rctx->context.winsys = rscreen->screen.winsys; - rctx->context.screen = screen; - rctx->context.priv = priv; - rctx->context.destroy = r600_destroy_context; - rctx->context.draw_vbo = r600_draw_vbo; - rctx->context.flush = r600_flush; - - /* Easy accessing of screen/winsys. */ - rctx->screen = rscreen; - rctx->rw = rscreen->rw; - - if (rscreen->chip_class == EVERGREEN) - rctx->vtbl = &eg_hw_state_vtbl; - else - rctx->vtbl = &r600_hw_state_vtbl; - - r600_init_blit_functions(rctx); - r600_init_query_functions(rctx); - r600_init_state_functions(rctx); - r600_init_context_resource_functions(rctx); - - rctx->blitter = util_blitter_create(&rctx->context); - if (rctx->blitter == NULL) { - FREE(rctx); - return NULL; - } - - rctx->vtbl->init_config(rctx); - - rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16, - PIPE_BIND_INDEX_BUFFER); - if (rctx->upload_ib == NULL) { - goto out_free; - } - - rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16, - PIPE_BIND_VERTEX_BUFFER); - if (rctx->upload_vb == NULL) { - goto out_free; - } - - rctx->vs_constant = (struct radeon_state *)calloc(R600_MAX_CONSTANT, sizeof(struct radeon_state)); - if (!rctx->vs_constant) { - goto out_free; - } - - rctx->ps_constant = (struct radeon_state *)calloc(R600_MAX_CONSTANT, sizeof(struct radeon_state)); - if (!rctx->ps_constant) { - goto out_free; - } - - rctx->vs_resource = (struct radeon_state *)calloc(R600_MAX_RESOURCE, sizeof(struct radeon_state)); - if (!rctx->vs_resource) { - goto out_free; - } - - rctx->ctx = radeon_ctx_init(rscreen->rw); - radeon_draw_init(&rctx->draw, rscreen->rw); - return &rctx->context; - out_free: - FREE(rctx); - return NULL; -} diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h deleted file mode 100644 index 3107f189c7..0000000000 --- a/src/gallium/drivers/r600/r600_context.h +++ /dev/null @@ -1,321 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef R600_CONTEXT_H -#define R600_CONTEXT_H - -#include <stdio.h> -#include <pipe/p_state.h> -#include <pipe/p_context.h> -#include <tgsi/tgsi_scan.h> -#include <tgsi/tgsi_parse.h> -#include <tgsi/tgsi_util.h> -#include <util/u_blitter.h> -#include <util/u_double_list.h> -#include "radeon.h" -#include "r600_shader.h" - -struct u_upload_mgr; - -#define R600_QUERY_STATE_STARTED (1 << 0) -#define R600_QUERY_STATE_ENDED (1 << 1) -#define R600_QUERY_STATE_SUSPENDED (1 << 2) - -struct r600_query { - u64 result; - /* The kind of query. Currently only OQ is supported. */ - unsigned type; - /* How many results have been written, in dwords. It's incremented - * after end_query and flush. */ - unsigned num_results; - /* if we've flushed the query */ - boolean flushed; - unsigned state; - /* The buffer where query results are stored. */ - struct radeon_ws_bo *buffer; - unsigned buffer_size; - /* linked list of queries */ - struct list_head list; - struct radeon_state rstate; -}; - -/* XXX move this to a more appropriate place */ -union pipe_states { - struct pipe_rasterizer_state rasterizer; - struct pipe_poly_stipple poly_stipple; - struct pipe_scissor_state scissor; - struct pipe_clip_state clip; - struct pipe_shader_state shader; - struct pipe_depth_state depth; - struct pipe_stencil_state stencil; - struct pipe_alpha_state alpha; - struct pipe_depth_stencil_alpha_state dsa; - struct pipe_blend_state blend; - struct pipe_blend_color blend_color; - struct pipe_stencil_ref stencil_ref; - struct pipe_framebuffer_state framebuffer; - struct pipe_sampler_state sampler; - struct pipe_sampler_view sampler_view; - struct pipe_viewport_state viewport; -}; - -enum pipe_state_type { - pipe_rasterizer_type = 1, - pipe_poly_stipple_type, - pipe_scissor_type, - pipe_clip_type, - pipe_shader_type, - pipe_depth_type, - pipe_stencil_type, - pipe_alpha_type, - pipe_dsa_type, - pipe_blend_type, - pipe_stencil_ref_type, - pipe_framebuffer_type, - pipe_sampler_type, - pipe_sampler_view_type, - pipe_viewport_type, - pipe_type_count -}; - -#define R600_MAX_RSTATE 16 - -struct r600_context_state { - union pipe_states state; - unsigned refcount; - unsigned type; - struct radeon_state rstate[R600_MAX_RSTATE]; - struct r600_shader shader; - struct radeon_ws_bo *bo; - unsigned nrstate; -}; - -struct r600_vertex_element -{ - unsigned refcount; - unsigned count; - struct pipe_vertex_element elements[32]; -}; - -struct r600_draw { - struct pipe_context *ctx; - struct radeon_state draw; - struct radeon_state vgt; - unsigned mode; - unsigned start; - unsigned count; - unsigned index_size; - struct pipe_resource *index_buffer; - unsigned index_buffer_offset; - unsigned min_index, max_index; -}; - -struct r600_context_hw_states { - struct radeon_state rasterizer; - struct radeon_state scissor; - struct radeon_state dsa; - struct radeon_state cb_cntl; - - struct radeon_state db_flush; - struct radeon_state cb_flush; -}; - -#define R600_MAX_CONSTANT 256 /* magic */ -#define R600_MAX_RESOURCE 160 /* magic */ - -struct r600_shader_sampler_states { - unsigned nsampler; - unsigned nview; - unsigned nborder; - struct radeon_state *sampler[PIPE_MAX_ATTRIBS]; - struct radeon_state *view[PIPE_MAX_ATTRIBS]; - struct radeon_state *border[PIPE_MAX_ATTRIBS]; -}; - -struct r600_context; -struct r600_screen; -struct r600_resource; -struct r600_resource_texture; - -struct r600_context_hw_state_vtbl { - void (*blend)(struct r600_context *rctx, - struct radeon_state *rstate, - const struct pipe_blend_state *state); - void (*ucp)(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_clip_state *state); - void (*cb)(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state, int cb); - void (*db)(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state); - void (*rasterizer)(struct r600_context *rctx, struct radeon_state *rstate); - void (*scissor)(struct r600_context *rctx, struct radeon_state *rstate); - void (*viewport)(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state); - void (*dsa)(struct r600_context *rctx, struct radeon_state *rstate); - void (*sampler_border)(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id); - void (*sampler)(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id); - void (*resource)(struct pipe_context *ctx, struct radeon_state *rstate, - const struct pipe_sampler_view *view, unsigned id); - void (*cb_cntl)(struct r600_context *rctx, struct radeon_state *rstate); - int (*vs_resource)(struct r600_context *rctx, int id, struct r600_resource *rbuffer, uint32_t offset, - uint32_t stride, uint32_t format); - int (*vgt_init)(struct r600_draw *draw, - int vgt_draw_initiator); - int (*vgt_prim)(struct r600_draw *draw, - uint32_t prim, uint32_t vgt_dma_index_type); - - int (*ps_shader)(struct r600_context *rctx, struct r600_context_state *rshader, - struct radeon_state *state); - int (*vs_shader)(struct r600_context *rctx, struct r600_context_state *rpshader, - struct radeon_state *state); - void (*init_config)(struct r600_context *rctx); - - - void (*texture_state_viewport)(struct r600_screen *rscreen, - struct r600_resource_texture *rtexture, - unsigned level); - void (*texture_state_cb)(struct r600_screen *rscreen, - struct r600_resource_texture *rtexture, - unsigned cb, - unsigned level); - void (*texture_state_db)(struct r600_screen *rscreen, - struct r600_resource_texture *rtexture, - unsigned level); - void (*texture_state_scissor)(struct r600_screen *rscreen, - struct r600_resource_texture *rtexture, - unsigned level); -}; -extern struct r600_context_hw_state_vtbl r600_hw_state_vtbl; -extern struct r600_context_hw_state_vtbl eg_hw_state_vtbl; - -struct r600_context { - struct pipe_context context; - struct r600_screen *screen; - struct radeon *rw; - struct radeon_ctx *ctx; - struct blitter_context *blitter; - struct radeon_draw draw; - struct r600_context_hw_state_vtbl *vtbl; - struct radeon_state config; - boolean use_mem_constant; - /* FIXME get rid of those vs_resource,vs/ps_constant */ - struct radeon_state *vs_resource; - unsigned vs_nresource; - struct radeon_state *vs_constant; - struct radeon_state *ps_constant; - /* hw states */ - struct r600_context_hw_states hw_states; - /* pipe states */ - unsigned flat_shade; - - unsigned nvertex_buffer; - struct r600_context_state *rasterizer; - struct r600_context_state *poly_stipple; - struct r600_context_state *scissor; - struct r600_context_state *clip; - struct r600_context_state *ps_shader; - struct r600_context_state *vs_shader; - struct r600_context_state *depth; - struct r600_context_state *stencil; - struct r600_context_state *alpha; - struct r600_context_state *dsa; - struct r600_context_state *blend; - struct r600_context_state *stencil_ref; - struct r600_context_state *viewport; - struct r600_context_state *framebuffer; - struct r600_shader_sampler_states vs_sampler; - struct r600_shader_sampler_states ps_sampler; - /* can add gs later */ - struct r600_vertex_element *vertex_elements; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - struct pipe_index_buffer index_buffer; - struct pipe_blend_color blend_color; - struct list_head query_list; - - /* upload managers */ - struct u_upload_mgr *upload_vb; - struct u_upload_mgr *upload_ib; - bool any_user_vbs; - -}; - -/* Convenience cast wrapper. */ -static INLINE struct r600_context *r600_context(struct pipe_context *pipe) -{ - return (struct r600_context*)pipe; -} - -static INLINE struct r600_query* r600_query(struct pipe_query* q) -{ - return (struct r600_query*)q; -} - -struct r600_context_state *r600_context_state_incref(struct r600_context_state *rstate); -struct r600_context_state *r600_context_state_decref(struct r600_context_state *rstate); -void r600_flush(struct pipe_context *ctx, unsigned flags, - struct pipe_fence_handle **fence); - -int r600_context_hw_states(struct pipe_context *ctx); - -void r600_draw_vbo(struct pipe_context *ctx, - const struct pipe_draw_info *info); - -void r600_init_blit_functions(struct r600_context *rctx); -void r600_init_state_functions(struct r600_context *rctx); -void r600_init_query_functions(struct r600_context* rctx); -struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv); - -extern int r600_pipe_shader_create(struct pipe_context *ctx, - struct r600_context_state *rstate, - const struct tgsi_token *tokens); -extern int r600_pipe_shader_update(struct pipe_context *ctx, - struct r600_context_state *rstate); - -#define R600_ERR(fmt, args...) \ - fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args) - -uint32_t r600_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view, - uint32_t *word4_p, uint32_t *yuv_format_p); - -/* query */ -extern void r600_queries_resume(struct pipe_context *ctx); -extern void r600_queries_suspend(struct pipe_context *ctx); - -int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); - -void r600_set_constant_buffer_file(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer); -void r600_set_constant_buffer_mem(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer); -void eg_set_constant_buffer(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer); - -int r600_upload_index_buffer(struct r600_context *rctx, - struct r600_draw *draw); -int r600_upload_user_buffers(struct r600_context *rctx); - -#endif diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c deleted file mode 100644 index 5480ca002d..0000000000 --- a/src/gallium/drivers/r600/r600_draw.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Corbin Simpson - */ -#include <stdio.h> -#include <errno.h> -#include <pipe/p_screen.h> -#include <util/u_format.h> -#include <util/u_math.h> -#include <util/u_inlines.h> -#include <util/u_memory.h> -#include "radeon.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_resource.h" -#include "r600_state_inlines.h" - -static int r600_draw_common(struct r600_draw *draw) -{ - struct r600_context *rctx = r600_context(draw->ctx); - struct r600_screen *rscreen = rctx->screen; - /* FIXME vs_resource */ - struct radeon_state *vs_resource; - struct r600_resource *rbuffer; - unsigned i, j, offset, format, prim; - u32 vgt_dma_index_type, vgt_draw_initiator; - struct pipe_vertex_buffer *vertex_buffer; - int r; - - r = r600_context_hw_states(draw->ctx); - if (r) - return r; - switch (draw->index_size) { - case 2: - vgt_draw_initiator = S_0287F0_SOURCE_SELECT(V_0287F0_DI_SRC_SEL_DMA); - vgt_dma_index_type = 0; - break; - case 4: - vgt_draw_initiator = S_0287F0_SOURCE_SELECT(V_0287F0_DI_SRC_SEL_DMA); - vgt_dma_index_type = 1; - break; - case 0: - vgt_draw_initiator = S_0287F0_SOURCE_SELECT(V_0287F0_DI_SRC_SEL_AUTO_INDEX); - vgt_dma_index_type = 0; - break; - default: - fprintf(stderr, "%s %d unsupported index size %d\n", __func__, __LINE__, draw->index_size); - return -EINVAL; - } - r = r600_conv_pipe_prim(draw->mode, &prim); - if (r) - return r; - - /* rebuild vertex shader if input format changed */ - r = r600_pipe_shader_update(draw->ctx, rctx->vs_shader); - if (r) - return r; - r = r600_pipe_shader_update(draw->ctx, rctx->ps_shader); - if (r) - return r; - radeon_draw_bind(&rctx->draw, &rctx->vs_shader->rstate[0]); - radeon_draw_bind(&rctx->draw, &rctx->ps_shader->rstate[0]); - - for (i = 0 ; i < rctx->vs_nresource; i++) { - radeon_state_fini(&rctx->vs_resource[i]); - } - for (i = 0 ; i < rctx->vertex_elements->count; i++) { - vs_resource = &rctx->vs_resource[i]; - j = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[j]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset; - format = r600_translate_colorformat(rctx->vertex_elements->elements[i].src_format); - - rctx->vtbl->vs_resource(rctx, i, rbuffer, offset, vertex_buffer->stride, format); - radeon_draw_bind(&rctx->draw, vs_resource); - } - rctx->vs_nresource = rctx->vertex_elements->count; - /* FIXME start need to change winsys */ - rctx->vtbl->vgt_init(draw, vgt_draw_initiator); - radeon_draw_bind(&rctx->draw, &draw->draw); - - rctx->vtbl->vgt_prim(draw, prim, vgt_dma_index_type); - radeon_draw_bind(&rctx->draw, &draw->vgt); - - r = radeon_ctx_set_draw(rctx->ctx, &rctx->draw); - if (r == -EBUSY) { - r600_flush(draw->ctx, 0, NULL); - r = radeon_ctx_set_draw(rctx->ctx, &rctx->draw); - } - - radeon_state_fini(&draw->draw); - - return r; -} - -void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_draw draw; - int r; - - assert(info->index_bias == 0); - - memset(&draw, 0, sizeof(draw)); - - if (rctx->any_user_vbs) { - r600_upload_user_buffers(rctx); - rctx->any_user_vbs = false; - } - - draw.ctx = ctx; - draw.mode = info->mode; - draw.start = info->start; - draw.count = info->count; - if (info->indexed && rctx->index_buffer.buffer) { - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_size = rctx->index_buffer.index_size; - draw.index_buffer = rctx->index_buffer.buffer; - draw.index_buffer_offset = rctx->index_buffer.offset; - - assert(rctx->index_buffer.offset % - rctx->index_buffer.index_size == 0); - r600_upload_index_buffer(rctx, &draw); - } - else { - draw.index_size = 0; - draw.index_buffer = NULL; - draw.min_index = 0; - draw.max_index = 0xffffff; - draw.index_buffer_offset = 0; - } - r = r600_draw_common(&draw); - if (r) - fprintf(stderr,"draw common failed %d\n", r); -} diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h new file mode 100644 index 0000000000..0c91a21238 --- /dev/null +++ b/src/gallium/drivers/r600/r600_formats.h @@ -0,0 +1,56 @@ +#ifndef R600_FORMATS_H +#define R600_FORMATS_H + +/* list of formats from R700 ISA document - apply across GPUs in different registers */ +#define FMT_INVALID 0x00000000 +#define FMT_8 0x00000001 +#define FMT_4_4 0x00000002 +#define FMT_3_3_2 0x00000003 +#define FMT_16 0x00000005 +#define FMT_16_FLOAT 0x00000006 +#define FMT_8_8 0x00000007 +#define FMT_5_6_5 0x00000008 +#define FMT_6_5_5 0x00000009 +#define FMT_1_5_5_5 0x0000000A +#define FMT_4_4_4_4 0x0000000B +#define FMT_5_5_5_1 0x0000000C +#define FMT_32 0x0000000D +#define FMT_32_FLOAT 0x0000000E +#define FMT_16_16 0x0000000F +#define FMT_16_16_FLOAT 0x00000010 +#define FMT_8_24 0x00000011 +#define FMT_8_24_FLOAT 0x00000012 +#define FMT_24_8 0x00000013 +#define FMT_24_8_FLOAT 0x00000014 +#define FMT_10_11_11 0x00000015 +#define FMT_10_11_11_FLOAT 0x00000016 +#define FMT_11_11_10 0x00000017 +#define FMT_11_11_10_FLOAT 0x00000018 +#define FMT_2_10_10_10 0x00000019 +#define FMT_8_8_8_8 0x0000001A +#define FMT_10_10_10_2 0x0000001B +#define FMT_X24_8_32_FLOAT 0x0000001C +#define FMT_32_32 0x0000001D +#define FMT_32_32_FLOAT 0x0000001E +#define FMT_16_16_16_16 0x0000001F +#define FMT_16_16_16_16_FLOAT 0x00000020 +#define FMT_32_32_32_32 0x00000022 +#define FMT_32_32_32_32_FLOAT 0x00000023 +#define FMT_1 0x00000025 +#define FMT_GB_GR 0x00000027 +#define FMT_BG_RG 0x00000028 +#define FMT_32_AS_8 0x00000029 +#define FMT_32_AS_8_8 0x0000002a +#define FMT_5_9_9_9_SHAREDEXP 0x0000002b +#define FMT_8_8_8 0x0000002c +#define FMT_16_16_16 0x0000002d +#define FMT_16_16_16_FLOAT 0x0000002e +#define FMT_32_32_32 0x0000002f +#define FMT_32_32_32_FLOAT 0x00000030 +#define FMT_BC1 0x00000031 +#define FMT_BC2 0x00000032 +#define FMT_BC3 0x00000033 +#define FMT_BC4 0x00000034 +#define FMT_BC5 0x00000035 + +#endif diff --git a/src/gallium/drivers/r600/r600_helper.c b/src/gallium/drivers/r600/r600_helper.c index 5e0e0aab57..7e13109306 100644 --- a/src/gallium/drivers/r600/r600_helper.c +++ b/src/gallium/drivers/r600/r600_helper.c @@ -26,8 +26,7 @@ #include <stdio.h> #include <errno.h> #include <util/u_inlines.h> -#include "r600_screen.h" -#include "r600_context.h" +#include "r600_pipe.h" #include "r600d.h" int r600_conv_pipe_prim(unsigned pprim, unsigned *prim) diff --git a/src/gallium/drivers/r600/r600_hw_states.c b/src/gallium/drivers/r600/r600_hw_states.c deleted file mode 100644 index bca78ee8de..0000000000 --- a/src/gallium/drivers/r600/r600_hw_states.c +++ /dev/null @@ -1,1287 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * 2010 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Dave Airlie - */ - -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include <util/u_blitter.h> -#include "util/u_pack_color.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_resource.h" -#include "r600_state_inlines.h" -#include "r600d.h" - -static void r600_blend(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_blend_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - int i; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_BLEND, 0, 0); - rstate->states[R600_BLEND__CB_BLEND_RED] = fui(rctx->blend_color.color[0]); - rstate->states[R600_BLEND__CB_BLEND_GREEN] = fui(rctx->blend_color.color[1]); - rstate->states[R600_BLEND__CB_BLEND_BLUE] = fui(rctx->blend_color.color[2]); - rstate->states[R600_BLEND__CB_BLEND_ALPHA] = fui(rctx->blend_color.color[3]); - rstate->states[R600_BLEND__CB_BLEND0_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND1_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND2_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND3_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND4_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND5_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND6_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND7_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND_CONTROL] = 0x00000000; - - for (i = 0; i < 8; i++) { - unsigned eqRGB = state->rt[i].rgb_func; - unsigned srcRGB = state->rt[i].rgb_src_factor; - unsigned dstRGB = state->rt[i].rgb_dst_factor; - - unsigned eqA = state->rt[i].alpha_func; - unsigned srcA = state->rt[i].alpha_src_factor; - unsigned dstA = state->rt[i].alpha_dst_factor; - uint32_t bc = 0; - - if (!state->rt[i].blend_enable) - continue; - - bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); - bc |= S_028804_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); - bc |= S_028804_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); - - if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { - bc |= S_028804_SEPARATE_ALPHA_BLEND(1); - bc |= S_028804_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); - bc |= S_028804_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); - bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); - } - - rstate->states[R600_BLEND__CB_BLEND0_CONTROL + i] = bc; - if (i == 0) - rstate->states[R600_BLEND__CB_BLEND_CONTROL] = bc; - } - - radeon_state_pm4(rstate); -} - -static void r600_ucp(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_clip_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_UCP, 0, 0); - - for (int i = 0; i < state->nr; i++) { - rstate->states[i * 4 + 0] = fui(state->ucp[i][0]); - rstate->states[i * 4 + 1] = fui(state->ucp[i][1]); - rstate->states[i * 4 + 2] = fui(state->ucp[i][2]); - rstate->states[i * 4 + 3] = fui(state->ucp[i][3]); - } - radeon_state_pm4(rstate); -} - -static void r600_cb(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state, int cb) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level = state->cbufs[cb]->level; - unsigned pitch, slice; - unsigned color_info; - unsigned format, swap, ntype; - const struct util_format_description *desc; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0 + cb, 0, 0); - rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; - rbuffer = &rtex->resource; - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->nbo = 1; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; - - ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = V_0280A0_NUMBER_SRGB; - - format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = r600_translate_colorswap(rtex->resource.base.b.format); - - color_info = S_0280A0_FORMAT(format) | - S_0280A0_COMP_SWAP(swap) | - S_0280A0_BLEND_CLAMP(1) | - S_0280A0_SOURCE_FORMAT(1) | - S_0280A0_NUMBER_TYPE(ntype); - - rstate->states[R600_CB0__CB_COLOR0_BASE] = state->cbufs[cb]->offset >> 8; - rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info; - rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) | - S_028060_SLICE_TILE_MAX(slice); - rstate->states[R600_CB0__CB_COLOR0_VIEW] = 0x00000000; - rstate->states[R600_CB0__CB_COLOR0_FRAG] = 0x00000000; - rstate->states[R600_CB0__CB_COLOR0_TILE] = 0x00000000; - rstate->states[R600_CB0__CB_COLOR0_MASK] = 0x00000000; - radeon_state_pm4(rstate); -} - -static void r600_db(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level; - unsigned pitch, slice, format; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0); - if (state->zsbuf == NULL) - return; - - rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tilled = 1; - rtex->array_mode = 2; - rtex->tile_type = 1; - rtex->depth = 1; - rbuffer = &rtex->resource; - - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; - level = state->zsbuf->level; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; - format = r600_translate_dbformat(state->zsbuf->texture->format); - rstate->states[R600_DB__DB_DEPTH_BASE] = state->zsbuf->offset >> 8; - rstate->states[R600_DB__DB_DEPTH_INFO] = S_028010_ARRAY_MODE(rtex->array_mode) | - S_028010_FORMAT(format); - rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000; - rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1; - rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | - S_028000_SLICE_TILE_MAX(slice); - radeon_state_pm4(rstate); -} - -static void r600_rasterizer(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer; - const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - const struct pipe_clip_state *clip = NULL; - struct r600_screen *rscreen = rctx->screen; - float offset_units = 0, offset_scale = 0; - char depth = 0; - unsigned offset_db_fmt_cntl = 0; - unsigned tmp; - unsigned prov_vtx = 1; - - if (rctx->clip) - clip = &rctx->clip->state.clip; - if (fb->zsbuf) { - offset_units = state->offset_units; - offset_scale = state->offset_scale * 12.0f; - switch (fb->zsbuf->texture->format) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - depth = -24; - offset_units *= 2.0f; - break; - case PIPE_FORMAT_Z32_FLOAT: - depth = -23; - offset_units *= 1.0f; - offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); - break; - case PIPE_FORMAT_Z16_UNORM: - depth = -16; - offset_units *= 4.0f; - break; - default: - R600_ERR("unsupported %d\n", fb->zsbuf->texture->format); - return; - } - } - offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); - - if (state->flatshade_first) - prov_vtx = 0; - - rctx->flat_shade = state->flatshade; - radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0); - rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001; - if (state->sprite_coord_enable) { - rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |= - S_0286D4_PNT_SPRITE_ENA(1) | - S_0286D4_PNT_SPRITE_OVRD_X(2) | - S_0286D4_PNT_SPRITE_OVRD_Y(3) | - S_0286D4_PNT_SPRITE_OVRD_Z(0) | - S_0286D4_PNT_SPRITE_OVRD_W(1); - if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { - rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |= - S_0286D4_PNT_SPRITE_TOP_1(1); - } - } - rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0; - if (clip) { - rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = S_028810_PS_UCP_MODE(3) | ((1 << clip->nr) - 1); - rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_NEAR_DISABLE(clip->depth_clamp); - rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_FAR_DISABLE(clip->depth_clamp); - } - rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = - S_028814_PROVOKING_VTX_LAST(prov_vtx) | - S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | - S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | - S_028814_FACE(!state->front_ccw) | - S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri); - rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] = - S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | - S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex); - rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000; - /* point size 12.4 fixed point */ - tmp = (unsigned)(state->point_size * 8.0); - rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp); - rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000; - rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008; - rstate->states[R600_RASTERIZER__PA_SC_LINE_STIPPLE] = 0x00000005; - rstate->states[R600_RASTERIZER__PA_SC_MPASS_PS_CNTL] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SC_LINE_CNTL] = 0x00000400; - rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL] = offset_db_fmt_cntl; - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE] = fui(offset_scale); - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = fui(offset_units); - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = fui(offset_scale); - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = fui(offset_units); - radeon_state_pm4(rstate); -} - -static void r600_scissor(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_scissor_state *state = &rctx->scissor->state.scissor; - const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - struct r600_screen *rscreen = rctx->screen; - enum radeon_family family; - unsigned minx, maxx, miny, maxy; - u32 tl, br; - - family = radeon_get_family(rctx->rw); - - if (state == NULL) { - minx = 0; - miny = 0; - maxx = fb->cbufs[0]->width; - maxy = fb->cbufs[0]->height; - } else { - minx = state->minx; - miny = state->miny; - maxx = state->maxx; - maxy = state->maxy; - } - tl = S_028240_TL_X(minx) | S_028240_TL_Y(miny) | S_028240_WINDOW_OFFSET_DISABLE(1); - br = S_028244_BR_X(maxx) | S_028244_BR_Y(maxy); - radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0); - rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; - rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_BR] = br; - - if (family >= CHIP_RV770) - rstate->states[R600_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA; - - rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br; - radeon_state_pm4(rstate); -} - -static void r600_viewport(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0); - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(state->scale[1]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(state->scale[2]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(state->translate[0]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); - rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - radeon_state_pm4(rstate); -} - -static void r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; - const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref; - struct r600_screen *rscreen = rctx->screen; - unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; - unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control; - struct r600_shader *rshader; - struct r600_query *rquery = NULL; - boolean query_running; - int i; - - if (rctx->ps_shader == NULL) { - return; - } - radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0); - - db_shader_control = 0; - db_shader_control |= S_02880C_DUAL_EXPORT_ENABLE(1); - db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); - - rshader = &rctx->ps_shader->shader; - if (rshader->uses_kill) - db_shader_control |= S_02880C_KILL_ENABLE(1); - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1); - } - stencil_ref_mask = 0; - stencil_ref_mask_bf = 0; - db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | - S_028800_Z_WRITE_ENABLE(state->depth.writemask) | - S_028800_ZFUNC(state->depth.func); - /* set stencil enable */ - - if (state->stencil[0].enabled) { - db_depth_control |= S_028800_STENCIL_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); - db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); - db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); - - stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | - S_028430_STENCILWRITEMASK(state->stencil[0].writemask); - stencil_ref_mask |= S_028430_STENCILREF(stencil_ref->ref_value[0]); - if (state->stencil[1].enabled) { - db_depth_control |= S_028800_BACKFACE_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); - db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); - db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); - stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | - S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); - stencil_ref_mask_bf |= S_028430_STENCILREF(stencil_ref->ref_value[1]); - } - } - - alpha_test_control = 0; - alpha_ref = 0; - if (state->alpha.enabled) { - alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); - alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); - alpha_ref = fui(state->alpha.ref_value); - } - - db_render_control = S_028D0C_STENCIL_COMPRESS_DISABLE(1) | - S_028D0C_DEPTH_COMPRESS_DISABLE(1); - db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); - - query_running = false; - - LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { - if (rquery->state & R600_QUERY_STATE_STARTED) { - query_running = true; - } - } - - if (query_running) { - db_render_override |= S_028D10_NOOP_CULL_DISABLE(1); - if (rscreen->chip_class == R700) - db_render_control |= S_028D0C_R700_PERFECT_ZPASS_COUNTS(1); - } - - rstate->states[R600_DSA__DB_STENCIL_CLEAR] = 0x00000000; - rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000; - rstate->states[R600_DSA__SX_ALPHA_TEST_CONTROL] = alpha_test_control; - rstate->states[R600_DSA__DB_STENCILREFMASK] = stencil_ref_mask; - rstate->states[R600_DSA__DB_STENCILREFMASK_BF] = stencil_ref_mask_bf; - rstate->states[R600_DSA__SX_ALPHA_REF] = alpha_ref; - rstate->states[R600_DSA__SPI_FOG_FUNC_SCALE] = 0x00000000; - rstate->states[R600_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000; - rstate->states[R600_DSA__SPI_FOG_CNTL] = 0x00000000; - rstate->states[R600_DSA__DB_DEPTH_CONTROL] = db_depth_control; - rstate->states[R600_DSA__DB_SHADER_CONTROL] = db_shader_control; - rstate->states[R600_DSA__DB_RENDER_CONTROL] = db_render_control; - rstate->states[R600_DSA__DB_RENDER_OVERRIDE] = db_render_override; - - rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; - rstate->states[R600_DSA__DB_PRELOAD_CONTROL] = 0x00000000; - rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; - radeon_state_pm4(rstate); -} - - -static INLINE u32 S_FIXED(float value, u32 frac_bits) -{ - return value * (1 << frac_bits); -} - -static void r600_sampler_border(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id) -{ - struct r600_screen *rscreen = rctx->screen; - union util_color uc; - - util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER_BORDER, id, R600_SHADER_PS); - if (uc.ui) { - rstate->states[R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED] = fui(state->border_color[0]); - rstate->states[R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN] = fui(state->border_color[1]); - rstate->states[R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE] = fui(state->border_color[2]); - rstate->states[R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA] = fui(state->border_color[3]); - } - radeon_state_pm4(rstate); -} - -static void r600_sampler(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id) -{ - struct r600_screen *rscreen = rctx->screen; - union util_color uc; - - util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER, id, R600_SHADER_PS); - rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] = - S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | - S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | - S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | - S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) | - S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) | - S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | - S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | - S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0); - /* FIXME LOD it depends on texture base level ... */ - rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0] = - S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | - S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | - S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)); - rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1); - radeon_state_pm4(rstate); - -} - - -static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate, - const struct pipe_sampler_view *view, unsigned id) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_screen *rscreen = rctx->screen; - const struct util_format_description *desc; - struct r600_resource_texture *tmp; - struct r600_resource *rbuffer; - unsigned format; - uint32_t word4 = 0, yuv_format = 0, pitch = 0; - unsigned char swizzle[4], array_mode = 0, tile_type = 0; - int r; - - rstate->cpm4 = 0; - swizzle[0] = view->swizzle_r; - swizzle[1] = view->swizzle_g; - swizzle[2] = view->swizzle_b; - swizzle[3] = view->swizzle_a; - format = r600_translate_texformat(view->texture->format, - swizzle, - &word4, &yuv_format); - if (format == ~0) { - return; - } - desc = util_format_description(view->texture->format); - if (desc == NULL) { - R600_ERR("unknow format %d\n", view->texture->format); - return; - } - radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_PS); - tmp = (struct r600_resource_texture*)view->texture; - rbuffer = &tmp->resource; - if (tmp->depth) { - r = r600_texture_from_depth(ctx, tmp, view->first_level); - if (r) { - return; - } - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], tmp->uncompressed); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], tmp->uncompressed); - } else { - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], rbuffer->bo); - } - rstate->nbo = 2; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[1] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[3] = RADEON_GEM_DOMAIN_GTT; - - pitch = (tmp->pitch[0] / tmp->bpt); - pitch = (pitch + 0x7) & ~0x7; - - /* FIXME properly handle first level != 0 */ - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = - S_038000_DIM(r600_tex_dim(view->texture->target)) | - S_038000_TILE_MODE(array_mode) | - S_038000_TILE_TYPE(tile_type) | - S_038000_PITCH((pitch / 8) - 1) | - S_038000_TEX_WIDTH(view->texture->width0 - 1); - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = - S_038004_TEX_HEIGHT(view->texture->height0 - 1) | - S_038004_TEX_DEPTH(view->texture->depth0 - 1) | - S_038004_DATA_FORMAT(format); - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = tmp->offset[0] >> 8; - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8; - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = - word4 | - S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | - S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | - S_038010_REQUEST_SIZE(1) | - S_038010_BASE_LEVEL(view->first_level); - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = - S_038014_LAST_LEVEL(view->last_level) | - S_038014_BASE_ARRAY(0) | - S_038014_LAST_ARRAY(0); - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = - S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE); - radeon_state_pm4(rstate); -} - -static void r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) -{ - struct r600_screen *rscreen = rctx->screen; - const struct pipe_blend_state *pbs = &rctx->blend->state.blend; - int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; - uint32_t color_control, target_mask, shader_mask; - int i; - - target_mask = 0; - shader_mask = 0; - color_control = S_028808_PER_MRT_BLEND(1); - - for (i = 0; i < nr_cbufs; i++) { - shader_mask |= 0xf << (i * 4); - } - - if (pbs->logicop_enable) { - color_control |= (pbs->logicop_func << 16) | (pbs->logicop_func << 20); - } else { - color_control |= (0xcc << 16); - } - - if (pbs->independent_blend_enable) { - for (i = 0; i < nr_cbufs; i++) { - if (pbs->rt[i].blend_enable) { - color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); - } - target_mask |= (pbs->rt[i].colormask << (4 * i)); - } - } else { - for (i = 0; i < nr_cbufs; i++) { - if (pbs->rt[0].blend_enable) { - color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); - } - target_mask |= (pbs->rt[0].colormask << (4 * i)); - } - } - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB_CNTL, 0, 0); - rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = shader_mask; - rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = target_mask; - rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = color_control; - rstate->states[R600_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; - rstate->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000; - rstate->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX] = 0x00000000; - rstate->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000; - rstate->states[R600_CB_CNTL__CB_CLRCMP_SRC] = 0x00000000; - rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; - rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; - rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; - radeon_state_pm4(rstate); -} - -static void r600_init_config(struct r600_context *rctx) -{ - int ps_prio; - int vs_prio; - int gs_prio; - int es_prio; - int num_ps_gprs; - int num_vs_gprs; - int num_gs_gprs; - int num_es_gprs; - int num_temp_gprs; - int num_ps_threads; - int num_vs_threads; - int num_gs_threads; - int num_es_threads; - int num_ps_stack_entries; - int num_vs_stack_entries; - int num_gs_stack_entries; - int num_es_stack_entries; - enum radeon_family family; - - family = radeon_get_family(rctx->rw); - ps_prio = 0; - vs_prio = 1; - gs_prio = 2; - es_prio = 3; - switch (family) { - case CHIP_R600: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV630: - case CHIP_RV635: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 144; - num_vs_threads = 40; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV610: - case CHIP_RV620: - case CHIP_RS780: - case CHIP_RS880: - default: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV670: - num_ps_gprs = 144; - num_vs_gprs = 40; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV770: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 188; - num_vs_threads = 60; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 256; - num_vs_stack_entries = 256; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV730: - case CHIP_RV740: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 188; - num_vs_threads = 60; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV710: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 144; - num_vs_threads = 48; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - } - radeon_state_init(&rctx->config, rctx->rw, R600_STATE_CONFIG, 0, 0); - - rctx->config.states[R600_CONFIG__SQ_CONFIG] = 0x00000000; - switch (family) { - case CHIP_RV610: - case CHIP_RV620: - case CHIP_RS780: - case CHIP_RS880: - case CHIP_RV710: - break; - default: - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); - break; - } - - if (!rctx->screen->use_mem_constant) - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1); - - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); - - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs); - - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads); - - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - - rctx->config.states[R600_CONFIG__VC_ENHANCE] = 0x00000000; - rctx->config.states[R600_CONFIG__SX_MISC] = 0x00000000; - - if (family >= CHIP_RV770) { - rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; - rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; - rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x00000000; - rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; - rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000000; - rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; - } else { - rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00000000; - rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000003; - rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x82000000; - rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x01020204; - rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; - rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00004010; - } - rctx->config.states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; - rctx->config.states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; - rctx->config.states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; - radeon_state_pm4(&rctx->config); -} - -static int r600_vs_resource(struct r600_context *rctx, int id, struct r600_resource *rbuffer, uint32_t offset, - uint32_t stride, uint32_t format) -{ - struct radeon_state *vs_resource = &rctx->vs_resource[id]; - struct r600_screen *rscreen = rctx->screen; - - radeon_state_init(vs_resource, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_VS); - radeon_ws_bo_reference(rscreen->rw, &vs_resource->bo[0], rbuffer->bo); - vs_resource->nbo = 1; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->size - offset - 1; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(stride) | - S_038008_DATA_FORMAT(format); - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = 0x00000000; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = 0xC0000000; - vs_resource->placement[0] = RADEON_GEM_DOMAIN_GTT; - vs_resource->placement[1] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(vs_resource); -} - -static int r600_draw_vgt_init(struct r600_draw *draw, - int vgt_draw_initiator) -{ - struct r600_context *rctx = r600_context(draw->ctx); - struct r600_screen *rscreen = rctx->screen; - struct r600_resource *rbuffer = (struct r600_resource *)draw->index_buffer; - radeon_state_init(&draw->draw, rscreen->rw, R600_STATE_DRAW, 0, 0); - draw->draw.states[R600_DRAW__VGT_NUM_INDICES] = draw->count; - draw->draw.states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; - draw->draw.states[R600_DRAW__VGT_DMA_BASE] = draw->index_buffer_offset; - if (rbuffer) { - radeon_ws_bo_reference(rscreen->rw, &draw->draw.bo[0], rbuffer->bo); - draw->draw.placement[0] = RADEON_GEM_DOMAIN_GTT; - draw->draw.placement[1] = RADEON_GEM_DOMAIN_GTT; - draw->draw.nbo = 1; - } - return radeon_state_pm4(&draw->draw); -} - -static int r600_draw_vgt_prim(struct r600_draw *draw, - uint32_t prim, uint32_t vgt_dma_index_type) -{ - struct r600_context *rctx = r600_context(draw->ctx); - struct r600_screen *rscreen = rctx->screen; - radeon_state_init(&draw->vgt, rscreen->rw, R600_STATE_VGT, 0, 0); - draw->vgt.states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim; - draw->vgt.states[R600_VGT__VGT_MAX_VTX_INDX] = draw->max_index; - draw->vgt.states[R600_VGT__VGT_MIN_VTX_INDX] = draw->min_index; - draw->vgt.states[R600_VGT__VGT_INDX_OFFSET] = draw->start; - draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type; - draw->vgt.states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; - draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000; - return radeon_state_pm4(&draw->vgt); -} - -static int r600_ps_shader(struct r600_context *rctx, struct r600_context_state *rpshader, - struct radeon_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - const struct pipe_rasterizer_state *rasterizer; - struct r600_shader *rshader = &rpshader->shader; - unsigned i, tmp, exports_ps, num_cout; - boolean have_pos = FALSE; - - rasterizer = &rctx->rasterizer->state.rasterizer; - - radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS); - for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(i); - tmp |= S_028644_SEL_CENTROID(1); - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - have_pos = TRUE; - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); - } - if (rasterizer->sprite_coord_enable & (1 << i)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp; - } - - exports_ps = 0; - num_cout = 0; - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - exports_ps |= 1; - else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - exports_ps |= (1 << (num_cout+1)); - num_cout++; - } - } - if (!exports_ps) { - /* always at least export 1 component per pixel */ - exports_ps = 2; - } - state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); - if (have_pos) { - state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] |= S_0286CC_POSITION_ENA(1) | - S_0286CC_BARYC_SAMPLE_CNTL(1); - state->states[R600_PS_SHADER__SPI_INPUT_Z] |= 1; - } - state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; - state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack); - state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; - radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo); - state->nbo = 1; - state->placement[0] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(state); -} - -static int r600_vs_shader(struct r600_context *rctx, struct r600_context_state *rpshader, - struct radeon_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_shader *rshader = &rpshader->shader; - unsigned i, tmp; - - radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS); - for (i = 0; i < 10; i++) { - state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; - } - /* so far never got proper semantic id from tgsi */ - for (i = 0; i < 32; i++) { - tmp = i << ((i & 3) * 8); - state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp; - } - state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); - state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack); - radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo); - radeon_ws_bo_reference(rscreen->rw, &state->bo[1], rpshader->bo); - state->nbo = 2; - state->placement[0] = RADEON_GEM_DOMAIN_GTT; - state->placement[2] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(state); -} - -static void r600_texture_state_scissor(struct r600_screen *rscreen, - struct r600_resource_texture *rtexture, - unsigned level) -{ - struct radeon_state *rstate = &rtexture->scissor[level]; - enum radeon_family family; - - family = radeon_get_family(rscreen->rw); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0); - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_TL] = 0x80000000; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_TL] = 0x80000000; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_TL] = 0x80000000; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_TL] = 0x80000000; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF; - - if (family >= CHIP_RV770) - rstate->states[R600_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA; - - rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = 0x80000000; - rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = 0x80000000; - rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = 0x80000000; - rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = 0x80000000; - - radeon_state_pm4(rstate); -} - -static void r600_texture_state_cb(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned cb, unsigned level) -{ - struct radeon_state *rstate; - struct r600_resource *rbuffer; - unsigned pitch, slice; - unsigned color_info; - unsigned format, swap, ntype; - const struct util_format_description *desc; - - rstate = &rtexture->cb[cb][level]; - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0 + cb, 0, 0); - rbuffer = &rtexture->resource; - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - pitch = (rtexture->pitch[level] / rtexture->bpt) / 8 - 1; - slice = (rtexture->pitch[level] / rtexture->bpt) * rtexture->height[level] / 64 - 1; - ntype = 0; - desc = util_format_description(rbuffer->base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = V_0280A0_NUMBER_SRGB; - format = r600_translate_colorformat(rtexture->resource.base.b.format); - swap = r600_translate_colorswap(rtexture->resource.base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rtexture->uncompressed); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], rtexture->uncompressed); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[2], rtexture->uncompressed); - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[4] = RADEON_GEM_DOMAIN_GTT; - rstate->nbo = 3; - color_info = 0; - } else { - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], rbuffer->bo); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[2], rbuffer->bo); - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[4] = RADEON_GEM_DOMAIN_GTT; - rstate->nbo = 3; - color_info = S_0280A0_SOURCE_FORMAT(1); - } - color_info |= S_0280A0_FORMAT(format) | - S_0280A0_COMP_SWAP(swap) | - S_0280A0_BLEND_CLAMP(1) | - S_0280A0_NUMBER_TYPE(ntype); - rstate->states[R600_CB0__CB_COLOR0_BASE] = rtexture->offset[level] >> 8; - rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info; - rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) | - S_028060_SLICE_TILE_MAX(slice); - - radeon_state_pm4(rstate); -} - -static void r600_texture_state_db(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned level) -{ - struct radeon_state *rstate = &rtexture->db[level]; - struct r600_resource *rbuffer; - unsigned pitch, slice, format; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0); - rbuffer = &rtexture->resource; - rtexture->tilled = 1; - rtexture->array_mode = 2; - rtexture->tile_type = 1; - rtexture->depth = 1; - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - pitch = (rtexture->pitch[level] / rtexture->bpt) / 8 - 1; - slice = (rtexture->pitch[level] / rtexture->bpt) * rtexture->height[level] / 64 - 1; - format = r600_translate_dbformat(rbuffer->base.b.format); - rstate->states[R600_DB__DB_DEPTH_BASE] = rtexture->offset[level] >> 8; - rstate->states[R600_DB__DB_DEPTH_INFO] = S_028010_ARRAY_MODE(rtexture->array_mode) | - S_028010_FORMAT(format); - rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000; - rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (rtexture->height[level] / 8) -1; - rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | - S_028000_SLICE_TILE_MAX(slice); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->nbo = 1; - - radeon_state_pm4(rstate); -} - -static void r600_texture_state_viewport(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned level) -{ - struct radeon_state *rstate = &rtexture->viewport[level]; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui((float)rtexture->width[level]/2.0); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui((float)rtexture->width[level]/2.0); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui((float)rtexture->height[level]/2.0); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui((float)-rtexture->height[level]/2.0); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = 0x3F000000; - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = 0x3F000000; - rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; - - radeon_state_pm4(rstate); -} - -struct r600_context_hw_state_vtbl r600_hw_state_vtbl = { - .blend = r600_blend, - .ucp = r600_ucp, - .cb = r600_cb, - .db = r600_db, - .rasterizer = r600_rasterizer, - .scissor = r600_scissor, - .viewport = r600_viewport, - .dsa = r600_dsa, - .sampler_border = r600_sampler_border, - .sampler = r600_sampler, - .resource = r600_resource, - .cb_cntl = r600_cb_cntl, - .vs_resource = r600_vs_resource, - .vgt_init = r600_draw_vgt_init, - .vgt_prim = r600_draw_vgt_prim, - .vs_shader = r600_vs_shader, - .ps_shader = r600_ps_shader, - .init_config = r600_init_config, - .texture_state_viewport = r600_texture_state_viewport, - .texture_state_db = r600_texture_state_db, - .texture_state_cb = r600_texture_state_cb, - .texture_state_scissor = r600_texture_state_scissor, -}; - -void r600_set_constant_buffer_file(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - unsigned nconstant = 0, i, type, shader_class; - struct radeon_state *rstate, *rstates; - struct pipe_transfer *transfer; - u32 *ptr; - - type = R600_STATE_CONSTANT; - - switch (shader) { - case PIPE_SHADER_VERTEX: - shader_class = R600_SHADER_VS; - rstates = rctx->vs_constant; - break; - case PIPE_SHADER_FRAGMENT: - shader_class = R600_SHADER_PS; - rstates = rctx->ps_constant; - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } - if (buffer && buffer->width0 > 0) { - nconstant = buffer->width0 / 16; - ptr = pipe_buffer_map(ctx, buffer, PIPE_TRANSFER_READ, &transfer); - if (ptr == NULL) - return; - for (i = 0; i < nconstant; i++) { - rstate = &rstates[i]; - radeon_state_init(rstate, rscreen->rw, type, i, shader_class); - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3]; - if (radeon_state_pm4(rstate)) - return; - radeon_draw_bind(&rctx->draw, rstate); - } - pipe_buffer_unmap(ctx, buffer, transfer); - } -} - -void r600_set_constant_buffer_mem(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - unsigned nconstant = 0, type, shader_class, size; - struct radeon_state *rstate, *rstates; - struct r600_resource *rbuffer = (struct r600_resource*)buffer; - - type = R600_STATE_CBUF; - - switch (shader) { - case PIPE_SHADER_VERTEX: - shader_class = R600_SHADER_VS; - rstates = rctx->vs_constant; - break; - case PIPE_SHADER_FRAGMENT: - shader_class = R600_SHADER_PS; - rstates = rctx->ps_constant; - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } - - rstate = &rstates[0]; - -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - - nconstant = buffer->width0 / 16; - size = ALIGN_DIVUP(nconstant, 16); - - radeon_state_init(rstate, rscreen->rw, type, 0, shader_class); - rstate->states[R600_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0] = size; - rstate->states[R600_VS_CBUF__ALU_CONST_CACHE_VS_0] = 0; - - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; - if (radeon_state_pm4(rstate)) - return; - radeon_draw_bind(&rctx->draw, rstate); -} - diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h index 0cf9c1c401..4f9b39a7fd 100644 --- a/src/gallium/drivers/r600/r600_opcodes.h +++ b/src/gallium/drivers/r600/r600_opcodes.h @@ -233,12 +233,6 @@ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL 0x00000012 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE 0x00000013 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR 0x00000014 -/* same up to here */ -/* -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA 0x00000015 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR 0x00000016 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x00000018 -*/ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT 0x00000015 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT 0x00000016 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT 0x00000017 @@ -336,9 +330,11 @@ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED_64 0x00000098 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_64 0x00000099 /* TODO Fill in more ALU */ +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR 0x000000B1 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 0x000000BE #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE 0x000000BF #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE 0x000000C0 +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x000000CC #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY 0x000000D6 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW 0x000000D7 diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_pipe.c index 1711fabfc7..dd8fa4fcd7 100644 --- a/src/gallium/drivers/r600/r600_screen.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -19,37 +19,221 @@ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Corbin Simpson */ #include <stdio.h> -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_memory.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_public.h" +#include <errno.h> +#include <pipe/p_defines.h> +#include <pipe/p_state.h> +#include <pipe/p_context.h> +#include <tgsi/tgsi_scan.h> +#include <tgsi/tgsi_parse.h> +#include <tgsi/tgsi_util.h> +#include <util/u_blitter.h> +#include <util/u_double_list.h> +#include <util/u_transfer.h> +#include <util/u_surface.h> +#include <util/u_pack_color.h> +#include <util/u_memory.h> +#include <util/u_inlines.h> +#include <util/u_upload_mgr.h> +#include <pipebuffer/pb_buffer.h> +#include "r600.h" +#include "r600d.h" #include "r600_resource.h" +#include "r600_shader.h" +#include "r600_pipe.h" #include "r600_state_inlines.h" +/* + * pipe_context + */ +static void r600_flush(struct pipe_context *ctx, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; +#if 0 + static int dc = 0; + char dname[256]; +#endif + + if (!rctx->ctx.pm4_cdwords) + return; + + u_upload_flush(rctx->upload_vb); + u_upload_flush(rctx->upload_ib); + +#if 0 + sprintf(dname, "gallium-%08d.bof", dc); + if (dc < 20) { + r600_context_dump_bof(&rctx->ctx, dname); + R600_ERR("dumped %s\n", dname); + } + dc++; +#endif + r600_context_flush(&rctx->ctx); +} + +static void r600_destroy_context(struct pipe_context *context) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)context; + + r600_context_fini(&rctx->ctx); + for (int i = 0; i < R600_PIPE_NSTATES; i++) { + free(rctx->states[i]); + } + + u_upload_destroy(rctx->upload_vb); + u_upload_destroy(rctx->upload_ib); + + FREE(rctx->ps_resource); + FREE(rctx->vs_resource); + FREE(rctx); +} + +static struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) +{ + struct r600_pipe_context *rctx = CALLOC_STRUCT(r600_pipe_context); + struct r600_screen* rscreen = (struct r600_screen *)screen; + enum chip_class class; + + if (rctx == NULL) + return NULL; + rctx->context.winsys = rscreen->screen.winsys; + rctx->context.screen = screen; + rctx->context.priv = priv; + rctx->context.destroy = r600_destroy_context; + rctx->context.flush = r600_flush; + + /* Easy accessing of screen/winsys. */ + rctx->screen = rscreen; + rctx->radeon = rscreen->radeon; + rctx->family = r600_get_family(rctx->radeon); + + r600_init_blit_functions(rctx); + r600_init_query_functions(rctx); + r600_init_context_resource_functions(rctx); + + switch (r600_get_family(rctx->radeon)) { + case CHIP_R600: + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV670: + case CHIP_RV620: + case CHIP_RV635: + case CHIP_RS780: + case CHIP_RS880: + case CHIP_RV770: + case CHIP_RV730: + case CHIP_RV710: + case CHIP_RV740: + rctx->context.draw_vbo = r600_draw_vbo; + r600_init_state_functions(rctx); + if (r600_context_init(&rctx->ctx, rctx->radeon)) { + r600_destroy_context(&rctx->context); + return NULL; + } + r600_init_config(rctx); + break; + case CHIP_CEDAR: + case CHIP_REDWOOD: + case CHIP_JUNIPER: + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + rctx->context.draw_vbo = evergreen_draw; + evergreen_init_state_functions(rctx); + if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { + r600_destroy_context(&rctx->context); + return NULL; + } + evergreen_init_config(rctx); + break; + default: + R600_ERR("unsupported family %d\n", r600_get_family(rctx->radeon)); + r600_destroy_context(&rctx->context); + return NULL; + } + + rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16, + PIPE_BIND_INDEX_BUFFER); + if (rctx->upload_ib == NULL) { + r600_destroy_context(&rctx->context); + return NULL; + } + + rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16, + PIPE_BIND_VERTEX_BUFFER); + if (rctx->upload_vb == NULL) { + r600_destroy_context(&rctx->context); + return NULL; + } + + rctx->blitter = util_blitter_create(&rctx->context); + if (rctx->blitter == NULL) { + FREE(rctx); + return NULL; + } + + rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); + if (!rctx->vs_resource) { + FREE(rctx); + return NULL; + } + + rctx->ps_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); + if (!rctx->ps_resource) { + FREE(rctx); + return NULL; + } + + class = r600_get_family_class(rctx->radeon); + if (class == R600 || class == R700) + rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx); + else + rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); + + r600_blit_uncompress_depth_ptr = r600_blit_uncompress_depth; + + return &rctx->context; +} + +/* + * pipe_screen + */ static const char* r600_get_vendor(struct pipe_screen* pscreen) { return "X.Org"; } +static const char *r600_get_family_name(enum radeon_family family) +{ + switch(family) { + case CHIP_R600: return "R600"; + case CHIP_RV610: return "RV610"; + case CHIP_RV630: return "RV630"; + case CHIP_RV670: return "RV670"; + case CHIP_RV620: return "RV620"; + case CHIP_RV635: return "RV635"; + case CHIP_RS780: return "RS780"; + case CHIP_RS880: return "RS880"; + case CHIP_RV770: return "RV770"; + case CHIP_RV730: return "RV730"; + case CHIP_RV710: return "RV710"; + case CHIP_RV740: return "RV740"; + case CHIP_CEDAR: return "CEDAR"; + case CHIP_REDWOOD: return "REDWOOD"; + case CHIP_JUNIPER: return "JUNIPER"; + case CHIP_CYPRESS: return "CYPRESS"; + case CHIP_HEMLOCK: return "HEMLOCK"; + default: return "unknown"; + } +} + static const char* r600_get_name(struct pipe_screen* pscreen) { - struct r600_screen *screen = r600_screen(pscreen); - enum radeon_family family = radeon_get_family(screen->rw); + struct r600_screen *rscreen = (struct r600_screen *)pscreen; + enum radeon_family family = r600_get_family(rscreen->radeon); - if (family >= CHIP_R600 && family < CHIP_RV770) - return "R600 (HD2XXX,HD3XXX)"; - else if (family < CHIP_CEDAR) - return "R700 (HD4XXX)"; - else - return "EVERGREEN"; + return r600_get_family_name(family); } static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) @@ -72,6 +256,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_DEPTH_CLAMP: + case PIPE_CAP_SHADER_STENCIL_EXPORT: return 1; /* Unsupported features (boolean caps). */ @@ -87,7 +272,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 14; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: /* FIXME allow this once infrastructure is there */ - return 0; + return 16; case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: case PIPE_CAP_MAX_COMBINED_SAMPLERS: return 16; @@ -104,15 +289,35 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; + default: R600_ERR("r600: unknown param %d\n", param); return 0; } } +static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 8192.0f; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0f; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0f; + default: + R600_ERR("r600: unsupported paramf %d\n", param); + return 0.0f; + } +} + static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param) { - switch(shader) { + switch(shader) + { case PIPE_SHADER_FRAGMENT: case PIPE_SHADER_VERTEX: break; @@ -155,24 +360,6 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e } } -static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 8192.0f; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0f; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0f; - default: - R600_ERR("r600: unsupported paramf %d\n", param); - return 0.0f; - } -} - static boolean r600_is_format_supported(struct pipe_screen* screen, enum pipe_format format, enum pipe_texture_target target, @@ -226,58 +413,25 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, static void r600_destroy_screen(struct pipe_screen* pscreen) { - struct r600_screen* rscreen = r600_screen(pscreen); + struct r600_screen *rscreen = (struct r600_screen *)pscreen; if (rscreen == NULL) return; FREE(rscreen); } -struct pipe_screen *r600_screen_create(struct radeon *rw) + +struct pipe_screen *r600_screen_create(struct radeon *radeon) { - struct r600_screen* rscreen; - enum radeon_family family = radeon_get_family(rw); + struct r600_screen *rscreen; rscreen = CALLOC_STRUCT(r600_screen); if (rscreen == NULL) { return NULL; } - - /* don't enable mem constant for r600 yet */ - rscreen->use_mem_constant = FALSE; - switch (family) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - rscreen->chip_class = R600; - break; - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - rscreen->chip_class = R700; - break; - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - rscreen->chip_class = EVERGREEN; - rscreen->use_mem_constant = TRUE; - break; - default: - FREE(rscreen); - return NULL; - } - radeon_set_mem_constant(rw, rscreen->use_mem_constant); - rscreen->rw = rw; - rscreen->screen.winsys = (struct pipe_winsys*)rw; + rscreen->radeon = radeon; + rscreen->screen.winsys = (struct pipe_winsys*)radeon; rscreen->screen.destroy = r600_destroy_screen; rscreen->screen.get_name = r600_get_name; rscreen->screen.get_vendor = r600_get_vendor; @@ -288,5 +442,8 @@ struct pipe_screen *r600_screen_create(struct radeon *rw) rscreen->screen.context_create = r600_create_context; r600_init_screen_texture_functions(&rscreen->screen); r600_init_screen_resource_functions(&rscreen->screen); + + rscreen->tiling_info = r600_get_tiling_info(radeon); + return &rscreen->screen; } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h new file mode 100644 index 0000000000..35548329e4 --- /dev/null +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -0,0 +1,223 @@ +/* + * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + */ +#ifndef R600_PIPE_H +#define R600_PIPE_H + +#include <pipe/p_state.h> +#include <pipe/p_screen.h> +#include <pipe/p_context.h> +#include <util/u_math.h> +#include "r600.h" +#include "r600_public.h" +#include "r600_shader.h" +#include "r600_resource.h" + +enum r600_pipe_state_id { + R600_PIPE_STATE_BLEND = 0, + R600_PIPE_STATE_BLEND_COLOR, + R600_PIPE_STATE_CONFIG, + R600_PIPE_STATE_CLIP, + R600_PIPE_STATE_SCISSOR, + R600_PIPE_STATE_VIEWPORT, + R600_PIPE_STATE_RASTERIZER, + R600_PIPE_STATE_VGT, + R600_PIPE_STATE_FRAMEBUFFER, + R600_PIPE_STATE_DSA, + R600_PIPE_STATE_STENCIL_REF, + R600_PIPE_STATE_PS_SHADER, + R600_PIPE_STATE_VS_SHADER, + R600_PIPE_STATE_CONSTANT, + R600_PIPE_STATE_SAMPLER, + R600_PIPE_STATE_RESOURCE, + R600_PIPE_NSTATES +}; + +struct r600_screen { + struct pipe_screen screen; + struct radeon *radeon; + struct r600_tiling_info *tiling_info; +}; + +struct r600_pipe_sampler_view { + struct pipe_sampler_view base; + struct r600_pipe_state state; +}; + +struct r600_pipe_rasterizer { + struct r600_pipe_state rstate; + bool flatshade; + unsigned sprite_coord_enable; + float offset_units; + float offset_scale; +}; + +struct r600_pipe_blend { + struct r600_pipe_state rstate; + unsigned cb_target_mask; +}; + +struct r600_vertex_element +{ + unsigned count; + unsigned refcount; + struct pipe_vertex_element elements[32]; +}; + +struct r600_pipe_shader { + struct r600_shader shader; + struct r600_pipe_state rstate; + struct r600_bo *bo; + struct r600_vertex_element vertex_elements; +}; + +/* needed for blitter save */ +struct r600_textures_info { + struct r600_pipe_sampler_view **views; + unsigned n_views; + void **samplers; + unsigned n_samplers; +}; + +#define R600_CONSTANT_ARRAY_SIZE 256 +#define R600_RESOURCE_ARRAY_SIZE 160 + +struct r600_pipe_context { + struct pipe_context context; + struct blitter_context *blitter; + struct pipe_framebuffer_state *pframebuffer; + unsigned family; + void *custom_dsa_flush; + struct r600_screen *screen; + struct radeon *radeon; + struct r600_pipe_state *states[R600_PIPE_NSTATES]; + struct r600_context ctx; + struct r600_vertex_element *vertex_elements; + struct pipe_framebuffer_state framebuffer; + struct pipe_index_buffer index_buffer; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned nvertex_buffer; + unsigned cb_target_mask; + /* for saving when using blitter */ + struct pipe_stencil_ref stencil_ref; + struct pipe_viewport_state viewport; + struct pipe_clip_state clip; + struct r600_pipe_state *vs_resource; + struct r600_pipe_state *ps_resource; + struct r600_pipe_state config; + struct r600_pipe_shader *ps_shader; + struct r600_pipe_shader *vs_shader; + struct r600_pipe_state vs_const_buffer; + struct r600_pipe_state ps_const_buffer; + struct r600_pipe_rasterizer *rasterizer; + /* shader information */ + unsigned sprite_coord_enable; + bool flatshade; + struct u_upload_mgr *upload_vb; + struct u_upload_mgr *upload_ib; + unsigned any_user_vbs; + struct r600_textures_info ps_samplers; + +}; + +struct r600_drawl { + struct pipe_context *ctx; + unsigned mode; + unsigned min_index; + unsigned max_index; + unsigned index_bias; + unsigned start; + unsigned count; + unsigned index_size; + unsigned index_buffer_offset; + struct pipe_resource *index_buffer; +}; + +/* evergreen_state.c */ +void evergreen_init_state_functions(struct r600_pipe_context *rctx); +void evergreen_init_config(struct r600_pipe_context *rctx); +void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info); +void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); +void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); +void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); + +/* r600_blit.c */ +void r600_init_blit_functions(struct r600_pipe_context *rctx); +int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); + +/* r600_buffer.c */ +struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, + const struct pipe_resource *templ); +struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, + void *ptr, unsigned bytes, + unsigned bind); +unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, + struct pipe_resource *buf, + unsigned face, unsigned level); +struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, + struct winsys_handle *whandle); +int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); +int r600_upload_user_buffers(struct r600_pipe_context *rctx); + +/* r600_query.c */ +void r600_init_query_functions(struct r600_pipe_context *rctx); + +/* r600_resource.c */ +void r600_init_context_resource_functions(struct r600_pipe_context *r600); + +/* r600_shader.c */ +int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader); +int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens); +int r600_find_vs_semantic_index(struct r600_shader *vs, + struct r600_shader *ps, int id); + +/* r600_state.c */ +void r600_init_state_functions(struct r600_pipe_context *rctx); +void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); +void r600_init_config(struct r600_pipe_context *rctx); +void r600_translate_index_buffer(struct r600_pipe_context *r600, + struct pipe_resource **index_buffer, + unsigned *index_size, + unsigned *start, unsigned count); +void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx); +/* r600_helper.h */ +int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); + +/* r600_texture.c */ +void r600_init_screen_texture_functions(struct pipe_screen *screen); +uint32_t r600_translate_texformat(enum pipe_format format, + const unsigned char *swizzle_view, + uint32_t *word4_p, uint32_t *yuv_format_p); + +/* + * common helpers + */ +static INLINE u32 S_FIXED(float value, u32 frac_bits) +{ + return value * (1 << frac_bits); +} +#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) + +#endif diff --git a/src/gallium/drivers/r600/r600_public.h b/src/gallium/drivers/r600/r600_public.h index 1d89c9f9f6..f1970201e8 100644 --- a/src/gallium/drivers/r600/r600_public.h +++ b/src/gallium/drivers/r600/r600_public.h @@ -1,9 +1,28 @@ - +/* + * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ #ifndef R600_PUBLIC_H #define R600_PUBLIC_H -struct radeon; - -struct pipe_screen* r600_screen_create(struct radeon *rw); +struct pipe_screen *r600_screen_create(struct radeon *radeon); #endif diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 12900cce11..726668260c 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -19,231 +19,55 @@ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Corbin Simpson */ -#include <errno.h> -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include "r600_screen.h" -#include "r600_context.h" - -static void r600_query_begin(struct r600_context *rctx, struct r600_query *rquery) -{ - struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate = &rquery->rstate; - - radeon_state_fini(rstate); - radeon_state_init(rstate, rscreen->rw, R600_STATE_QUERY_BEGIN, 0, 0); - rstate->states[R600_QUERY__OFFSET] = rquery->num_results; - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rquery->buffer); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - if (radeon_state_pm4(rstate)) { - radeon_state_fini(rstate); - } -} - -static void r600_query_end(struct r600_context *rctx, struct r600_query *rquery) -{ - struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate = &rquery->rstate; - - radeon_state_fini(rstate); - radeon_state_init(rstate, rscreen->rw, R600_STATE_QUERY_END, 0, 0); - rstate->states[R600_QUERY__OFFSET] = rquery->num_results + 8; - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rquery->buffer); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - if (radeon_state_pm4(rstate)) { - radeon_state_fini(rstate); - } -} +#include "r600_pipe.h" static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type) { - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - struct r600_query *q; - - if (query_type != PIPE_QUERY_OCCLUSION_COUNTER) - return NULL; - - q = CALLOC_STRUCT(r600_query); - if (!q) - return NULL; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - q->type = query_type; - q->buffer_size = 4096; - - q->buffer = radeon_ws_bo(rscreen->rw, q->buffer_size, 1, 0); - if (!q->buffer) { - FREE(q); - return NULL; - } - - LIST_ADDTAIL(&q->list, &rctx->query_list); - - return (struct pipe_query *)q; -} - -static void r600_destroy_query(struct pipe_context *ctx, - struct pipe_query *query) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_query *q = r600_query(query); - - radeon_ws_bo_reference(rscreen->rw, &q->buffer, NULL); - LIST_DEL(&q->list); - FREE(query); + return (struct pipe_query*)r600_context_query_create(&rctx->ctx, query_type); } -static void r600_query_result(struct pipe_context *ctx, struct r600_query *rquery) +static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query) { - struct r600_screen *rscreen = r600_screen(ctx->screen); - u64 start, end; - u32 *results; - int i; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - radeon_ws_bo_wait(rscreen->rw, rquery->buffer); - results = radeon_ws_bo_map(rscreen->rw, rquery->buffer, 0, r600_context(ctx)); - for (i = 0; i < rquery->num_results; i += 4) { - start = (u64)results[i] | (u64)results[i + 1] << 32; - end = (u64)results[i + 2] | (u64)results[i + 3] << 32; - if ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) { - rquery->result += end - start; - } - } - radeon_ws_bo_unmap(rscreen->rw, rquery->buffer); - rquery->num_results = 0; -} - -static void r600_query_resume(struct pipe_context *ctx, struct r600_query *rquery) -{ - struct r600_context *rctx = r600_context(ctx); - - if (rquery->num_results >= ((rquery->buffer_size >> 2) - 2)) { - /* running out of space */ - if (!rquery->flushed) { - ctx->flush(ctx, 0, NULL); - } - r600_query_result(ctx, rquery); - } - r600_query_begin(rctx, rquery); - rquery->flushed = false; -} - -static void r600_query_suspend(struct pipe_context *ctx, struct r600_query *rquery) -{ - struct r600_context *rctx = r600_context(ctx); - - r600_query_end(rctx, rquery); - rquery->num_results += 16; + r600_context_query_destroy(&rctx->ctx, (struct r600_query *)query); } static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query) { - struct r600_context *rctx = r600_context(ctx); - struct r600_query *rquery = r600_query(query); - int r; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; - rquery->state = R600_QUERY_STATE_STARTED; + rquery->result = 0; rquery->num_results = 0; - rquery->flushed = false; - r600_query_resume(ctx, rquery); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - if (r == -EBUSY) { - /* this shouldn't happen */ - R600_ERR("had to flush while emitting end query\n"); - ctx->flush(ctx, 0, NULL); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - } + r600_query_begin(&rctx->ctx, (struct r600_query *)query); } static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query) { - struct r600_context *rctx = r600_context(ctx); - struct r600_query *rquery = r600_query(query); - int r; - - rquery->state &= ~R600_QUERY_STATE_STARTED; - rquery->state |= R600_QUERY_STATE_ENDED; - r600_query_suspend(ctx, rquery); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - if (r == -EBUSY) { - /* this shouldn't happen */ - R600_ERR("had to flush while emitting end query\n"); - ctx->flush(ctx, 0, NULL); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - } -} - -void r600_queries_suspend(struct pipe_context *ctx) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_query *rquery; - int r; - - LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { - if (rquery->state & R600_QUERY_STATE_STARTED) { - r600_query_suspend(ctx, rquery); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - if (r == -EBUSY) { - /* this shouldn't happen */ - R600_ERR("had to flush while emitting end query\n"); - ctx->flush(ctx, 0, NULL); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - } - } - rquery->state |= R600_QUERY_STATE_SUSPENDED; - } -} - -void r600_queries_resume(struct pipe_context *ctx) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_query *rquery; - int r; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { - if (rquery->state & R600_QUERY_STATE_STARTED) { - r600_query_resume(ctx, rquery); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - if (r == -EBUSY) { - /* this shouldn't happen */ - R600_ERR("had to flush while emitting end query\n"); - ctx->flush(ctx, 0, NULL); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - } - } - rquery->state &= ~R600_QUERY_STATE_SUSPENDED; - } + r600_query_end(&rctx->ctx, (struct r600_query *)query); } static boolean r600_get_query_result(struct pipe_context *ctx, struct pipe_query *query, boolean wait, void *vresult) { - struct r600_query *rquery = r600_query(query); - uint64_t *result = (uint64_t*)vresult; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; - if (!rquery->flushed) { + if (rquery->num_results) { ctx->flush(ctx, 0, NULL); - rquery->flushed = true; } - r600_query_result(ctx, rquery); - *result = rquery->result; - rquery->result = 0; - return TRUE; + return r600_context_query_result(&rctx->ctx, (struct r600_query *)query, wait, vresult); } -void r600_init_query_functions(struct r600_context* rctx) +void r600_init_query_functions(struct r600_pipe_context *rctx) { - LIST_INITHEAD(&rctx->query_list); - rctx->context.create_query = r600_create_query; rctx->context.destroy_query = r600_destroy_query; rctx->context.begin_query = r600_begin_query; diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c index 05707740da..207642ccfa 100644 --- a/src/gallium/drivers/r600/r600_resource.c +++ b/src/gallium/drivers/r600/r600_resource.c @@ -21,9 +21,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "r600_context.h" -#include "r600_resource.h" -#include "r600_screen.h" +#include "r600_pipe.h" static struct pipe_resource *r600_resource_create(struct pipe_screen *screen, const struct pipe_resource *templ) @@ -46,7 +44,16 @@ static struct pipe_resource *r600_resource_from_handle(struct pipe_screen * scre } } -void r600_init_context_resource_functions(struct r600_context *r600) +void r600_init_screen_resource_functions(struct pipe_screen *screen) +{ + screen->resource_create = r600_resource_create; + screen->resource_from_handle = r600_resource_from_handle; + screen->resource_get_handle = u_resource_get_handle_vtbl; + screen->resource_destroy = u_resource_destroy_vtbl; + screen->user_buffer_create = r600_user_buffer_create; +} + +void r600_init_context_resource_functions(struct r600_pipe_context *r600) { r600->context.get_transfer = u_get_transfer_vtbl; r600->context.transfer_map = u_transfer_map_vtbl; @@ -56,12 +63,3 @@ void r600_init_context_resource_functions(struct r600_context *r600) r600->context.transfer_inline_write = u_transfer_inline_write_vtbl; r600->context.is_resource_referenced = u_is_resource_referenced_vtbl; } - -void r600_init_screen_resource_functions(struct pipe_screen *screen) -{ - screen->resource_create = r600_resource_create; - screen->resource_from_handle = r600_resource_from_handle; - screen->resource_get_handle = u_resource_get_handle_vtbl; - screen->resource_destroy = u_resource_destroy_vtbl; - screen->user_buffer_create = r600_user_buffer_create; -} diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 6ddb1ad32a..ef484aba4a 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -25,8 +25,15 @@ #include "util/u_transfer.h" -struct r600_context; -struct r600_screen; +/* Texture transfer. */ +struct r600_transfer { + /* Base class. */ + struct pipe_transfer transfer; + /* Buffer transfer. */ + struct pipe_transfer *buffer_transfer; + unsigned offset; + struct pipe_resource *linear_texture; +}; /* This gets further specialized into either buffer or texture * structures. Use the vtbl struct to choose between the two @@ -34,7 +41,7 @@ struct r600_screen; */ struct r600_resource { struct u_resource base; - struct radeon_ws_bo *bo; + struct r600_bo *bo; u32 domain; u32 flink; u32 size; @@ -42,27 +49,20 @@ struct r600_resource { struct r600_resource_texture { struct r600_resource resource; - unsigned long offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long width[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long height[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long pitch_override; - unsigned long bpt; - unsigned long size; - unsigned tilled; + unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_in_pixels[PIPE_MAX_TEXTURE_LEVELS]; + unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_override; + unsigned size; + unsigned tiled; unsigned array_mode; unsigned tile_type; unsigned depth; unsigned dirty; - struct radeon_ws_bo *uncompressed; - struct radeon_state scissor[PIPE_MAX_TEXTURE_LEVELS]; - struct radeon_state cb[8][PIPE_MAX_TEXTURE_LEVELS]; - struct radeon_state db[PIPE_MAX_TEXTURE_LEVELS]; - struct radeon_state viewport[PIPE_MAX_TEXTURE_LEVELS]; + struct r600_resource_texture *flushed_depth_texture; }; -void r600_init_context_resource_functions(struct r600_context *r600); void r600_init_screen_resource_functions(struct pipe_screen *screen); /* r600_buffer */ @@ -103,7 +103,25 @@ static INLINE struct r600_resource_buffer *r600_buffer(struct pipe_resource *buf static INLINE boolean r600_buffer_is_user_buffer(struct pipe_resource *buffer) { - return r600_buffer(buffer)->user_buffer ? true : false; + return r600_buffer(buffer)->user_buffer ? TRUE : FALSE; } +int r600_texture_depth_flush(struct pipe_context *ctx, + struct pipe_resource *texture); + +extern int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture); + +/* r600_texture.c texture transfer functions. */ +struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, + struct pipe_resource *texture, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box); +void r600_texture_transfer_destroy(struct pipe_context *ctx, + struct pipe_transfer *trans); +void* r600_texture_transfer_map(struct pipe_context *ctx, + struct pipe_transfer* transfer); +void r600_texture_transfer_unmap(struct pipe_context *ctx, + struct pipe_transfer* transfer); + #endif diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h deleted file mode 100644 index 4be77865fb..0000000000 --- a/src/gallium/drivers/r600/r600_screen.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef R600_SCREEN_H -#define R600_SCREEN_H - -#include <pipe/p_state.h> -#include <pipe/p_screen.h> -#include <pipebuffer/pb_buffer.h> -#include <xf86drm.h> -#include <radeon_drm.h> -#include "radeon.h" -#include "util/u_transfer.h" -#include "r600_resource.h" - -/* Texture transfer. */ -struct r600_transfer { - /* Base class. */ - struct pipe_transfer transfer; - /* Buffer transfer. */ - struct pipe_transfer *buffer_transfer; - unsigned offset; - struct pipe_resource *linear_texture; -}; - -enum chip_class { - R600, - R700, - EVERGREEN, -}; - -struct r600_screen { - struct pipe_screen screen; - struct radeon *rw; - enum chip_class chip_class; - boolean use_mem_constant; -}; - -static INLINE struct r600_screen *r600_screen(struct pipe_screen *screen) -{ - return (struct r600_screen*)screen; -} - -/* Buffer functions. */ -struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, - const struct pipe_resource *templ); -struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, - void *ptr, unsigned bytes, - unsigned bind); -unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, - struct pipe_resource *buf, - unsigned face, unsigned level); -struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, - struct winsys_handle *whandle); - -/* r600_texture.c texture transfer functions. */ -struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, - struct pipe_resource *texture, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box); -void r600_texture_transfer_destroy(struct pipe_context *ctx, - struct pipe_transfer *trans); -void* r600_texture_transfer_map(struct pipe_context *ctx, - struct pipe_transfer* transfer); -void r600_texture_transfer_unmap(struct pipe_context *ctx, - struct pipe_transfer* transfer); -int r600_texture_scissor(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); -int r600_texture_cb(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned cb, unsigned level); -int r600_texture_db(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); -int r600_texture_from_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); -int r600_texture_viewport(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); - -/* r600_blit.c */ -int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); - -/* helpers */ -int r600_conv_pipe_format(unsigned pformat, unsigned *format); -int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); - -void r600_init_screen_texture_functions(struct pipe_screen *screen); - -#endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 4da6850b0a..d1143985ea 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -25,9 +25,7 @@ #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_dump.h" #include "util/u_format.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_shader.h" +#include "r600_pipe.h" #include "r600_asm.h" #include "r600_sq.h" #include "r600_opcodes.h" @@ -35,38 +33,227 @@ #include <stdio.h> #include <errno.h> +static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_state *rstate = &shader->rstate; + struct r600_shader *rshader = &shader->shader; + unsigned spi_vs_out_id[10]; + unsigned i, tmp; + + /* clear previous register */ + rstate->nregs = 0; + + /* so far never got proper semantic id from tgsi */ + for (i = 0; i < 10; i++) { + spi_vs_out_id[i] = 0; + } + for (i = 0; i < 32; i++) { + tmp = i << ((i & 3) * 8); + spi_vs_out_id[i / 4] |= tmp; + } + for (i = 0; i < 10; i++) { + r600_pipe_state_add_reg(rstate, + R_028614_SPI_VS_OUT_ID_0 + i * 4, + spi_vs_out_id[i], 0xFFFFFFFF, NULL); + } + + r600_pipe_state_add_reg(rstate, + R_0286C4_SPI_VS_OUT_CONFIG, + S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028868_SQ_PGM_RESOURCES_VS, + S_028868_NUM_GPRS(rshader->bc.ngpr) | + S_028868_STACK_SIZE(rshader->bc.nstack), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288A4_SQ_PGM_RESOURCES_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288D0_SQ_PGM_CF_OFFSET_VS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288DC_SQ_PGM_CF_OFFSET_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028858_SQ_PGM_START_VS, + r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + r600_pipe_state_add_reg(rstate, + R_028894_SQ_PGM_START_FS, + r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + + r600_pipe_state_add_reg(rstate, + R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, + 0xFFFFFFFF, NULL); -struct r600_shader_tgsi_instruction; +} -struct r600_shader_ctx { - struct tgsi_shader_info info; - struct tgsi_parse_context parse; - const struct tgsi_token *tokens; - unsigned type; - unsigned file_offset[TGSI_FILE_COUNT]; - unsigned temp_reg; - struct r600_shader_tgsi_instruction *inst_info; - struct r600_bc *bc; - struct r600_shader *shader; - u32 value[4]; - u32 *literals; - u32 nliterals; - u32 max_driver_temp_used; -}; +int r600_find_vs_semantic_index(struct r600_shader *vs, + struct r600_shader *ps, int id) +{ + struct r600_shader_io *input = &ps->input[id]; -struct r600_shader_tgsi_instruction { - unsigned tgsi_opcode; - unsigned is_op3; - unsigned r600_opcode; - int (*process)(struct r600_shader_ctx *ctx); -}; + for (int i = 0; i < vs->noutput; i++) { + if (input->name == vs->output[i].name && + input->sid == vs->output[i].sid) { + return i - 1; + } + } + return 0; +} -static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; -static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); +static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = &shader->rstate; + struct r600_shader *rshader = &shader->shader; + unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; + int pos_index = -1, face_index = -1; + + /* clear previous register */ + rstate->nregs = 0; + + for (i = 0; i < rshader->ninput; i++) { + tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + if (rshader->input[i].centroid) + tmp |= S_028644_SEL_CENTROID(1); + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + tmp |= S_028644_SEL_LINEAR(1); + + if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) + pos_index = i; + if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || + rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || + rshader->input[i].name == TGSI_SEMANTIC_POSITION) { + tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); + } + if (rshader->input[i].name == TGSI_SEMANTIC_FACE) + face_index = i; + if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && + rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { + tmp |= S_028644_PT_SPRITE_TEX(1); + } + r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); + } + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_Z_EXPORT_ENABLE(1), + S_02880C_Z_EXPORT_ENABLE(1), NULL); + if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_STENCIL_REF_EXPORT_ENABLE(1), + S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL); + } + + exports_ps = 0; + num_cout = 0; + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + exports_ps |= 1; + else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { + num_cout++; + } + } + exports_ps |= S_028854_EXPORT_COLORS(num_cout); + if (!exports_ps) { + /* always at least export 1 component per pixel */ + exports_ps = 2; + } + + spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | + S_0286CC_PERSP_GRADIENT_ENA(1); + spi_input_z = 0; + if (pos_index != -1) { + spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | + S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | + S_0286CC_BARYC_SAMPLE_CNTL(1)); + spi_input_z |= 1; + } + + spi_ps_in_control_1 = 0; + if (face_index != -1) { + spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | + S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); + } + + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028840_SQ_PGM_START_PS, + r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + r600_pipe_state_add_reg(rstate, + R_028850_SQ_PGM_RESOURCES_PS, + S_028868_NUM_GPRS(rshader->bc.ngpr) | + S_028868_STACK_SIZE(rshader->bc.nstack), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028854_SQ_PGM_EXPORTS_PS, + exports_ps, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288CC_SQ_PGM_CF_OFFSET_PS, + 0x00000000, 0xFFFFFFFF, NULL); + + if (rshader->uses_kill) { + /* only set some bits here, the other bits are set in the dsa state */ + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_KILL_ENABLE(1), + S_02880C_KILL_ENABLE(1), NULL); + } + r600_pipe_state_add_reg(rstate, + R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, + 0xFFFFFFFF, NULL); +} -static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader) +static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) { - struct r600_context *rctx = r600_context(ctx); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_shader *rshader = &shader->shader; + void *ptr; + + /* copy new shader */ + if (shader->bo == NULL) { + shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0); + if (shader->bo == NULL) { + return -ENOMEM; + } + ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL); + memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); + r600_bo_unmap(rctx->radeon, shader->bo); + } + /* build state */ + rshader->flat_shade = rctx->flatshade; + switch (rshader->processor_type) { + case TGSI_PROCESSOR_VERTEX: + if (rshader->family >= CHIP_CEDAR) { + evergreen_pipe_shader_vs(ctx, shader); + } else { + r600_pipe_shader_vs(ctx, shader); + } + break; + case TGSI_PROCESSOR_FRAGMENT: + if (rshader->family >= CHIP_CEDAR) { + evergreen_pipe_shader_ps(ctx, shader); + } else { + r600_pipe_shader_ps(ctx, shader); + } + break; + default: + return -EINVAL; + } + r600_context_pipe_state_set(&rctx->ctx, &shader->rstate); + return 0; +} + +static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_shader *shader = &rshader->shader; const struct util_format_description *desc; enum pipe_format resource_format[160]; unsigned i, nresources = 0; @@ -76,9 +263,16 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shad if (shader->processor_type != TGSI_PROCESSOR_VERTEX) return 0; + /* doing a full memcmp fell over the refcount */ + if ((rshader->vertex_elements.count == rctx->vertex_elements->count) && + (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements, 32 * sizeof(struct pipe_vertex_element)))) { + return 0; + } + rshader->vertex_elements = *rctx->vertex_elements; for (i = 0; i < rctx->vertex_elements->count; i++) { resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; } + r600_bo_reference(rctx->radeon, &rshader->bo, NULL); LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { switch (cf->inst) { case V_SQ_CF_WORD1_SQ_CF_INST_VTX: @@ -102,25 +296,40 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shad return r600_bc_build(&shader->bc); } -int r600_pipe_shader_create(struct pipe_context *ctx, - struct r600_context_state *rpshader, - const struct tgsi_token *tokens) +int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader) { - struct r600_screen *rscreen = r600_screen(ctx->screen); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + int r; + + if (shader == NULL) + return -EINVAL; + /* there should be enough input */ + if (rctx->vertex_elements->count < shader->shader.bc.nresource) { + R600_ERR("%d resources provided, expecting %d\n", + rctx->vertex_elements->count, shader->shader.bc.nresource); + return -EINVAL; + } + r = r600_shader_update(ctx, shader); + if (r) + return r; + return r600_pipe_shader(ctx, shader); +} + +int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); +int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; int r; //fprintf(stderr, "--------------------------------------------------------------\n"); //tgsi_dump(tokens, 0); - if (rpshader == NULL) - return -ENOMEM; - rpshader->shader.family = radeon_get_family(rscreen->rw); - rpshader->shader.use_mem_constant = rscreen->use_mem_constant; - r = r600_shader_from_tgsi(tokens, &rpshader->shader); + shader->shader.family = r600_get_family(rctx->radeon); + r = r600_shader_from_tgsi(tokens, &shader->shader); if (r) { R600_ERR("translation from TGSI failed !\n"); return r; } - r = r600_bc_build(&rpshader->shader.bc); + r = r600_bc_build(&shader->shader.bc); if (r) { R600_ERR("building bytecode failed !\n"); return r; @@ -129,81 +338,41 @@ int r600_pipe_shader_create(struct pipe_context *ctx, return 0; } -static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader) -{ - struct r600_context *rctx = r600_context(ctx); - struct radeon_state *state; - - state = &rpshader->rstate[0]; - radeon_state_fini(&rpshader->rstate[0]); - - return rctx->vtbl->vs_shader(rctx, rpshader, state); -} - -static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader) -{ - struct r600_context *rctx = r600_context(ctx); - struct radeon_state *state; - - state = &rpshader->rstate[0]; - radeon_state_fini(state); - - return rctx->vtbl->ps_shader(rctx, rpshader, state); -} - -static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - struct r600_shader *rshader = &rpshader->shader; - int r; - void *data; +/* + * tgsi -> r600 shader + */ +struct r600_shader_tgsi_instruction; - /* copy new shader */ - radeon_ws_bo_reference(rscreen->rw, &rpshader->bo, NULL); - rpshader->bo = NULL; - rpshader->bo = radeon_ws_bo(rscreen->rw, rshader->bc.ndw * 4, - 4096, 0); - if (rpshader->bo == NULL) { - return -ENOMEM; - } - data = radeon_ws_bo_map(rscreen->rw, rpshader->bo, 0, rctx); - memcpy(data, rshader->bc.bytecode, rshader->bc.ndw * 4); - radeon_ws_bo_unmap(rscreen->rw, rpshader->bo); - /* build state */ - rshader->flat_shade = rctx->flat_shade; - switch (rshader->processor_type) { - case TGSI_PROCESSOR_VERTEX: - r = r600_pipe_shader_vs(ctx, rpshader); - break; - case TGSI_PROCESSOR_FRAGMENT: - r = r600_pipe_shader_ps(ctx, rpshader); - break; - default: - r = -EINVAL; - break; - } - return r; -} +struct r600_shader_ctx { + struct tgsi_shader_info info; + struct tgsi_parse_context parse; + const struct tgsi_token *tokens; + unsigned type; + unsigned file_offset[TGSI_FILE_COUNT]; + unsigned temp_reg; + struct r600_shader_tgsi_instruction *inst_info; + struct r600_bc *bc; + struct r600_shader *shader; + u32 value[4]; + u32 *literals; + u32 nliterals; + u32 max_driver_temp_used; + /* needed for evergreen interpolation */ + boolean input_centroid; + boolean input_linear; + boolean input_perspective; + int num_interp_gpr; +}; -int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader) -{ - struct r600_context *rctx = r600_context(ctx); - int r; +struct r600_shader_tgsi_instruction { + unsigned tgsi_opcode; + unsigned is_op3; + unsigned r600_opcode; + int (*process)(struct r600_shader_ctx *ctx); +}; - if (rpshader == NULL) - return -EINVAL; - /* there should be enough input */ - if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, rpshader->shader.bc.nresource); - return -EINVAL; - } - r = r600_shader_update(ctx, &rpshader->shader); - if (r) - return r; - return r600_pipe_shader(ctx, rpshader); -} +static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; +static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); static int tgsi_is_supported(struct r600_shader_ctx *ctx) { @@ -225,11 +394,9 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) } #endif for (j = 0; j < i->Instruction.NumSrcRegs; j++) { - if (i->Src[j].Register.Dimension || - i->Src[j].Register.Absolute) { - R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j, - i->Src[j].Register.Dimension, - i->Src[j].Register.Absolute); + if (i->Src[j].Register.Dimension) { + R600_ERR("unsupported src %d (dimension %d)\n", j, + i->Src[j].Register.Dimension); return -EINVAL; } } @@ -242,10 +409,33 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) return 0; } -static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr) +static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) { int i, r; struct r600_bc_alu alu; + int gpr = 0, base_chan = 0; + int ij_index = 0; + + if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { + ij_index = 0; + if (ctx->shader->input[input].centroid) + ij_index++; + } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { + ij_index = 0; + /* if we have perspective add one */ + if (ctx->input_perspective) { + ij_index++; + /* if we have perspective centroid */ + if (ctx->input_centroid) + ij_index++; + } + if (ctx->shader->input[input].centroid) + ij_index++; + } + + /* work out gpr and base_chan from index */ + gpr = ij_index / 2; + base_chan = (2 * (ij_index % 2)) + 1; for (i = 0; i < 8; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -256,13 +446,16 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr) alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; if ((i > 1) && (i < 6)) { - alu.dst.sel = ctx->shader->input[gpr].gpr; + alu.dst.sel = ctx->shader->input[input].gpr; alu.dst.write = 1; } alu.dst.chan = i % 4; - alu.src[0].chan = (1 - (i % 2)); - alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + gpr; + + alu.src[0].sel = gpr; + alu.src[0].chan = (base_chan - (i % 2)); + + alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; alu.bank_swizzle_force = SQ_ALU_VEC_210; if ((i % 4) == 3) @@ -288,6 +481,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->input[i].name = d->Semantic.Name; ctx->shader->input[i].sid = d->Semantic.Index; ctx->shader->input[i].interpolate = d->Declaration.Interpolate; + ctx->shader->input[i].centroid = d->Declaration.Centroid; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; if (ctx->type == TGSI_PROCESSOR_VERTEX) { /* turn input into fetch */ @@ -304,13 +498,19 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) vtx.dst_sel_y = 1; vtx.dst_sel_z = 2; vtx.dst_sel_w = 3; + vtx.use_const_fields = 1; r = r600_bc_add_vtx(ctx->bc, &vtx); if (r) return r; } if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) { /* turn input into interpolate on EG */ - evergreen_interp_alu(ctx, i); + if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { + if (ctx->shader->input[i].interpolate > 0) { + ctx->shader->input[i].lds_pos = ctx->shader->nlds++; + evergreen_interp_alu(ctx, i); + } + } } break; case TGSI_FILE_OUTPUT: @@ -337,6 +537,53 @@ static int r600_get_temp(struct r600_shader_ctx *ctx) return ctx->temp_reg + ctx->max_driver_temp_used++; } +/* + * for evergreen we need to scan the shader to find the number of GPRs we need to + * reserve for interpolation. + * + * we need to know if we are going to emit + * any centroid inputs + * if perspective and linear are required +*/ +static int evergreen_gpr_count(struct r600_shader_ctx *ctx) +{ + int i; + int num_baryc; + + ctx->input_linear = FALSE; + ctx->input_perspective = FALSE; + ctx->input_centroid = FALSE; + ctx->num_interp_gpr = 1; + + /* any centroid inputs */ + for (i = 0; i < ctx->info.num_inputs; i++) { + /* skip position/face */ + if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || + ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) + continue; + if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) + ctx->input_linear = TRUE; + if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) + ctx->input_perspective = TRUE; + if (ctx->info.input_centroid[i]) + ctx->input_centroid = TRUE; + } + + num_baryc = 0; + /* ignoring sample for now */ + if (ctx->input_perspective) + num_baryc++; + if (ctx->input_linear) + num_baryc++; + if (ctx->input_centroid) + num_baryc *= 2; + + ctx->num_interp_gpr += (num_baryc + 1) >> 1; + + /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ + return ctx->num_interp_gpr; +} + int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) { struct tgsi_full_immediate *immediate; @@ -351,7 +598,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s r = r600_bc_init(ctx.bc, shader->family); if (r) return r; - ctx.bc->use_mem_constant = shader->use_mem_constant; ctx.tokens = tokens; tgsi_scan_shader(tokens, &ctx.info); tgsi_parse_init(&ctx.parse, tokens); @@ -383,14 +629,15 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s if (ctx.type == TGSI_PROCESSOR_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; } + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == 2) { + ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); + } ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + ctx.info.file_count[TGSI_FILE_INPUT]; ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + ctx.info.file_count[TGSI_FILE_OUTPUT]; - if (ctx.shader->use_mem_constant) - ctx.file_offset[TGSI_FILE_CONSTANT] = 128; - else - ctx.file_offset[TGSI_FILE_CONSTANT] = 256; + + ctx.file_offset[TGSI_FILE_CONSTANT] = 128; ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + @@ -481,7 +728,14 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { output[i].array_base = 61; output[i].swizzle_x = 2; - output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7; + output[i].swizzle_y = 7; + output[i].swizzle_z = output[i].swizzle_w = 7; + output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { + output[i].array_base = 61; + output[i].swizzle_x = 7; + output[i].swizzle_y = 1; + output[i].swizzle_z = output[i].swizzle_w = 7; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else { R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); @@ -514,7 +768,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].barrier = 1; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[i].array_base = 0; - output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; + output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); noutput++; } } @@ -587,6 +841,7 @@ static int tgsi_src(struct r600_shader_ctx *ctx, if (tgsi_src->Register.Indirect) r600_src->rel = V_SQ_REL_RELATIVE; r600_src->neg = tgsi_src->Register.Negate; + r600_src->abs = tgsi_src->Register.Absolute; r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; return 0; } @@ -642,13 +897,14 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s } } for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) { + if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = r600_src[j].sel; + alu.src[0].sel = r600_src[i].sel; alu.src[0].chan = k; + alu.src[0].rel = r600_src[i].rel; alu.dst.sel = treg; alu.dst.chan = k; alu.dst.write = 1; @@ -658,7 +914,8 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s if (r) return r; } - r600_src[j].sel = treg; + r600_src[i].sel = treg; + r600_src[i].rel =0; j--; } } @@ -677,13 +934,13 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_ nliteral++; } } - for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) { - if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) { + for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { + if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = r600_src[j].sel; + alu.src[0].sel = r600_src[i].sel; alu.src[0].chan = k; alu.dst.sel = treg; alu.dst.chan = k; @@ -694,11 +951,11 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_ if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); + r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]); if (r) return r; - r600_src[j].sel = treg; - j++; + r600_src[i].sel = treg; + j--; } } return 0; @@ -721,6 +978,9 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; @@ -791,6 +1051,9 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; r = tgsi_split_literal_constant(ctx, r600_src); if (r) @@ -926,38 +1189,95 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int r; - r = tgsi_setup_trig(ctx, r600_src); - if (r) - return r; - + /* We'll only need the trig stuff if we are going to write to the + * X or Y components of the destination vector. + */ + if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { + r = tgsi_setup_trig(ctx, r600_src); + if (r) + return r; + } /* dst.x = COS */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); - r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - if (r) - return r; + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); + r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + if (r) + return r; - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 0; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } /* dst.y = SIN */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); - r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - if (r) - return r; + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); + r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + if (r) + return r; + + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + /* dst.z = 0.0; */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + + r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + if (r) + return r; + + alu.src[0].sel = V_SQ_ALU_SRC_0; + alu.src[0].chan = 0; + + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + } + + /* dst.w = 1.0; */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + + r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); + if (r) + return r; + + alu.src[0].sel = V_SQ_ALU_SRC_1; + alu.src[0].chan = 0; + + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + } - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 0; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; return 0; } @@ -1157,34 +1477,6 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) return tgsi_helper_tempx_replicate(ctx); } -static int tgsi_trans(struct r600_shader_ctx *ctx) -{ - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; - int i, j, r; - - for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - if (inst->Dst[0].Register.WriteMask & (1 << i)) { - alu.inst = ctx->inst_info->r600_opcode; - for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]); - if (r) - return r; - alu.src[j].chan = tgsi_chan(&inst->Src[j], i); - } - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } - } - return 0; -} - static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -1301,6 +1593,9 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; /* tmp = (src > 0 ? 1 : src) */ for (i = 0; i < 4; i++) { @@ -1397,6 +1692,9 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; /* do it in 2 step as op3 doesn't support writemask */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1429,6 +1727,9 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; @@ -1502,7 +1803,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - + for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); @@ -1530,7 +1831,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - src_not_temp = false; + src_not_temp = FALSE; src_gpr = ctx->temp_reg; } @@ -1558,6 +1859,11 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) src_chan = 1; src2_chan = 2; break; + default: + assert(0); + src_chan = 0; + src2_chan = 0; + break; } r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); if (r) @@ -1641,7 +1947,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bc_add_literal(ctx->bc, lit_vals); if (r) return r; - src_not_temp = false; + src_not_temp = FALSE; src_gpr = ctx->temp_reg; } @@ -1650,7 +1956,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = src_gpr; - alu.src[0].chan = i; + alu.src[0].chan = tgsi_chan(&inst->Src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) @@ -1670,14 +1976,16 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&tex, 0, sizeof(struct r600_bc_tex)); tex.inst = opcode; - tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; - tex.sampler_id = tex.resource_id; + tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; + tex.resource_id = tex.sampler_id; + if (ctx->shader->processor_type == TGSI_PROCESSOR_VERTEX) + tex.resource_id += PIPE_MAX_ATTRIBS; tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; - tex.dst_sel_x = 0; - tex.dst_sel_y = 1; - tex.dst_sel_z = 2; - tex.dst_sel_w = 3; + tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; + tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; + tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; + tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; tex.src_sel_x = 0; tex.src_sel_y = 1; tex.src_sel_z = 2; @@ -1720,6 +2028,9 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; /* 1 - src0 */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1799,6 +2110,9 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; if (inst->Dst[0].Register.WriteMask != 0xf) use_temp = 1; @@ -1850,7 +2164,10 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; - + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; + for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); @@ -2293,7 +2610,40 @@ static int tgsi_log(struct r600_shader_ctx *ctx) } /* r6/7 only for now */ -static int tgsi_arl(struct r600_shader_ctx *ctx) +static int tgsi_eg_arl(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int r; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + alu.last = 1; + alu.dst.chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + if (r) + return r; + return 0; +} +static int tgsi_r600_arl(struct r600_shader_ctx *ctx) { /* TODO from r600c, ar values don't persist between clauses */ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -2313,6 +2663,7 @@ static int tgsi_arl(struct r600_shader_ctx *ctx) r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); if (r) return r; + ctx->bc->cf_last->r6xx_uses_waterfall = 1; return 0; } @@ -2637,7 +2988,7 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) } static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { - {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl}, + {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, @@ -2718,7 +3069,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, /* gap */ @@ -2801,7 +3152,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { }; static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { - {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, @@ -2876,7 +3227,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_TXL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, /* gap */ diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 06dd65038d..f8bc595139 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -31,6 +31,8 @@ struct r600_shader_io { unsigned done; int sid; unsigned interpolate; + boolean centroid; + unsigned lds_pos; /* for evergreen */ }; struct r600_shader { @@ -39,11 +41,11 @@ struct r600_shader { boolean flat_shade; unsigned ninput; unsigned noutput; + unsigned nlds; struct r600_shader_io input[32]; struct r600_shader_io output[32]; enum radeon_family family; boolean uses_kill; - boolean use_mem_constant; }; int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 4dcdc492fc..00234f956a 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -19,184 +19,566 @@ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse + */ + +/* TODO: + * - fix mask for depth control & cull for query */ #include <stdio.h> #include <errno.h> -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_memory.h" -#include "util/u_pack_color.h" -#include "r600_screen.h" -#include "r600_context.h" +#include <pipe/p_defines.h> +#include <pipe/p_state.h> +#include <pipe/p_context.h> +#include <tgsi/tgsi_scan.h> +#include <tgsi/tgsi_parse.h> +#include <tgsi/tgsi_util.h> +#include <util/u_double_list.h> +#include <util/u_pack_color.h> +#include <util/u_memory.h> +#include <util/u_inlines.h> +#include <util/u_upload_mgr.h> +#include <util/u_index_modify.h> +#include <util/u_framebuffer.h> +#include <pipebuffer/pb_buffer.h> +#include "r600.h" +#include "r600d.h" #include "r600_resource.h" +#include "r600_shader.h" +#include "r600_pipe.h" +#include "r600_state_inlines.h" -static struct r600_context_state *r600_new_context_state(unsigned type) +static void r600_draw_common(struct r600_drawl *draw) { - struct r600_context_state *rstate = CALLOC_STRUCT(r600_context_state); - if (rstate == NULL) - return NULL; - rstate->type = type; - rstate->refcount = 1; - return rstate; -} + struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx; + struct r600_pipe_state *rstate; + struct r600_resource *rbuffer; + unsigned i, j, offset, prim; + u32 vgt_dma_index_type, vgt_draw_initiator, mask; + struct pipe_vertex_buffer *vertex_buffer; + struct r600_draw rdraw; + struct r600_pipe_state vgt; + + switch (draw->index_size) { + case 2: + vgt_draw_initiator = 0; + vgt_dma_index_type = 0; + break; + case 4: + vgt_draw_initiator = 0; + vgt_dma_index_type = 1; + break; + case 0: + vgt_draw_initiator = 2; + vgt_dma_index_type = 0; + break; + default: + R600_ERR("unsupported index size %d\n", draw->index_size); + return; + } + if (r600_conv_pipe_prim(draw->mode, &prim)) + return; -static void *r600_create_blend_state(struct pipe_context *ctx, - const struct pipe_blend_state *state) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; - rstate = r600_new_context_state(pipe_blend_type); - rstate->state.blend = *state; - rctx->vtbl->blend(rctx, &rstate->rstate[0], &rstate->state.blend); - - return rstate; + /* rebuild vertex shader if input format changed */ + if (r600_pipe_shader_update(&rctx->context, rctx->vs_shader)) + return; + if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader)) + return; + + for (i = 0 ; i < rctx->vertex_elements->count; i++) { + uint32_t word2, format; + + rstate = &rctx->vs_resource[i]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + + j = rctx->vertex_elements->elements[i].vertex_buffer_index; + vertex_buffer = &rctx->vertex_buffer[j]; + rbuffer = (struct r600_resource*)vertex_buffer->buffer; + offset = rctx->vertex_elements->elements[i].src_offset + + vertex_buffer->buffer_offset + + r600_bo_offset(rbuffer->bo); + + format = r600_translate_vertex_data_type(rctx->vertex_elements->elements[i].src_format); + + word2 = format | S_038008_STRIDE(vertex_buffer->stride); + + r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, word2, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, 0xC0000000, 0xFFFFFFFF, NULL); + r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, i); + } + + mask = 0; + for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { + mask |= (0xF << (i * 4)); + } + + vgt.id = R600_PIPE_STATE_VGT; + vgt.nregs = 0; + r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw->index_bias, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw->max_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw->min_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); + /* build late state */ + if (rctx->rasterizer && rctx->framebuffer.zsbuf) { + float offset_units = rctx->rasterizer->offset_units; + unsigned offset_db_fmt_cntl = 0, depth; + + switch (rctx->framebuffer.zsbuf->texture->format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + depth = -24; + offset_units *= 2.0f; + break; + case PIPE_FORMAT_Z32_FLOAT: + depth = -23; + offset_units *= 1.0f; + offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); + break; + case PIPE_FORMAT_Z16_UNORM: + depth = -16; + offset_units *= 4.0f; + break; + default: + return; + } + offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); + r600_pipe_state_add_reg(&vgt, + R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, + offset_db_fmt_cntl, 0xFFFFFFFF, NULL); + } + r600_context_pipe_state_set(&rctx->ctx, &vgt); + + rdraw.vgt_num_indices = draw->count; + rdraw.vgt_num_instances = 1; + rdraw.vgt_index_type = vgt_dma_index_type; + rdraw.vgt_draw_initiator = vgt_draw_initiator; + rdraw.indices = NULL; + if (draw->index_buffer) { + rbuffer = (struct r600_resource*)draw->index_buffer; + rdraw.indices = rbuffer->bo; + rdraw.indices_bo_offset = draw->index_buffer_offset; + } + r600_context_draw(&rctx->ctx, &rdraw); } -static void *r600_create_dsa_state(struct pipe_context *ctx, - const struct pipe_depth_stencil_alpha_state *state) +void r600_translate_index_buffer(struct r600_pipe_context *r600, + struct pipe_resource **index_buffer, + unsigned *index_size, + unsigned *start, unsigned count) { - struct r600_context_state *rstate; + switch (*index_size) { + case 1: + util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count); + *index_size = 2; + *start = 0; + break; - rstate = r600_new_context_state(pipe_dsa_type); - rstate->state.dsa = *state; - return rstate; + case 2: + if (*start % 2 != 0) { + util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count); + *start = 0; + } + break; + + case 4: + break; + } } -static void *r600_create_rs_state(struct pipe_context *ctx, - const struct pipe_rasterizer_state *state) +void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { - struct r600_context_state *rstate; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_drawl draw; - rstate = r600_new_context_state(pipe_rasterizer_type); - rstate->state.rasterizer = *state; - return rstate; + if (rctx->any_user_vbs) { + r600_upload_user_buffers(rctx); + rctx->any_user_vbs = FALSE; + } + + memset(&draw, 0, sizeof(struct r600_drawl)); + draw.ctx = ctx; + draw.mode = info->mode; + draw.start = info->start; + draw.count = info->count; + if (info->indexed && rctx->index_buffer.buffer) { + draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; + draw.min_index = info->min_index; + draw.max_index = info->max_index; + draw.index_bias = info->index_bias; + + r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, + &rctx->index_buffer.index_size, + &draw.start, + info->count); + + draw.index_size = rctx->index_buffer.index_size; + pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); + draw.index_buffer_offset = draw.start * draw.index_size; + draw.start = 0; + r600_upload_index_buffer(rctx, &draw); + } else { + draw.index_size = 0; + draw.index_buffer = NULL; + draw.min_index = info->min_index; + draw.max_index = info->max_index; + draw.index_bias = info->start; + } + r600_draw_common(&draw); + + pipe_resource_reference(&draw.index_buffer, NULL); } -static void *r600_create_sampler_state(struct pipe_context *ctx, - const struct pipe_sampler_state *state) +static void r600_set_blend_color(struct pipe_context *ctx, + const struct pipe_blend_color *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - rstate = r600_new_context_state(pipe_sampler_type); - rstate->state.sampler = *state; - rctx->vtbl->sampler(rctx, &rstate->rstate[0], &rstate->state.sampler, 0); - rctx->vtbl->sampler_border(rctx, &rstate->rstate[1], &rstate->state.sampler, 0); - return rstate; + if (rstate == NULL) + return; + + rstate->id = R600_PIPE_STATE_BLEND_COLOR; + r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL); + free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]); + rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_remove_sampler_view(struct r600_shader_sampler_states *sampler, - struct r600_context_state *rstate) +static void *r600_create_blend_state(struct pipe_context *ctx, + const struct pipe_blend_state *state) { - int i, j; - - for (i = 0; i < sampler->nview; i++) { - for (j = 0; j < rstate->nrstate; j++) { - if (sampler->view[i] == &rstate->rstate[j]) - sampler->view[i] = NULL; + struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend); + struct r600_pipe_state *rstate; + u32 color_control, target_mask; + + if (blend == NULL) { + return NULL; + } + rstate = &blend->rstate; + + rstate->id = R600_PIPE_STATE_BLEND; + + target_mask = 0; + color_control = S_028808_PER_MRT_BLEND(1); + if (state->logicop_enable) { + color_control |= (state->logicop_func << 16) | (state->logicop_func << 20); + } else { + color_control |= (0xcc << 16); + } + /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ + if (state->independent_blend_enable) { + for (int i = 0; i < 8; i++) { + if (state->rt[i].blend_enable) { + color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); + } + target_mask |= (state->rt[i].colormask << (4 * i)); + } + } else { + for (int i = 0; i < 8; i++) { + if (state->rt[0].blend_enable) { + color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); + } + target_mask |= (state->rt[0].colormask << (4 * i)); } } -} -static void r600_sampler_view_destroy(struct pipe_context *ctx, - struct pipe_sampler_view *state) -{ - struct r600_context_state *rstate = (struct r600_context_state *)state; - struct r600_context *rctx = r600_context(ctx); + blend->cb_target_mask = target_mask; + r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, + color_control, 0xFFFFFFFF, NULL); + + for (int i = 0; i < 8; i++) { + unsigned eqRGB = state->rt[i].rgb_func; + unsigned srcRGB = state->rt[i].rgb_src_factor; + unsigned dstRGB = state->rt[i].rgb_dst_factor; + + unsigned eqA = state->rt[i].alpha_func; + unsigned srcA = state->rt[i].alpha_src_factor; + unsigned dstA = state->rt[i].alpha_dst_factor; + uint32_t bc = 0; + + if (!state->rt[i].blend_enable) + continue; + + bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); + bc |= S_028804_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); + bc |= S_028804_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); + + if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { + bc |= S_028804_SEPARATE_ALPHA_BLEND(1); + bc |= S_028804_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); + bc |= S_028804_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); + bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); + } - /* need to search list of vs/ps sampler views and remove it from any - uggh */ - r600_remove_sampler_view(&rctx->ps_sampler, rstate); - r600_remove_sampler_view(&rctx->vs_sampler, rstate); - r600_context_state_decref(rstate); + r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL); + if (i == 0) { + r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL); + } + } + return rstate; } -static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx, - struct pipe_resource *texture, - const struct pipe_sampler_view *state) +static void r600_bind_blend_state(struct pipe_context *ctx, void *state) { - struct r600_context_state *rstate; - struct r600_context *rctx = r600_context(ctx); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state; + struct r600_pipe_state *rstate; - rstate = r600_new_context_state(pipe_sampler_view_type); - rstate->state.sampler_view = *state; - rstate->state.sampler_view.texture = NULL; - pipe_reference(NULL, &texture->reference); - rstate->state.sampler_view.texture = texture; - rstate->state.sampler_view.reference.count = 1; - rstate->state.sampler_view.context = ctx; - rctx->vtbl->resource(ctx, &rstate->rstate[0], &rstate->state.sampler_view, 0); - return &rstate->state.sampler_view; + if (state == NULL) + return; + rstate = &blend->rstate; + rctx->states[rstate->id] = rstate; + rctx->cb_target_mask = blend->cb_target_mask; + r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_set_sampler_view(struct pipe_context *ctx, - unsigned count, - struct pipe_sampler_view **views, - struct r600_shader_sampler_states *sampler, - unsigned shader_id) +static void *r600_create_dsa_state(struct pipe_context *ctx, + const struct pipe_depth_stencil_alpha_state *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; - unsigned i; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; + unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control; - for (i = 0; i < sampler->nview; i++) { - radeon_draw_unbind(&rctx->draw, sampler->view[i]); + if (rstate == NULL) { + return NULL; } - for (i = 0; i < count; i++) { - rstate = (struct r600_context_state *)views[i]; - if (rstate) { - rstate->nrstate = 0; + rstate->id = R600_PIPE_STATE_DSA; + /* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */ + /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be + * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will + * be set if shader use texkill instruction + */ + db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); + stencil_ref_mask = 0; + stencil_ref_mask_bf = 0; + db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | + S_028800_Z_WRITE_ENABLE(state->depth.writemask) | + S_028800_ZFUNC(state->depth.func); + + /* stencil */ + if (state->stencil[0].enabled) { + db_depth_control |= S_028800_STENCIL_ENABLE(1); + db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); + db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); + db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); + db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); + + + stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | + S_028430_STENCILWRITEMASK(state->stencil[0].writemask); + if (state->stencil[1].enabled) { + db_depth_control |= S_028800_BACKFACE_ENABLE(1); + db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); + db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); + db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); + db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); + stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | + S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); } } - for (i = 0; i < count; i++) { - rstate = (struct r600_context_state *)views[i]; - if (rstate) { - if (rstate->nrstate >= R600_MAX_RSTATE) - continue; - if (rstate->nrstate) { - memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state)); - } - radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_RESOURCE, i, shader_id); - sampler->view[i] = &rstate->rstate[rstate->nrstate]; - rstate->nrstate++; + + /* alpha */ + alpha_test_control = 0; + alpha_ref = 0; + if (state->alpha.enabled) { + alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); + alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); + alpha_ref = fui(state->alpha.ref_value); + } + + /* misc */ + db_render_control = 0; + db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) | + S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | + S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); + /* TODO db_render_override depends on query */ + r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028430_DB_STENCILREFMASK, stencil_ref_mask, + 0xFFFFFFFF & C_028430_STENCILREF, NULL); + r600_pipe_state_add_reg(rstate, + R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf, + 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL); + r600_pipe_state_add_reg(rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL); + r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL); + + return rstate; +} + +static void *r600_create_rs_state(struct pipe_context *ctx, + const struct pipe_rasterizer_state *state) +{ + struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer); + struct r600_pipe_state *rstate; + unsigned tmp; + unsigned prov_vtx = 1, polygon_dual_mode; + unsigned clip_rule; + + if (rs == NULL) { + return NULL; + } + + rstate = &rs->rstate; + rs->flatshade = state->flatshade; + rs->sprite_coord_enable = state->sprite_coord_enable; + + clip_rule = state->scissor ? 0xAAAA : 0xFFFF; + /* offset */ + rs->offset_units = state->offset_units; + rs->offset_scale = state->offset_scale * 12.0f; + + rstate->id = R600_PIPE_STATE_RASTERIZER; + if (state->flatshade_first) + prov_vtx = 0; + tmp = 0x00000001; + if (state->sprite_coord_enable) { + tmp |= S_0286D4_PNT_SPRITE_ENA(1) | + S_0286D4_PNT_SPRITE_OVRD_X(2) | + S_0286D4_PNT_SPRITE_OVRD_Y(3) | + S_0286D4_PNT_SPRITE_OVRD_Z(0) | + S_0286D4_PNT_SPRITE_OVRD_W(1); + if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { + tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); } } - sampler->nview = count; + r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL); + + polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || + state->fill_back != PIPE_POLYGON_MODE_FILL); + r600_pipe_state_add_reg(rstate, R_028814_PA_SU_SC_MODE_CNTL, + S_028814_PROVOKING_VTX_LAST(prov_vtx) | + S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | + S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | + S_028814_FACE(!state->front_ccw) | + S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | + S_028814_POLY_MODE(polygon_dual_mode) | + S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | + S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL, + S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | + S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + /* point size 12.4 fixed point */ + tmp = (unsigned)(state->point_size * 8.0); + r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); + + tmp = (unsigned)(state->line_width * 8.0); + r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); + + return rstate; } -static void r600_set_ps_sampler_view(struct pipe_context *ctx, - unsigned count, - struct pipe_sampler_view **views) +static void r600_bind_rs_state(struct pipe_context *ctx, void *state) { - struct r600_context *rctx = r600_context(ctx); - r600_set_sampler_view(ctx, count, views, &rctx->ps_sampler, R600_SHADER_PS); + struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + if (state == NULL) + return; + + rctx->flatshade = rs->flatshade; + rctx->sprite_coord_enable = rs->sprite_coord_enable; + rctx->rasterizer = rs; + + rctx->states[rs->rstate.id] = &rs->rstate; + r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); } -static void r600_set_vs_sampler_view(struct pipe_context *ctx, - unsigned count, - struct pipe_sampler_view **views) +static void r600_delete_rs_state(struct pipe_context *ctx, void *state) { - struct r600_context *rctx = r600_context(ctx); - r600_set_sampler_view(ctx, count, views, &rctx->vs_sampler, R600_SHADER_VS); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; + + if (rctx->rasterizer == rs) { + rctx->rasterizer = NULL; + } + if (rctx->states[rs->rstate.id] == &rs->rstate) { + rctx->states[rs->rstate.id] = NULL; + } + free(rs); } -static void *r600_create_shader_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) +static void *r600_create_sampler_state(struct pipe_context *ctx, + const struct pipe_sampler_state *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; - int r; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + union util_color uc; - rstate = r600_new_context_state(pipe_shader_type); - rstate->state.shader = *state; - r = r600_pipe_shader_create(&rctx->context, rstate, rstate->state.shader.tokens); - if (r) { - r600_context_state_decref(rstate); + if (rstate == NULL) { return NULL; } + + rstate->id = R600_PIPE_STATE_SAMPLER; + util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, + S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | + S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | + S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | + S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) | + S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) | + S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | + S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | + S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); + /* FIXME LOD it depends on texture base level ... */ + r600_pipe_state_add_reg(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, + S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | + S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | + S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL); + if (uc.ui) { + r600_pipe_state_add_reg(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); + } return rstate; } @@ -208,192 +590,242 @@ static void *r600_create_vertex_elements(struct pipe_context *ctx, assert(count < 32); v->count = count; - memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); v->refcount = 1; + memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); return v; } -static void r600_delete_vertex_element(struct pipe_context *ctx, void *state) +static void r600_sampler_view_destroy(struct pipe_context *ctx, + struct pipe_sampler_view *state) { - struct r600_vertex_element *v = (struct r600_vertex_element*)state; + struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state; - if (v == NULL) - return; - if (--v->refcount) - return; - free(v); + pipe_resource_reference(&state->texture, NULL); + FREE(resource); } -static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) +static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx, + struct pipe_resource *texture, + const struct pipe_sampler_view *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_vertex_element *v = (struct r600_vertex_element*)state; + struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); + struct r600_pipe_state *rstate; + const struct util_format_description *desc; + struct r600_resource_texture *tmp; + struct r600_resource *rbuffer; + unsigned format; + uint32_t word4 = 0, yuv_format = 0, pitch = 0; + unsigned char swizzle[4], array_mode = 0, tile_type = 0; + struct r600_bo *bo[2]; - r600_delete_vertex_element(ctx, rctx->vertex_elements); - rctx->vertex_elements = v; - if (v) { - v->refcount++; + if (resource == NULL) + return NULL; + rstate = &resource->state; + + /* initialize base object */ + resource->base = *state; + resource->base.texture = NULL; + pipe_reference(NULL, &texture->reference); + resource->base.texture = texture; + resource->base.reference.count = 1; + resource->base.context = ctx; + + swizzle[0] = state->swizzle_r; + swizzle[1] = state->swizzle_g; + swizzle[2] = state->swizzle_b; + swizzle[3] = state->swizzle_a; + format = r600_translate_texformat(state->format, + swizzle, + &word4, &yuv_format); + if (format == ~0) { + format = 0; + } + desc = util_format_description(state->format); + if (desc == NULL) { + R600_ERR("unknow format %d\n", state->format); + } + tmp = (struct r600_resource_texture*)texture; + rbuffer = &tmp->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; + /* FIXME depth texture decompression */ + if (tmp->depth) { + r600_texture_depth_flush(ctx, texture); + tmp = (struct r600_resource_texture*)texture; + rbuffer = &tmp->flushed_depth_texture->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; } + pitch = align(tmp->pitch_in_pixels[0], 8); + if (tmp->tiled) { + array_mode = tmp->array_mode; + tile_type = tmp->tile_type; + } + + /* FIXME properly handle first level != 0 */ + r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, + S_038000_DIM(r600_tex_dim(texture->target)) | + S_038000_TILE_MODE(array_mode) | + S_038000_TILE_TYPE(tile_type) | + S_038000_PITCH((pitch / 8) - 1) | + S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, + S_038004_TEX_HEIGHT(texture->height0 - 1) | + S_038004_TEX_DEPTH(texture->depth0 - 1) | + S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, + (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, + (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); + r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, + word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | + S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | + S_038010_REQUEST_SIZE(1) | + S_038010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, + S_038014_LAST_LEVEL(state->last_level) | + S_038014_BASE_ARRAY(0) | + S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, + S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL); + + return &resource->base; } -static void r600_bind_rasterizer_state(struct pipe_context *ctx, void *state) +static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, + struct pipe_sampler_view **views) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate = (struct r600_context_state *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; - if (state == NULL) - return; - rctx->rasterizer = r600_context_state_decref(rctx->rasterizer); - rctx->rasterizer = r600_context_state_incref(rstate); + for (int i = 0; i < count; i++) { + if (resource[i]) { + r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS); + } + } } -static void r600_bind_blend_state(struct pipe_context *ctx, void *state) +static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, + struct pipe_sampler_view **views) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate = (struct r600_context_state *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; - if (state == NULL) - return; - rctx->blend = r600_context_state_decref(rctx->blend); - rctx->blend = r600_context_state_incref(rstate); + rctx->ps_samplers.views = resource; + rctx->ps_samplers.n_views = count; + for (int i = 0; i < count; i++) { + if (resource[i]) { + r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + } + } } -static void r600_bind_dsa_state(struct pipe_context *ctx, void *state) +static void r600_bind_state(struct pipe_context *ctx, void *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate = (struct r600_context_state *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; if (state == NULL) return; - rctx->dsa = r600_context_state_decref(rctx->dsa); - rctx->dsa = r600_context_state_incref(rstate); -} - -static void r600_bind_ps_shader(struct pipe_context *ctx, void *state) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate = (struct r600_context_state *)state; - - rctx->ps_shader = r600_context_state_decref(rctx->ps_shader); - rctx->ps_shader = r600_context_state_incref(rstate); + rctx->states[rstate->id] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_bind_vs_shader(struct pipe_context *ctx, void *state) +static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate = (struct r600_context_state *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - rctx->vs_shader = r600_context_state_decref(rctx->vs_shader); - rctx->vs_shader = r600_context_state_incref(rstate); -} + rctx->ps_samplers.samplers = states; + rctx->ps_samplers.n_samplers = count; -static void r600_bind_sampler_shader(struct pipe_context *ctx, - unsigned count, void **states, - struct r600_shader_sampler_states *sampler, unsigned shader_id) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; - unsigned i; - - for (i = 0; i < sampler->nsampler; i++) { - radeon_draw_unbind(&rctx->draw, sampler->sampler[i]); - } - for (i = 0; i < sampler->nborder; i++) { - radeon_draw_unbind(&rctx->draw, sampler->border[i]); + for (int i = 0; i < count; i++) { + r600_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i); } - for (i = 0; i < count; i++) { - rstate = (struct r600_context_state *)states[i]; - if (rstate) { - rstate->nrstate = 0; - } - } - for (i = 0; i < count; i++) { - rstate = (struct r600_context_state *)states[i]; - if (rstate) { - if (rstate->nrstate >= R600_MAX_RSTATE) - continue; - if (rstate->nrstate) { - memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state)); - memcpy(&rstate->rstate[rstate->nrstate+1], &rstate->rstate[1], sizeof(struct radeon_state)); - } - radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_SAMPLER, i, shader_id); - radeon_state_convert(&rstate->rstate[rstate->nrstate + 1], R600_STATE_SAMPLER_BORDER, i, shader_id); - sampler->sampler[i] = &rstate->rstate[rstate->nrstate]; - sampler->border[i] = &rstate->rstate[rstate->nrstate + 1]; - rstate->nrstate += 2; - } - } - sampler->nsampler = count; - sampler->nborder = count; } -static void r600_bind_ps_sampler(struct pipe_context *ctx, - unsigned count, void **states) +static void r600_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void **states) { - struct r600_context *rctx = r600_context(ctx); - r600_bind_sampler_shader(ctx, count, states, &rctx->ps_sampler, R600_SHADER_PS); -} + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; -static void r600_bind_vs_sampler(struct pipe_context *ctx, - unsigned count, void **states) -{ - struct r600_context *rctx = r600_context(ctx); - r600_bind_sampler_shader(ctx, count, states, &rctx->vs_sampler, R600_SHADER_VS); + for (int i = 0; i < count; i++) { + r600_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i); + } } static void r600_delete_state(struct pipe_context *ctx, void *state) { - struct r600_context_state *rstate = (struct r600_context_state *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; - r600_context_state_decref(rstate); + if (rctx->states[rstate->id] == rstate) { + rctx->states[rstate->id] = NULL; + } + for (int i = 0; i < rstate->nregs; i++) { + r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); + } + free(rstate); } -static void r600_set_blend_color(struct pipe_context *ctx, - const struct pipe_blend_color *color) +static void r600_delete_vertex_element(struct pipe_context *ctx, void *state) { - struct r600_context *rctx = r600_context(ctx); + struct r600_vertex_element *v = (struct r600_vertex_element*)state; - rctx->blend_color = *color; + if (v == NULL) + return; + if (--v->refcount) + return; + free(v); } static void r600_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - r600_context_state_decref(rctx->clip); + if (rstate == NULL) + return; - rstate = r600_new_context_state(pipe_clip_type); - rstate->state.clip = *state; - rctx->vtbl->ucp(rctx, &rstate->rstate[0], &rstate->state.clip); - rctx->clip = rstate; + rctx->clip = *state; + rstate->id = R600_PIPE_STATE_CLIP; + for (int i = 0; i < state->nr; i++) { + r600_pipe_state_add_reg(rstate, + R_028E20_PA_CL_UCP0_X + i * 4, + fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028E24_PA_CL_UCP0_Y + i * 4, + fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028E28_PA_CL_UCP0_Z + i * 4, + fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028E2C_PA_CL_UCP0_W + i * 4, + fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); + } + r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, + S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) | + S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) | + S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_CLIP]); + rctx->states[R600_PIPE_STATE_CLIP] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_set_framebuffer_state(struct pipe_context *ctx, - const struct pipe_framebuffer_state *state) +static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; - int i; - - r600_context_state_decref(rctx->framebuffer); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_vertex_element *v = (struct r600_vertex_element*)state; - rstate = r600_new_context_state(pipe_framebuffer_type); - rstate->state.framebuffer = *state; - for (i = 0; i < rstate->state.framebuffer.nr_cbufs; i++) { - pipe_reference(NULL, &state->cbufs[i]->reference); - } - pipe_reference(NULL, &state->zsbuf->reference); - rctx->framebuffer = rstate; - for (i = 0; i < state->nr_cbufs; i++) { - rctx->vtbl->cb(rctx, &rstate->rstate[i+1], state, i); - } - if (state->zsbuf) { - rctx->vtbl->db(rctx, &rstate->rstate[0], state); + r600_delete_vertex_element(ctx, rctx->vertex_elements); + rctx->vertex_elements = v; + if (v) { + v->refcount++; +// rctx->vs_rebuild = TRUE; } - return; } static void r600_set_polygon_stipple(struct pipe_context *ctx, @@ -408,55 +840,296 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask static void r600_set_scissor_state(struct pipe_context *ctx, const struct pipe_scissor_state *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + u32 tl, br; - r600_context_state_decref(rctx->scissor); + if (rstate == NULL) + return; - rstate = r600_new_context_state(pipe_scissor_type); - rstate->state.scissor = *state; - rctx->scissor = rstate; + rstate->id = R600_PIPE_STATE_SCISSOR; + tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); + br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); + r600_pipe_state_add_reg(rstate, + R_028210_PA_SC_CLIPRECT_0_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028214_PA_SC_CLIPRECT_0_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028218_PA_SC_CLIPRECT_1_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02821C_PA_SC_CLIPRECT_1_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028220_PA_SC_CLIPRECT_2_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028224_PA_SC_CLIPRECT_2_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028228_PA_SC_CLIPRECT_3_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02822C_PA_SC_CLIPRECT_3_BR, br, + 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_SCISSOR]); + rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } static void r600_set_stencil_ref(struct pipe_context *ctx, const struct pipe_stencil_ref *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + u32 tmp; - r600_context_state_decref(rctx->stencil_ref); + if (rstate == NULL) + return; - rstate = r600_new_context_state(pipe_stencil_ref_type); - rstate->state.stencil_ref = *state; - rctx->stencil_ref = rstate; + rctx->stencil_ref = *state; + rstate->id = R600_PIPE_STATE_STENCIL_REF; + tmp = S_028430_STENCILREF(state->ref_value[0]); + r600_pipe_state_add_reg(rstate, + R_028430_DB_STENCILREFMASK, tmp, + ~C_028430_STENCILREF, NULL); + tmp = S_028434_STENCILREF_BF(state->ref_value[1]); + r600_pipe_state_add_reg(rstate, + R_028434_DB_STENCILREFMASK_BF, tmp, + ~C_028434_STENCILREF_BF, NULL); + + free(rctx->states[R600_PIPE_STATE_STENCIL_REF]); + rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_set_vertex_buffers(struct pipe_context *ctx, - unsigned count, - const struct pipe_vertex_buffer *buffers) +static void r600_set_viewport_state(struct pipe_context *ctx, + const struct pipe_viewport_state *state) { - struct r600_context *rctx = r600_context(ctx); - unsigned i; - boolean any_user_buffers = FALSE; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - for (i = 0; i < rctx->nvertex_buffer; i++) { - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); + if (rstate == NULL) + return; + + rctx->viewport = *state; + rstate->id = R600_PIPE_STATE_VIEWPORT; + r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_VIEWPORT]); + rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, + const struct pipe_framebuffer_state *state, int cb) +{ + struct r600_resource_texture *rtex; + struct r600_resource *rbuffer; + unsigned level = state->cbufs[cb]->level; + unsigned pitch, slice; + unsigned color_info; + unsigned format, swap, ntype; + const struct util_format_description *desc; + struct r600_bo *bo[3]; + + rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; + rbuffer = &rtex->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; + bo[2] = rbuffer->bo; + + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * state->cbufs[cb]->height / 64 - 1; + ntype = 0; + desc = util_format_description(rtex->resource.base.b.format); + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + ntype = V_0280A0_NUMBER_SRGB; + + format = r600_translate_colorformat(rtex->resource.base.b.format); + swap = r600_translate_colorswap(rtex->resource.base.b.format); + color_info = S_0280A0_FORMAT(format) | + S_0280A0_COMP_SWAP(swap) | + S_0280A0_ARRAY_MODE(rtex->array_mode); + S_0280A0_BLEND_CLAMP(1) | + S_0280A0_NUMBER_TYPE(ntype); + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + color_info |= S_0280A0_SOURCE_FORMAT(1); + + r600_pipe_state_add_reg(rstate, + R_028040_CB_COLOR0_BASE + cb * 4, + (state->cbufs[cb]->offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + r600_pipe_state_add_reg(rstate, + R_0280A0_CB_COLOR0_INFO + cb * 4, + color_info, 0xFFFFFFFF, bo[0]); + r600_pipe_state_add_reg(rstate, + R_028060_CB_COLOR0_SIZE + cb * 4, + S_028060_PITCH_TILE_MAX(pitch) | + S_028060_SLICE_TILE_MAX(slice), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028080_CB_COLOR0_VIEW + cb * 4, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0280E0_CB_COLOR0_FRAG + cb * 4, + r600_bo_offset(bo[1]) >> 8, 0xFFFFFFFF, bo[1]); + r600_pipe_state_add_reg(rstate, + R_0280C0_CB_COLOR0_TILE + cb * 4, + r600_bo_offset(bo[2]) >> 8, 0xFFFFFFFF, bo[2]); + r600_pipe_state_add_reg(rstate, + R_028100_CB_COLOR0_MASK + cb * 4, + 0x00000000, 0xFFFFFFFF, NULL); +} + +static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, + const struct pipe_framebuffer_state *state) +{ + struct r600_resource_texture *rtex; + struct r600_resource *rbuffer; + unsigned level; + unsigned pitch, slice, format; + + if (state->zsbuf == NULL) + return; + + rtex = (struct r600_resource_texture*)state->zsbuf->texture; + rtex->tiled = 1; + rtex->array_mode = 2; + rtex->tile_type = 1; + rtex->depth = 1; + rbuffer = &rtex->resource; + + level = state->zsbuf->level; + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * state->zsbuf->height / 64 - 1; + format = r600_translate_dbformat(state->zsbuf->texture->format); + + r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, + (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_028000_DB_DEPTH_SIZE, + S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028010_DB_DEPTH_INFO, + S_028010_ARRAY_MODE(rtex->array_mode) | S_028010_FORMAT(format), + 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT, + (state->zsbuf->height / 8) - 1, 0xFFFFFFFF, NULL); +} + +static void r600_set_framebuffer_state(struct pipe_context *ctx, + const struct pipe_framebuffer_state *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + u32 shader_mask, tl, br, shader_control, target_mask; + + if (rstate == NULL) + return; + + /* unreference old buffer and reference new one */ + rstate->id = R600_PIPE_STATE_FRAMEBUFFER; + + util_copy_framebuffer_state(&rctx->framebuffer, state); + + rctx->pframebuffer = &rctx->framebuffer; + + /* build states */ + for (int i = 0; i < state->nr_cbufs; i++) { + r600_cb(rctx, rstate, state, i); } - memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - for (i = 0; i < count; i++) { - rctx->vertex_buffer[i].buffer = NULL; - if (r600_buffer_is_user_buffer(buffers[i].buffer)) - any_user_buffers = TRUE; - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + if (state->zsbuf) { + r600_db(rctx, rstate, state); } - rctx->any_user_vbs = any_user_buffers; - rctx->nvertex_buffer = count; + + target_mask = 0x00000000; + target_mask = 0xFFFFFFFF; + shader_mask = 0; + shader_control = 0; + for (int i = 0; i < state->nr_cbufs; i++) { + target_mask ^= 0xf << (i * 4); + shader_mask |= 0xf << (i * 4); + shader_control |= 1 << i; + } + tl = S_028240_TL_X(0) | S_028240_TL_Y(0) | S_028240_WINDOW_OFFSET_DISABLE(1); + br = S_028244_BR_X(state->width) | S_028244_BR_Y(state->height); + + r600_pipe_state_add_reg(rstate, + R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028034_PA_SC_SCREEN_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028208_PA_SC_WINDOW_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028244_PA_SC_GENERIC_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, + 0xFFFFFFFF, NULL); + if (rctx->family >= CHIP_RV770) { + r600_pipe_state_add_reg(rstate, + R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, + 0xFFFFFFFF, NULL); + } + + r600_pipe_state_add_reg(rstate, R_0287A0_CB_SHADER_CONTROL, + shader_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK, + 0x00000000, target_mask, NULL); + r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK, + shader_mask, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C30_CB_CLRCMP_CONTROL, + 0x01000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C34_CB_CLRCMP_SRC, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C38_CB_CLRCMP_DST, + 0x000000FF, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C3C_CB_CLRCMP_MSK, + 0xFFFFFFFF, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C48_PA_SC_AA_MASK, + 0xFFFFFFFF, 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); + rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } static void r600_set_index_buffer(struct pipe_context *ctx, const struct pipe_index_buffer *ib) { - struct r600_context *rctx = r600_context(ctx); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; if (ib) { pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); @@ -469,21 +1142,113 @@ static void r600_set_index_buffer(struct pipe_context *ctx, /* TODO make this more like a state */ } -static void r600_set_viewport_state(struct pipe_context *ctx, - const struct pipe_viewport_state *state) +static void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, + const struct pipe_vertex_buffer *buffers) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - r600_context_state_decref(rctx->viewport); + for (int i = 0; i < rctx->nvertex_buffer; i++) { + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); + } + memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); + for (int i = 0; i < count; i++) { + rctx->vertex_buffer[i].buffer = NULL; + if (r600_buffer_is_user_buffer(buffers[i].buffer)) + rctx->any_user_vbs = TRUE; + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + } + rctx->nvertex_buffer = count; +} - rstate = r600_new_context_state(pipe_viewport_type); - rstate->state.viewport = *state; - rctx->vtbl->viewport(rctx, &rstate->rstate[0], &rstate->state.viewport); - rctx->viewport = rstate; +static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, + struct pipe_resource *buffer) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_resource *rbuffer = (struct r600_resource*)buffer; + + switch (shader) { + case PIPE_SHADER_VERTEX: + rctx->vs_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028180_ALU_CONST_BUFFER_SIZE_VS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028980_ALU_CONST_CACHE_VS_0, + r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); + break; + case PIPE_SHADER_FRAGMENT: + rctx->ps_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028140_ALU_CONST_BUFFER_SIZE_PS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028940_ALU_CONST_CACHE_PS_0, + r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); + break; + default: + R600_ERR("unsupported %d\n", shader); + return; + } } -void r600_init_state_functions(struct r600_context *rctx) +static void *r600_create_shader_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) +{ + struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); + int r; + + r = r600_pipe_shader_create(ctx, shader, state->tokens); + if (r) { + return NULL; + } + return shader; +} + +static void r600_bind_ps_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + /* TODO delete old shader */ + rctx->ps_shader = (struct r600_pipe_shader *)state; +} + +static void r600_bind_vs_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + /* TODO delete old shader */ + rctx->vs_shader = (struct r600_pipe_shader *)state; +} + +static void r600_delete_ps_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; + + if (rctx->ps_shader == shader) { + rctx->ps_shader = NULL; + } + /* TODO proper delete */ + free(shader); +} + +static void r600_delete_vs_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; + + if (rctx->vs_shader == shader) { + rctx->vs_shader = NULL; + } + /* TODO proper delete */ + free(shader); +} + +void r600_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = r600_create_blend_state; rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state; @@ -494,30 +1259,23 @@ void r600_init_state_functions(struct r600_context *rctx) rctx->context.create_vertex_elements_state = r600_create_vertex_elements; rctx->context.create_vs_state = r600_create_shader_state; rctx->context.bind_blend_state = r600_bind_blend_state; - rctx->context.bind_depth_stencil_alpha_state = r600_bind_dsa_state; + rctx->context.bind_depth_stencil_alpha_state = r600_bind_state; rctx->context.bind_fragment_sampler_states = r600_bind_ps_sampler; rctx->context.bind_fs_state = r600_bind_ps_shader; - rctx->context.bind_rasterizer_state = r600_bind_rasterizer_state; + rctx->context.bind_rasterizer_state = r600_bind_rs_state; rctx->context.bind_vertex_elements_state = r600_bind_vertex_elements; rctx->context.bind_vertex_sampler_states = r600_bind_vs_sampler; rctx->context.bind_vs_state = r600_bind_vs_shader; rctx->context.delete_blend_state = r600_delete_state; rctx->context.delete_depth_stencil_alpha_state = r600_delete_state; - rctx->context.delete_fs_state = r600_delete_state; - rctx->context.delete_rasterizer_state = r600_delete_state; + rctx->context.delete_fs_state = r600_delete_ps_shader; + rctx->context.delete_rasterizer_state = r600_delete_rs_state; rctx->context.delete_sampler_state = r600_delete_state; rctx->context.delete_vertex_elements_state = r600_delete_vertex_element; - rctx->context.delete_vs_state = r600_delete_state; + rctx->context.delete_vs_state = r600_delete_vs_shader; rctx->context.set_blend_color = r600_set_blend_color; rctx->context.set_clip_state = r600_set_clip_state; - - if (rctx->screen->chip_class == EVERGREEN) - rctx->context.set_constant_buffer = eg_set_constant_buffer; - else if (rctx->screen->use_mem_constant) - rctx->context.set_constant_buffer = r600_set_constant_buffer_mem; - else - rctx->context.set_constant_buffer = r600_set_constant_buffer_file; - + rctx->context.set_constant_buffer = r600_set_constant_buffer; rctx->context.set_fragment_sampler_views = r600_set_ps_sampler_view; rctx->context.set_framebuffer_state = r600_set_framebuffer_state; rctx->context.set_polygon_stipple = r600_set_polygon_stipple; @@ -531,169 +1289,291 @@ void r600_init_state_functions(struct r600_context *rctx) rctx->context.sampler_view_destroy = r600_sampler_view_destroy; } -struct r600_context_state *r600_context_state_incref(struct r600_context_state *rstate) +void r600_init_config(struct r600_pipe_context *rctx) { - if (rstate == NULL) - return NULL; - rstate->refcount++; - return rstate; -} - -struct r600_context_state *r600_context_state_decref(struct r600_context_state *rstate) -{ - unsigned i; - - if (rstate == NULL) - return NULL; - if (--rstate->refcount) - return NULL; - switch (rstate->type) { - case pipe_sampler_view_type: - pipe_resource_reference(&rstate->state.sampler_view.texture, NULL); - break; - case pipe_framebuffer_type: - for (i = 0; i < rstate->state.framebuffer.nr_cbufs; i++) { - pipe_surface_reference(&rstate->state.framebuffer.cbufs[i], NULL); - } - pipe_surface_reference(&rstate->state.framebuffer.zsbuf, NULL); + int ps_prio; + int vs_prio; + int gs_prio; + int es_prio; + int num_ps_gprs; + int num_vs_gprs; + int num_gs_gprs; + int num_es_gprs; + int num_temp_gprs; + int num_ps_threads; + int num_vs_threads; + int num_gs_threads; + int num_es_threads; + int num_ps_stack_entries; + int num_vs_stack_entries; + int num_gs_stack_entries; + int num_es_stack_entries; + enum radeon_family family; + struct r600_pipe_state *rstate = &rctx->config; + u32 tmp; + + family = r600_get_family(rctx->radeon); + ps_prio = 0; + vs_prio = 1; + gs_prio = 2; + es_prio = 3; + switch (family) { + case CHIP_R600: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; break; - case pipe_viewport_type: - case pipe_depth_type: - case pipe_rasterizer_type: - case pipe_poly_stipple_type: - case pipe_scissor_type: - case pipe_clip_type: - case pipe_stencil_type: - case pipe_alpha_type: - case pipe_dsa_type: - case pipe_blend_type: - case pipe_stencil_ref_type: - case pipe_shader_type: - case pipe_sampler_type: + case CHIP_RV630: + case CHIP_RV635: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 40; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; break; + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: default: - R600_ERR("invalid type %d\n", rstate->type); - return NULL; - } - radeon_state_fini(&rstate->rstate[0]); - FREE(rstate); - return NULL; -} - -static void r600_bind_shader_sampler(struct r600_context *rctx, struct r600_shader_sampler_states *sampler) -{ - int i; - - for (i = 0; i < sampler->nsampler; i++) { - if (sampler->sampler[i]) - radeon_draw_bind(&rctx->draw, sampler->sampler[i]); + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV670: + num_ps_gprs = 144; + num_vs_gprs = 40; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV770: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 256; + num_vs_stack_entries = 256; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV730: + case CHIP_RV740: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV710: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 48; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; } - for (i = 0; i < sampler->nborder; i++) { - if (sampler->border[i]) - radeon_draw_bind(&rctx->draw, sampler->border[i]); - } + rstate->id = R600_PIPE_STATE_CONFIG; - for (i = 0; i < sampler->nview; i++) { - if (sampler->view[i]) - radeon_draw_bind(&rctx->draw, sampler->view[i]); + /* SQ_CONFIG */ + tmp = 0; + switch (family) { + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + case CHIP_RV710: + break; + default: + tmp |= S_008C00_VC_ENABLE(1); + break; } -} - - -static int setup_cb_flush(struct r600_context *rctx, struct radeon_state *flush) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - struct pipe_surface *surf; - int i; - - radeon_state_init(flush, rscreen->rw, R600_STATE_CB_FLUSH, 0, 0); - - for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++) { - surf = rctx->framebuffer->state.framebuffer.cbufs[i]; - - rtex = (struct r600_resource_texture*)surf->texture; - rbuffer = &rtex->resource; - /* just need to the bo to the flush list */ - radeon_ws_bo_reference(rscreen->rw, &flush->bo[i], rbuffer->bo); - flush->placement[i] = RADEON_GEM_DOMAIN_VRAM; + tmp |= S_008C00_DX9_CONSTS(0); + tmp |= S_008C00_ALU_INST_PREFER_VECTOR(1); + tmp |= S_008C00_PS_PRIO(ps_prio); + tmp |= S_008C00_VS_PRIO(vs_prio); + tmp |= S_008C00_GS_PRIO(gs_prio); + tmp |= S_008C00_ES_PRIO(es_prio); + r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); + + /* SQ_GPR_RESOURCE_MGMT_1 */ + tmp = 0; + tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); + tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); + tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + + /* SQ_GPR_RESOURCE_MGMT_2 */ + tmp = 0; + tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); + tmp |= S_008C08_NUM_GS_GPRS(num_es_gprs); + r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + + /* SQ_THREAD_RESOURCE_MGMT */ + tmp = 0; + tmp |= S_008C0C_NUM_PS_THREADS(num_ps_threads); + tmp |= S_008C0C_NUM_VS_THREADS(num_vs_threads); + tmp |= S_008C0C_NUM_GS_THREADS(num_gs_threads); + tmp |= S_008C0C_NUM_ES_THREADS(num_es_threads); + r600_pipe_state_add_reg(rstate, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL); + + /* SQ_STACK_RESOURCE_MGMT_1 */ + tmp = 0; + tmp |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); + tmp |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); + r600_pipe_state_add_reg(rstate, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + + /* SQ_STACK_RESOURCE_MGMT_2 */ + tmp = 0; + tmp |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); + tmp |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); + r600_pipe_state_add_reg(rstate, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL); + + if (family >= CHIP_RV770) { + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000002, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL); + } else { + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000003, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004012, 0xFFFFFFFF, NULL); } - flush->nbo = rctx->framebuffer->state.framebuffer.nr_cbufs; - return radeon_state_pm4(flush); + r600_pipe_state_add_reg(rstate, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL); + r600_context_pipe_state_set(&rctx->ctx, rstate); } -static int setup_db_flush(struct r600_context *rctx, struct radeon_state *flush) +void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) { - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - struct pipe_surface *surf; - - surf = rctx->framebuffer->state.framebuffer.zsbuf; - - if (!surf) - return 0; - - radeon_state_init(flush, rscreen->rw, R600_STATE_DB_FLUSH, 0, 0); - rtex = (struct r600_resource_texture*)surf->texture; - rbuffer = &rtex->resource; - /* just need to the bo to the flush list */ - radeon_ws_bo_reference(rscreen->rw, &flush->bo[0], rbuffer->bo); - flush->placement[0] = RADEON_GEM_DOMAIN_VRAM; - - flush->nbo = 1; - return radeon_state_pm4(flush); -} - -int r600_context_hw_states(struct pipe_context *ctx) -{ - struct r600_context *rctx = r600_context(ctx); - unsigned i; - - /* build new states */ - rctx->vtbl->rasterizer(rctx, &rctx->hw_states.rasterizer); - rctx->vtbl->scissor(rctx, &rctx->hw_states.scissor); - rctx->vtbl->dsa(rctx, &rctx->hw_states.dsa); - rctx->vtbl->cb_cntl(rctx, &rctx->hw_states.cb_cntl); - - /* setup flushes */ - setup_db_flush(rctx, &rctx->hw_states.db_flush); - setup_cb_flush(rctx, &rctx->hw_states.cb_flush); - - /* bind states */ - radeon_draw_bind(&rctx->draw, &rctx->config); - - radeon_draw_bind(&rctx->draw, &rctx->hw_states.rasterizer); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.scissor); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.dsa); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.cb_cntl); - - radeon_draw_bind(&rctx->draw, &rctx->hw_states.db_flush); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.cb_flush); - - radeon_draw_bind(&rctx->draw, &rctx->hw_states.db_flush); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.cb_flush); - - if (rctx->viewport) { - radeon_draw_bind(&rctx->draw, &rctx->viewport->rstate[0]); - } - if (rctx->blend) { - radeon_draw_bind(&rctx->draw, &rctx->blend->rstate[0]); - } - if (rctx->clip) { - radeon_draw_bind(&rctx->draw, &rctx->clip->rstate[0]); - } - for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++) { - radeon_draw_bind(&rctx->draw, &rctx->framebuffer->rstate[i+1]); - } - if (rctx->framebuffer->state.framebuffer.zsbuf) { - radeon_draw_bind(&rctx->draw, &rctx->framebuffer->rstate[0]); + struct pipe_depth_stencil_alpha_state dsa; + struct r600_pipe_state *rstate; + boolean quirk = false; + + if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 || + rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) + quirk = true; + + memset(&dsa, 0, sizeof(dsa)); + + if (quirk) { + dsa.depth.enabled = 1; + dsa.depth.func = PIPE_FUNC_LEQUAL; + dsa.stencil[0].enabled = 1; + dsa.stencil[0].func = PIPE_FUNC_ALWAYS; + dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_INCR; + dsa.stencil[0].writemask = 0xff; } - r600_bind_shader_sampler(rctx, &rctx->vs_sampler); - r600_bind_shader_sampler(rctx, &rctx->ps_sampler); - - return 0; + rstate = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa); + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + 0x0, + S_02880C_DUAL_EXPORT_ENABLE(1), NULL); + r600_pipe_state_add_reg(rstate, + R_028D0C_DB_RENDER_CONTROL, + S_028D0C_DEPTH_COPY_ENABLE(1) | + S_028D0C_STENCIL_COPY_ENABLE(1) | + S_028D0C_COPY_CENTROID(1), + S_028D0C_DEPTH_COPY_ENABLE(1) | + S_028D0C_STENCIL_COPY_ENABLE(1) | + S_028D0C_COPY_CENTROID(1), NULL); + return rstate; } diff --git a/src/gallium/drivers/r600/r600_state2.c b/src/gallium/drivers/r600/r600_state2.c deleted file mode 100644 index 86c10a877d..0000000000 --- a/src/gallium/drivers/r600/r600_state2.c +++ /dev/null @@ -1,2228 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/* TODO: - * - fix mask for depth control & cull for query - */ -#include <stdio.h> -#include <errno.h> -#include <pipe/p_defines.h> -#include <pipe/p_state.h> -#include <pipe/p_context.h> -#include <tgsi/tgsi_scan.h> -#include <tgsi/tgsi_parse.h> -#include <tgsi/tgsi_util.h> -#include <util/u_blitter.h> -#include <util/u_double_list.h> -#include <util/u_transfer.h> -#include <util/u_surface.h> -#include <util/u_pack_color.h> -#include <util/u_memory.h> -#include <util/u_inlines.h> -#include <pipebuffer/pb_buffer.h> -#include "r600.h" -#include "r600d.h" -#include "r700_sq.h" -struct radeon_state { - unsigned dummy; -}; -#include "r600_resource.h" -#include "r600_shader.h" - - -uint32_t r600_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view, - uint32_t *word4_p, uint32_t *yuv_format_p); - -#include "r600_state_inlines.h" - -enum chip_class { - R600, - R700, - EVERGREEN, -}; - -enum r600_pipe_state_id { - R600_PIPE_STATE_BLEND = 0, - R600_PIPE_STATE_BLEND_COLOR, - R600_PIPE_STATE_CONFIG, - R600_PIPE_STATE_CLIP, - R600_PIPE_STATE_SCISSOR, - R600_PIPE_STATE_VIEWPORT, - R600_PIPE_STATE_RASTERIZER, - R600_PIPE_STATE_VGT, - R600_PIPE_STATE_FRAMEBUFFER, - R600_PIPE_STATE_DSA, - R600_PIPE_STATE_STENCIL_REF, - R600_PIPE_STATE_PS_SHADER, - R600_PIPE_STATE_VS_SHADER, - R600_PIPE_STATE_CONSTANT, - R600_PIPE_STATE_SAMPLER, - R600_PIPE_STATE_RESOURCE, - R600_PIPE_NSTATES -}; - -struct r600_screen { - struct pipe_screen screen; - struct radeon *radeon; - unsigned chip_class; -}; - -struct r600_pipe_sampler_view { - struct pipe_sampler_view base; - struct r600_pipe_state state; -}; - -struct r600_pipe_rasterizer { - struct r600_pipe_state rstate; - bool flatshade; - unsigned sprite_coord_enable; -}; - -struct r600_pipe_blend { - struct r600_pipe_state rstate; - unsigned cb_target_mask; -}; - -struct r600_pipe_shader { - struct r600_shader shader; - struct r600_pipe_state rstate; - struct radeon_ws_bo *bo; -}; - -struct r600_vertex_element -{ - unsigned count; - unsigned refcount; - struct pipe_vertex_element elements[32]; -}; - -struct r600_pipe_context { - struct pipe_context context; - struct r600_screen *screen; - struct radeon *radeon; - struct blitter_context *blitter; - struct r600_pipe_state *states[R600_PIPE_NSTATES]; - struct r600_context ctx; - struct r600_vertex_element *vertex_elements; - struct pipe_framebuffer_state framebuffer; - struct pipe_index_buffer index_buffer; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - unsigned nvertex_buffer; - unsigned cb_target_mask; - /* for saving when using blitter */ - struct pipe_stencil_ref stencil_ref; - struct pipe_viewport_state viewport; - struct pipe_clip_state clip; - unsigned vs_nconst; - unsigned ps_nconst; - struct r600_pipe_state vs_const[256]; - struct r600_pipe_state ps_const[256]; - struct r600_pipe_state vs_resource[160]; - struct r600_pipe_state ps_resource[160]; - struct r600_pipe_state config; - struct r600_pipe_shader *ps_shader; - struct r600_pipe_shader *vs_shader; - /* shader information */ - bool ps_rebuild; - bool vs_rebuild; - unsigned sprite_coord_enable; - bool flatshade; -}; - -static INLINE u32 S_FIXED(float value, u32 frac_bits) -{ - return value * (1 << frac_bits); -} - -/* r600_shader.c */ -static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = &shader->rstate; - struct r600_shader *rshader = &shader->shader; - unsigned spi_vs_out_id[10]; - unsigned i, tmp; - - /* clear previous register */ - rstate->nregs = 0; - - /* so far never got proper semantic id from tgsi */ - for (i = 0; i < 10; i++) { - spi_vs_out_id[i] = 0; - } - for (i = 0; i < 32; i++) { - tmp = i << ((i & 3) * 8); - spi_vs_out_id[i / 4] |= tmp; - } - for (i = 0; i < 10; i++) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028614_SPI_VS_OUT_ID_0 + i * 4, - spi_vs_out_id[i], 0xFFFFFFFF, NULL); - } - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028868_SQ_PGM_RESOURCES_VS, - S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0288A4_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0288D0_SQ_PGM_CF_OFFSET_VS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0288DC_SQ_PGM_CF_OFFSET_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028858_SQ_PGM_START_VS, - 0x00000000, 0xFFFFFFFF, shader->bo); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028894_SQ_PGM_START_FS, - 0x00000000, 0xFFFFFFFF, shader->bo); - rctx->vs_rebuild = false; -} - -static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = &shader->rstate; - struct r600_shader *rshader = &shader->shader; - unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z; - boolean have_pos = FALSE; - - /* clear previous register */ - rstate->nregs = 0; - - for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(i); - tmp |= S_028644_SEL_CENTROID(1); - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - have_pos = TRUE; - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); - } - if (rctx->sprite_coord_enable & (1 << i)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); - } - - exports_ps = 0; - num_cout = 0; - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - exports_ps |= 1; - else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - exports_ps |= (1 << (num_cout+1)); - num_cout++; - } - } - if (!exports_ps) { - /* always at least export 1 component per pixel */ - exports_ps = 2; - } - - spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); - spi_input_z = 0; - if (have_pos) { - spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) | - S_0286CC_BARYC_SAMPLE_CNTL(1); - spi_input_z |= 1; - } - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286D0_SPI_PS_IN_CONTROL_1, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028840_SQ_PGM_START_PS, - 0x00000000, 0xFFFFFFFF, shader->bo); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028850_SQ_PGM_RESOURCES_PS, - S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028854_SQ_PGM_EXPORTS_PS, - exports_ps, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0288CC_SQ_PGM_CF_OFFSET_PS, - 0x00000000, 0xFFFFFFFF, NULL); - rctx->ps_rebuild = false; -} - -static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_shader *rshader = &shader->shader; - void *ptr; - - /* copy new shader */ - if (shader->bo == NULL) { - shader->bo = radeon_ws_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0); - if (shader->bo == NULL) { - return -ENOMEM; - } - ptr = radeon_ws_bo_map(rctx->radeon, shader->bo, 0, NULL); - memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); - radeon_ws_bo_unmap(rctx->radeon, shader->bo); - } - /* build state */ - rshader->flat_shade = rctx->flatshade; - switch (rshader->processor_type) { - case TGSI_PROCESSOR_VERTEX: - r600_pipe_shader_vs(ctx, shader); - break; - case TGSI_PROCESSOR_FRAGMENT: - r600_pipe_shader_ps(ctx, shader); - break; - default: - return -EINVAL; - } - r600_context_pipe_state_set(&rctx->ctx, &shader->rstate); - return 0; -} - -static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_shader *shader = &rshader->shader; - const struct util_format_description *desc; - enum pipe_format resource_format[160]; - unsigned i, nresources = 0; - struct r600_bc *bc = &shader->bc; - struct r600_bc_cf *cf; - struct r600_bc_vtx *vtx; - - if (shader->processor_type != TGSI_PROCESSOR_VERTEX) - return 0; - for (i = 0; i < rctx->vertex_elements->count; i++) { - resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; - } - radeon_ws_bo_reference(rctx->radeon, &rshader->bo, NULL); - LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { - switch (cf->inst) { - case V_SQ_CF_WORD1_SQ_CF_INST_VTX: - case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: - LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { - desc = util_format_description(resource_format[vtx->buffer_id]); - if (desc == NULL) { - R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); - return -EINVAL; - } - vtx->dst_sel_x = desc->swizzle[0]; - vtx->dst_sel_y = desc->swizzle[1]; - vtx->dst_sel_z = desc->swizzle[2]; - vtx->dst_sel_w = desc->swizzle[3]; - } - break; - default: - break; - } - } - return r600_bc_build(&shader->bc); -} - -static int r600_pipe_shader_update2(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - int r; - - if (shader == NULL) - return -EINVAL; - if (shader->bo) { - switch (shader->shader.processor_type) { - case TGSI_PROCESSOR_VERTEX: - if (!rctx->vs_rebuild) - return 0; - break; - case TGSI_PROCESSOR_FRAGMENT: - if (!rctx->ps_rebuild) - return 0; - break; - default: - return -EINVAL; - } - } - /* there should be enough input */ - if (rctx->vertex_elements->count < shader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, shader->shader.bc.nresource); - return -EINVAL; - } - r = r600_shader_update(ctx, shader); - if (r) - return r; - return r600_pipe_shader(ctx, shader); -} - -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); -static int r600_pipe_shader_create2(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - int r; - -//fprintf(stderr, "--------------------------------------------------------------\n"); -//tgsi_dump(tokens, 0); - shader->shader.family = r600_get_family(rctx->radeon); - r = r600_shader_from_tgsi(tokens, &shader->shader); - if (r) { - R600_ERR("translation from TGSI failed !\n"); - return r; - } - r = r600_bc_build(&shader->shader.bc); - if (r) { - R600_ERR("building bytecode failed !\n"); - return r; - } -//fprintf(stderr, "______________________________________________________________\n"); - return 0; -} -/* r600_shader.c END */ - -static const char* r600_get_vendor(struct pipe_screen* pscreen) -{ - return "X.Org"; -} - -static const char* r600_get_name(struct pipe_screen* pscreen) -{ - struct r600_screen *rscreen = (struct r600_screen *)pscreen; - enum radeon_family family = r600_get_family(rscreen->radeon); - - if (family >= CHIP_R600 && family < CHIP_RV770) - return "R600 (HD2XXX,HD3XXX)"; - else - return "R700 (HD4XXX)"; -} - -static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) -{ - switch (param) { - /* Supported features (boolean caps). */ - case PIPE_CAP_NPOT_TEXTURES: - case PIPE_CAP_TWO_SIDED_STENCIL: - case PIPE_CAP_GLSL: - case PIPE_CAP_DUAL_SOURCE_BLEND: - case PIPE_CAP_ANISOTROPIC_FILTER: - case PIPE_CAP_POINT_SPRITE: - case PIPE_CAP_OCCLUSION_QUERY: - case PIPE_CAP_TEXTURE_SHADOW_MAP: - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_TEXTURE_MIRROR_REPEAT: - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - case PIPE_CAP_SM3: - case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_INDEP_BLEND_ENABLE: - case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: - case PIPE_CAP_DEPTH_CLAMP: - return 1; - - /* Unsupported features (boolean caps). */ - case PIPE_CAP_TIMER_QUERY: - case PIPE_CAP_STREAM_OUTPUT: - case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */ - return 0; - - /* Texturing. */ - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 14; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - /* FIXME allow this once infrastructure is there */ - return 0; - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 16; - - /* Render targets. */ - case PIPE_CAP_MAX_RENDER_TARGETS: - /* FIXME some r6xx are buggy and can only do 4 */ - return 8; - - /* Fragment coordinate conventions. */ - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 0; - - default: - R600_ERR("r600: unknown param %d\n", param); - return 0; - } -} - -static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 8192.0f; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0f; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0f; - default: - R600_ERR("r600: unsupported paramf %d\n", param); - return 0.0f; - } -} - -static boolean r600_is_format_supported(struct pipe_screen* screen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned sample_count, - unsigned usage, - unsigned geom_flags) -{ - unsigned retval = 0; - if (target >= PIPE_MAX_TEXTURE_TYPES) { - R600_ERR("r600: unsupported texture type %d\n", target); - return FALSE; - } - - /* Multisample */ - if (sample_count > 1) - return FALSE; - - if ((usage & PIPE_BIND_SAMPLER_VIEW) && - r600_is_sampler_format_supported(format)) { - retval |= PIPE_BIND_SAMPLER_VIEW; - } - - if ((usage & (PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) && - r600_is_colorbuffer_format_supported(format)) { - retval |= usage & - (PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED); - } - - if ((usage & PIPE_BIND_DEPTH_STENCIL) && - r600_is_zs_format_supported(format)) { - retval |= PIPE_BIND_DEPTH_STENCIL; - } - - if ((usage & PIPE_BIND_VERTEX_BUFFER) && - r600_is_vertex_format_supported(format)) - retval |= PIPE_BIND_VERTEX_BUFFER; - - if (usage & PIPE_BIND_TRANSFER_READ) - retval |= PIPE_BIND_TRANSFER_READ; - if (usage & PIPE_BIND_TRANSFER_WRITE) - retval |= PIPE_BIND_TRANSFER_WRITE; - - return retval == usage; -} - -static void r600_destroy_screen(struct pipe_screen* pscreen) -{ - struct r600_screen *rscreen = (struct r600_screen *)pscreen; - - if (rscreen == NULL) - return; - FREE(rscreen); -} - -struct r600_drawl { - struct pipe_context *ctx; - unsigned mode; - unsigned start; - unsigned count; - unsigned index_size; - struct pipe_resource *index_buffer; -}; - -int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); -static void r600_draw_common(struct r600_drawl *draw) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx; - struct r600_pipe_state *rstate; - struct r600_resource *rbuffer; - unsigned i, j, offset, format, prim; - u32 vgt_dma_index_type, vgt_draw_initiator, mask; - struct pipe_vertex_buffer *vertex_buffer; - struct r600_draw rdraw; - struct r600_pipe_state vgt; - - switch (draw->index_size) { - case 2: - vgt_draw_initiator = 0; - vgt_dma_index_type = 0; - break; - case 4: - vgt_draw_initiator = 0; - vgt_dma_index_type = 1; - break; - case 0: - vgt_draw_initiator = 2; - vgt_dma_index_type = 0; - break; - default: - R600_ERR("unsupported index size %d\n", draw->index_size); - return; - } - if (r600_conv_pipe_prim(draw->mode, &prim)) - return; - - /* rebuild vertex shader if input format changed */ - if (r600_pipe_shader_update2(&rctx->context, rctx->vs_shader)) - return; - if (r600_pipe_shader_update2(&rctx->context, rctx->ps_shader)) - return; - - for (i = 0 ; i < rctx->vertex_elements->count; i++) { - rstate = &rctx->vs_resource[i]; - j = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[j]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset; - format = r600_translate_colorformat(rctx->vertex_elements->elements[i].src_format); - rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, - R_038008_RESOURCE0_WORD2, - S_038008_STRIDE(vertex_buffer->stride) | - S_038008_DATA_FORMAT(format), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_03800C_RESOURCE0_WORD3, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038018_RESOURCE0_WORD6, 0xC0000000, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, i); - } - - mask = 0; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - mask |= (0xF << (i * 4)); - } - - vgt.id = R600_PIPE_STATE_VGT; - vgt.nregs = 0; - r600_pipe_state_add_reg(&vgt, R600_GROUP_CONFIG, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT, R_028408_VGT_INDX_OFFSET, draw->start, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set(&rctx->ctx, &vgt); - - rdraw.vgt_num_indices = draw->count; - rdraw.vgt_num_instances = 1; - rdraw.vgt_index_type = vgt_dma_index_type; - rdraw.vgt_draw_initiator = vgt_draw_initiator; - rdraw.indices = NULL; - if (draw->index_buffer) { - rbuffer = (struct r600_resource*)draw->index_buffer; - rdraw.indices = rbuffer->bo; - } - r600_context_draw(&rctx->ctx, &rdraw); -} - -static void r600_draw_vbo2(struct pipe_context *ctx, const struct pipe_draw_info *info) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_drawl draw; - - assert(info->index_bias == 0); - - draw.ctx = ctx; - draw.mode = info->mode; - draw.start = info->start; - draw.count = info->count; - if (info->indexed && rctx->index_buffer.buffer) { - draw.index_size = rctx->index_buffer.index_size; - draw.index_buffer = rctx->index_buffer.buffer; - assert(rctx->index_buffer.offset % - rctx->index_buffer.index_size == 0); - draw.start += rctx->index_buffer.offset / - rctx->index_buffer.index_size; - } else { - draw.index_size = 0; - draw.index_buffer = NULL; - } - r600_draw_common(&draw); -} - -static void r600_flush2(struct pipe_context *ctx, unsigned flags, - struct pipe_fence_handle **fence) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; -#if 0 - static int dc = 0; - char dname[256]; -#endif - - if (!rctx->ctx.pm4_cdwords) - return; - -#if 0 - sprintf(dname, "gallium-%08d.bof", dc); - if (dc < 2) { - r600_context_dump_bof(&rctx->ctx, dname); - R600_ERR("dumped %s\n", dname); - } - dc++; -#endif - r600_context_flush(&rctx->ctx); -} - -static void r600_destroy_context(struct pipe_context *context) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)context; - - r600_context_fini(&rctx->ctx); - for (int i = 0; i < R600_PIPE_NSTATES; i++) { - free(rctx->states[i]); - } - FREE(rctx); -} - -static void r600_blitter_save_states(struct pipe_context *ctx) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]); - util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->states[R600_PIPE_STATE_DSA]); - if (rctx->states[R600_PIPE_STATE_STENCIL_REF]) { - util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref); - } - util_blitter_save_rasterizer(rctx->blitter, rctx->states[R600_PIPE_STATE_RASTERIZER]); - util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader); - util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader); - util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_elements); - if (rctx->states[R600_PIPE_STATE_VIEWPORT]) { - util_blitter_save_viewport(rctx->blitter, &rctx->viewport); - } - if (rctx->states[R600_PIPE_STATE_CLIP]) { - util_blitter_save_clip(rctx->blitter, &rctx->clip); - } - util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, rctx->vertex_buffer); - - rctx->vertex_elements = NULL; - - /* TODO queries */ -} - -static void r600_clear(struct pipe_context *ctx, unsigned buffers, - const float *rgba, double depth, unsigned stencil) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_framebuffer_state *fb = &rctx->framebuffer; - - r600_blitter_save_states(ctx); - util_blitter_clear(rctx->blitter, fb->width, fb->height, - fb->nr_cbufs, buffers, rgba, depth, - stencil); -} - -static void r600_clear_render_target(struct pipe_context *ctx, - struct pipe_surface *dst, - const float *rgba, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_framebuffer_state *fb = &rctx->framebuffer; - - util_blitter_save_framebuffer(rctx->blitter, fb); - util_blitter_clear_render_target(rctx->blitter, dst, rgba, - dstx, dsty, width, height); -} - -static void r600_clear_depth_stencil(struct pipe_context *ctx, - struct pipe_surface *dst, - unsigned clear_flags, - double depth, - unsigned stencil, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_framebuffer_state *fb = &rctx->framebuffer; - - util_blitter_save_framebuffer(rctx->blitter, fb); - util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil, - dstx, dsty, width, height); -} - - -static void r600_resource_copy_region(struct pipe_context *ctx, - struct pipe_resource *dst, - struct pipe_subresource subdst, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height) -{ - util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height); -} - -static void r600_init_blit_functions2(struct r600_pipe_context *rctx) -{ - rctx->context.clear = r600_clear; - rctx->context.clear_render_target = r600_clear_render_target; - rctx->context.clear_depth_stencil = r600_clear_depth_stencil; - rctx->context.resource_copy_region = r600_resource_copy_region; -} - -static void r600_init_context_resource_functions2(struct r600_pipe_context *r600) -{ - r600->context.get_transfer = u_get_transfer_vtbl; - r600->context.transfer_map = u_transfer_map_vtbl; - r600->context.transfer_flush_region = u_transfer_flush_region_vtbl; - r600->context.transfer_unmap = u_transfer_unmap_vtbl; - r600->context.transfer_destroy = u_transfer_destroy_vtbl; - r600->context.transfer_inline_write = u_transfer_inline_write_vtbl; - r600->context.is_resource_referenced = u_is_resource_referenced_vtbl; -} - -static void r600_set_blend_color(struct pipe_context *ctx, - const struct pipe_blend_color *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - - if (rstate == NULL) - return; - - rstate->id = R600_PIPE_STATE_BLEND_COLOR; - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL); - free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]); - rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void *r600_create_blend_state(struct pipe_context *ctx, - const struct pipe_blend_state *state) -{ - struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend); - struct r600_pipe_state *rstate; - u32 color_control, target_mask; - - if (blend == NULL) { - return NULL; - } - rstate = &blend->rstate; - - rstate->id = R600_PIPE_STATE_BLEND; - - target_mask = 0; - color_control = S_028808_PER_MRT_BLEND(1); - if (state->logicop_enable) { - color_control |= (state->logicop_func << 16) | (state->logicop_func << 20); - } else { - color_control |= (0xcc << 16); - } - /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ - if (state->independent_blend_enable) { - for (int i = 0; i < 8; i++) { - if (state->rt[i].blend_enable) { - color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); - } - target_mask |= (state->rt[i].colormask << (4 * i)); - } - } else { - for (int i = 0; i < 8; i++) { - if (state->rt[0].blend_enable) { - color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); - } - target_mask |= (state->rt[0].colormask << (4 * i)); - } - } - blend->cb_target_mask = target_mask; - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028808_CB_COLOR_CONTROL, - color_control, 0xFFFFFFFF, NULL); - - for (int i = 0; i < 8; i++) { - unsigned eqRGB = state->rt[i].rgb_func; - unsigned srcRGB = state->rt[i].rgb_src_factor; - unsigned dstRGB = state->rt[i].rgb_dst_factor; - - unsigned eqA = state->rt[i].alpha_func; - unsigned srcA = state->rt[i].alpha_src_factor; - unsigned dstA = state->rt[i].alpha_dst_factor; - uint32_t bc = 0; - - if (!state->rt[i].blend_enable) - continue; - - bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); - bc |= S_028804_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); - bc |= S_028804_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); - - if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { - bc |= S_028804_SEPARATE_ALPHA_BLEND(1); - bc |= S_028804_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); - bc |= S_028804_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); - bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); - } - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL); - if (i == 0) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL); - } - } - return rstate; -} - -static void r600_bind_blend_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state; - struct r600_pipe_state *rstate; - - if (state == NULL) - return; - rstate = &blend->rstate; - rctx->states[rstate->id] = rstate; - rctx->cb_target_mask = blend->cb_target_mask; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void *r600_create_dsa_state(struct pipe_context *ctx, - const struct pipe_depth_stencil_alpha_state *state) -{ - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; - unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control; - - if (rstate == NULL) { - return NULL; - } - - rstate->id = R600_PIPE_STATE_DSA; - /* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */ - /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be - * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will - * be set if shader use texkill instruction - */ - db_shader_control = 0x210; - stencil_ref_mask = 0; - stencil_ref_mask_bf = 0; - db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | - S_028800_Z_WRITE_ENABLE(state->depth.writemask) | - S_028800_ZFUNC(state->depth.func); - - /* stencil */ - if (state->stencil[0].enabled) { - db_depth_control |= S_028800_STENCIL_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); - db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); - db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); - - - stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | - S_028430_STENCILWRITEMASK(state->stencil[0].writemask); - if (state->stencil[1].enabled) { - db_depth_control |= S_028800_BACKFACE_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); - db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); - db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); - stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | - S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); - } - } - - /* alpha */ - alpha_test_control = 0; - alpha_ref = 0; - if (state->alpha.enabled) { - alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); - alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); - alpha_ref = fui(state->alpha.ref_value); - } - - /* misc */ - db_render_control = 0; - db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); - /* TODO db_render_override depends on query */ - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028430_DB_STENCILREFMASK, stencil_ref_mask, - 0xFFFFFFFF & C_028430_STENCILREF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf, - 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL); - - return rstate; -} - -static void *r600_create_rs_state(struct pipe_context *ctx, - const struct pipe_rasterizer_state *state) -{ - struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer); - struct r600_pipe_state *rstate; - float offset_units = 0, offset_scale = 0; - unsigned offset_db_fmt_cntl = 0; - unsigned tmp; - unsigned prov_vtx = 1; - - if (rs == NULL) { - return NULL; - } - - rstate = &rs->rstate; - rs->flatshade = state->flatshade; - rs->sprite_coord_enable = state->sprite_coord_enable; - - rstate->id = R600_PIPE_STATE_RASTERIZER; - if (state->flatshade_first) - prov_vtx = 0; - tmp = 0x00000001; - if (state->sprite_coord_enable) { - tmp |= S_0286D4_PNT_SPRITE_ENA(1) | - S_0286D4_PNT_SPRITE_OVRD_X(2) | - S_0286D4_PNT_SPRITE_OVRD_Y(3) | - S_0286D4_PNT_SPRITE_OVRD_Z(0) | - S_0286D4_PNT_SPRITE_OVRD_W(1); - if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { - tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); - } - } - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028814_PA_SU_SC_MODE_CNTL, - S_028814_PROVOKING_VTX_LAST(prov_vtx) | - S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | - S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | - S_028814_FACE(!state->front_ccw) | - S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02881C_PA_CL_VS_OUT_CNTL, - S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | - S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - /* point size 12.4 fixed point */ - tmp = (unsigned)(state->point_size * 8.0); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A08_PA_SU_LINE_CNTL, 0x00000008, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, offset_db_fmt_cntl, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, fui(offset_scale), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, fui(offset_scale), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units), 0xFFFFFFFF, NULL); - return rstate; -} - -static void r600_bind_rs_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - if (state == NULL) - return; - - if (rctx->flatshade != rs->flatshade) { - rctx->ps_rebuild = true; - } - if (rctx->sprite_coord_enable != rs->sprite_coord_enable) { - rctx->ps_rebuild = true; - } - rctx->flatshade = rs->flatshade; - rctx->sprite_coord_enable = rs->sprite_coord_enable; - - rctx->states[rs->rstate.id] = &rs->rstate; - r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); -} - -static void r600_delete_rs_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; - - if (rctx->states[rs->rstate.id] == &rs->rstate) { - rctx->states[rs->rstate.id] = NULL; - } - free(rs); -} - -static void *r600_create_sampler_state(struct pipe_context *ctx, - const struct pipe_sampler_state *state) -{ - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - union util_color uc; - - if (rstate == NULL) { - return NULL; - } - - rstate->id = R600_PIPE_STATE_SAMPLER; - util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - r600_pipe_state_add_reg(rstate, R600_GROUP_SAMPLER, R_03C000_SQ_TEX_SAMPLER_WORD0_0, - S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | - S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | - S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | - S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) | - S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) | - S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | - S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | - S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); - /* FIXME LOD it depends on texture base level ... */ - r600_pipe_state_add_reg(rstate, R600_GROUP_SAMPLER, R_03C004_SQ_TEX_SAMPLER_WORD1_0, - S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | - S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | - S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_SAMPLER, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL); - if (uc.ui) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); - } - return rstate; -} - -static void *r600_create_vertex_elements(struct pipe_context *ctx, - unsigned count, - const struct pipe_vertex_element *elements) -{ - struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); - - assert(count < 32); - v->count = count; - v->refcount = 1; - memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); - return v; -} - -static void r600_sampler_view_destroy(struct pipe_context *ctx, - struct pipe_sampler_view *state) -{ - struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state; - - pipe_resource_reference(&state->texture, NULL); - FREE(resource); -} - -static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx, - struct pipe_resource *texture, - const struct pipe_sampler_view *state) -{ - struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); - struct r600_pipe_state *rstate; - const struct util_format_description *desc; - struct r600_resource_texture *tmp; - struct r600_resource *rbuffer; - unsigned format; - uint32_t word4 = 0, yuv_format = 0, pitch = 0; - unsigned char swizzle[4], array_mode = 0, tile_type = 0; - struct radeon_ws_bo *bo[2]; - - if (resource == NULL) - return NULL; - rstate = &resource->state; - - /* initialize base object */ - resource->base = *state; - resource->base.texture = NULL; - pipe_reference(NULL, &texture->reference); - resource->base.texture = texture; - resource->base.reference.count = 1; - resource->base.context = ctx; - - swizzle[0] = state->swizzle_r; - swizzle[1] = state->swizzle_g; - swizzle[2] = state->swizzle_b; - swizzle[3] = state->swizzle_a; - format = r600_translate_texformat(texture->format, - swizzle, - &word4, &yuv_format); - if (format == ~0) { - format = 0; - } - desc = util_format_description(texture->format); - if (desc == NULL) { - R600_ERR("unknow format %d\n", texture->format); - } - tmp = (struct r600_resource_texture*)texture; - rbuffer = &tmp->resource; - bo[0] = rbuffer->bo; - bo[1] = rbuffer->bo; - /* FIXME depth texture decompression */ - if (tmp->depth) { -#if 0 - r = r600_texture_from_depth(ctx, tmp, view->first_level); - if (r) { - return; - } - bo[0] = radeon_ws_bo_incref(rscreen->rw, tmp->uncompressed); - bo[1] = radeon_ws_bo_incref(rscreen->rw, tmp->uncompressed); -#endif - } - pitch = (tmp->pitch[0] / tmp->bpt); - pitch = (pitch + 0x7) & ~0x7; - - /* FIXME properly handle first level != 0 */ - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038000_RESOURCE0_WORD0, - S_038000_DIM(r600_tex_dim(texture->target)) | - S_038000_TILE_MODE(array_mode) | - S_038000_TILE_TYPE(tile_type) | - S_038000_PITCH((pitch / 8) - 1) | - S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038004_RESOURCE0_WORD1, - S_038004_TEX_HEIGHT(texture->height0 - 1) | - S_038004_TEX_DEPTH(texture->depth0 - 1) | - S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038008_RESOURCE0_WORD2, - tmp->offset[0] >> 8, 0xFFFFFFFF, bo[0]); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_03800C_RESOURCE0_WORD3, - tmp->offset[1] >> 8, 0xFFFFFFFF, bo[1]); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038010_RESOURCE0_WORD4, - word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | - S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | - S_038010_REQUEST_SIZE(1) | - S_038010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038014_RESOURCE0_WORD5, - S_038014_LAST_LEVEL(state->last_level) | - S_038014_BASE_ARRAY(0) | - S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038018_RESOURCE0_WORD6, - S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL); - - return &resource->base; -} - -static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, - struct pipe_sampler_view **views) -{ - /* TODO */ - assert(1); -} - -static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, - struct pipe_sampler_view **views) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; - - for (int i = 0; i < count; i++) { - if (resource[i]) { - r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); - } - } -} - -static void r600_bind_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; - - if (state == NULL) - return; - rctx->states[rstate->id] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - - for (int i = 0; i < count; i++) { - r600_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i); - } -} - -static void r600_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void **states) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - - /* TODO implement */ - for (int i = 0; i < count; i++) { - r600_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i); - } -} - -static void r600_delete_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; - - if (rctx->states[rstate->id] == rstate) { - rctx->states[rstate->id] = NULL; - } - for (int i = 0; i < rstate->nregs; i++) { - radeon_ws_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); - } - free(rstate); -} - -static void r600_delete_vertex_element(struct pipe_context *ctx, void *state) -{ - struct r600_vertex_element *v = (struct r600_vertex_element*)state; - - if (v == NULL) - return; - if (--v->refcount) - return; - free(v); -} - -static void r600_set_clip_state(struct pipe_context *ctx, - const struct pipe_clip_state *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - - if (rstate == NULL) - return; - - rctx->clip = *state; - rstate->id = R600_PIPE_STATE_CLIP; - for (int i = 0; i < state->nr; i++) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028E20_PA_CL_UCP0_X + i * 4, - fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028E24_PA_CL_UCP0_Y + i * 4, - fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028E28_PA_CL_UCP0_Z + i * 4, - fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028E2C_PA_CL_UCP0_W + i * 4, - fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); - } - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028810_PA_CL_CLIP_CNTL, - S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) | - S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) | - S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL); - - free(rctx->states[R600_PIPE_STATE_CLIP]); - rctx->states[R600_PIPE_STATE_CLIP] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_vertex_element *v = (struct r600_vertex_element*)state; - - r600_delete_vertex_element(ctx, rctx->vertex_elements); - rctx->vertex_elements = v; - if (v) { - v->refcount++; - rctx->vs_rebuild = true; - } -} - -static void r600_set_polygon_stipple(struct pipe_context *ctx, - const struct pipe_poly_stipple *state) -{ -} - -static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) -{ -} - -static void r600_set_scissor_state(struct pipe_context *ctx, - const struct pipe_scissor_state *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - u32 tl, br; - - if (rstate == NULL) - return; - - rstate->id = R600_PIPE_STATE_SCISSOR; - tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); - br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028034_PA_SC_SCREEN_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028208_PA_SC_WINDOW_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028210_PA_SC_CLIPRECT_0_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028214_PA_SC_CLIPRECT_0_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028218_PA_SC_CLIPRECT_1_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_02821C_PA_SC_CLIPRECT_1_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028220_PA_SC_CLIPRECT_2_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028224_PA_SC_CLIPRECT_2_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028228_PA_SC_CLIPRECT_3_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_02822C_PA_SC_CLIPRECT_3_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_02820C_PA_SC_CLIPRECT_RULE, 0x0000FFFF, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, - 0xFFFFFFFF, NULL); - - free(rctx->states[R600_PIPE_STATE_SCISSOR]); - rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_set_stencil_ref(struct pipe_context *ctx, - const struct pipe_stencil_ref *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - u32 tmp; - - if (rstate == NULL) - return; - - rctx->stencil_ref = *state; - rstate->id = R600_PIPE_STATE_STENCIL_REF; - tmp = S_028430_STENCILREF(state->ref_value[0]); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028430_DB_STENCILREFMASK, tmp, - ~C_028430_STENCILREF, NULL); - tmp = S_028434_STENCILREF_BF(state->ref_value[1]); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028434_DB_STENCILREFMASK_BF, tmp, - ~C_028434_STENCILREF_BF, NULL); - - free(rctx->states[R600_PIPE_STATE_STENCIL_REF]); - rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_set_viewport_state(struct pipe_context *ctx, - const struct pipe_viewport_state *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - - if (rstate == NULL) - return; - - rctx->viewport = *state; - rstate->id = R600_PIPE_STATE_VIEWPORT; - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL); - - free(rctx->states[R600_PIPE_STATE_VIEWPORT]); - rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, - const struct pipe_framebuffer_state *state, int cb) -{ - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level = state->cbufs[cb]->level; - unsigned pitch, slice; - unsigned color_info; - unsigned format, swap, ntype; - const struct util_format_description *desc; - struct radeon_ws_bo *bo[3]; - - rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; - rbuffer = &rtex->resource; - bo[0] = rbuffer->bo; - bo[1] = rbuffer->bo; - bo[2] = rbuffer->bo; - - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; - ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = V_0280A0_NUMBER_SRGB; - - format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = r600_translate_colorswap(rtex->resource.base.b.format); - color_info = S_0280A0_FORMAT(format) | - S_0280A0_COMP_SWAP(swap) | - S_0280A0_BLEND_CLAMP(1) | - S_0280A0_SOURCE_FORMAT(1) | - S_0280A0_NUMBER_TYPE(ntype); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028040_CB_COLOR0_BASE + cb * 4, - state->cbufs[cb]->offset >> 8, 0xFFFFFFFF, bo[0]); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0280A0_CB_COLOR0_INFO + cb * 4, - color_info, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028060_CB_COLOR0_SIZE + cb * 4, - S_028060_PITCH_TILE_MAX(pitch) | - S_028060_SLICE_TILE_MAX(slice), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028080_CB_COLOR0_VIEW + cb * 4, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0280E0_CB_COLOR0_FRAG + cb * 4, - 0x00000000, 0xFFFFFFFF, bo[1]); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0280C0_CB_COLOR0_TILE + cb * 4, - 0x00000000, 0xFFFFFFFF, bo[2]); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028100_CB_COLOR0_MASK + cb * 4, - 0x00000000, 0xFFFFFFFF, NULL); -} - -static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, - const struct pipe_framebuffer_state *state) -{ - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level; - unsigned pitch, slice, format; - - if (state->zsbuf == NULL) - return; - - rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tilled = 1; - rtex->array_mode = 2; - rtex->tile_type = 1; - rtex->depth = 1; - rbuffer = &rtex->resource; - - level = state->zsbuf->level; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; - format = r600_translate_dbformat(state->zsbuf->texture->format); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02800C_DB_DEPTH_BASE, - state->zsbuf->offset >> 8, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028000_DB_DEPTH_SIZE, - S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028010_DB_DEPTH_INFO, - S_028010_ARRAY_MODE(rtex->array_mode) | S_028010_FORMAT(format), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D34_DB_PREFETCH_LIMIT, - (state->zsbuf->height / 8) - 1, 0xFFFFFFFF, NULL); -} - -static void r600_set_framebuffer_state(struct pipe_context *ctx, - const struct pipe_framebuffer_state *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - u32 shader_mask, tl, br, shader_control, target_mask; - - if (rstate == NULL) - return; - - /* unreference old buffer and reference new one */ - rstate->id = R600_PIPE_STATE_FRAMEBUFFER; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - pipe_surface_reference(&rctx->framebuffer.cbufs[i], state->cbufs[i]); - } - pipe_surface_reference(&rctx->framebuffer.zsbuf, state->zsbuf); - rctx->framebuffer = *state; - - /* build states */ - for (int i = 0; i < state->nr_cbufs; i++) { - r600_cb(rctx, rstate, state, i); - } - if (state->zsbuf) { - r600_db(rctx, rstate, state); - } - - target_mask = 0x00000000; - target_mask = 0xFFFFFFFF; - shader_mask = 0; - shader_control = 0; - for (int i = 0; i < state->nr_cbufs; i++) { - target_mask ^= 0xf << (i * 4); - shader_mask |= 0xf << (i * 4); - shader_control |= 1 << i; - } - tl = S_028240_TL_X(0) | S_028240_TL_Y(0) | S_028240_WINDOW_OFFSET_DISABLE(1); - br = S_028244_BR_X(state->width) | S_028244_BR_Y(state->height); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028244_PA_SC_GENERIC_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, - 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0287A0_CB_SHADER_CONTROL, - shader_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028238_CB_TARGET_MASK, - 0x00000000, target_mask, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02823C_CB_SHADER_MASK, - shader_mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C04_PA_SC_AA_CONFIG, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C30_CB_CLRCMP_CONTROL, - 0x01000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C34_CB_CLRCMP_SRC, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C38_CB_CLRCMP_DST, - 0x000000FF, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C3C_CB_CLRCMP_MSK, - 0xFFFFFFFF, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C48_PA_SC_AA_MASK, - 0xFFFFFFFF, 0xFFFFFFFF, NULL); - - free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); - rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_set_index_buffer(struct pipe_context *ctx, - const struct pipe_index_buffer *ib) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - if (ib) { - pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); - memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); - } else { - pipe_resource_reference(&rctx->index_buffer.buffer, NULL); - memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); - } - - /* TODO make this more like a state */ -} - -static void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - for (int i = 0; i < rctx->nvertex_buffer; i++) { - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); - } - memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - for (int i = 0; i < count; i++) { - rctx->vertex_buffer[i].buffer = NULL; - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); - } - rctx->nvertex_buffer = count; -} - -static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate; - struct pipe_transfer *transfer; - unsigned *nconst = NULL; - u32 *ptr, offset; - - switch (shader) { - case PIPE_SHADER_VERTEX: - rstate = rctx->vs_const; - nconst = &rctx->vs_nconst; - offset = R_030000_SQ_ALU_CONSTANT0_0 + 0x1000; - break; - case PIPE_SHADER_FRAGMENT: - rstate = rctx->ps_const; - nconst = &rctx->ps_nconst; - offset = R_030000_SQ_ALU_CONSTANT0_0; - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } - if (buffer && buffer->width0 > 0) { - *nconst = buffer->width0 / 16; - ptr = pipe_buffer_map(ctx, buffer, PIPE_TRANSFER_READ, &transfer); - if (ptr == NULL) - return; - for (int i = 0; i < *nconst; i++, offset += 0x10) { - rstate[i].nregs = 0; - r600_pipe_state_add_reg(&rstate[i], R600_GROUP_ALU_CONST, offset + 0x0, ptr[i * 4 + 0], 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rstate[i], R600_GROUP_ALU_CONST, offset + 0x4, ptr[i * 4 + 1], 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rstate[i], R600_GROUP_ALU_CONST, offset + 0x8, ptr[i * 4 + 2], 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rstate[i], R600_GROUP_ALU_CONST, offset + 0xC, ptr[i * 4 + 3], 0xFFFFFFFF, NULL); - r600_context_pipe_state_set(&rctx->ctx, &rstate[i]); - } - pipe_buffer_unmap(ctx, buffer, transfer); - } -} - -static void *r600_create_shader_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) -{ - struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); - int r; - - r = r600_pipe_shader_create2(ctx, shader, state->tokens); - if (r) { - return NULL; - } - return shader; -} - -static void r600_bind_ps_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - /* TODO delete old shader */ - rctx->ps_shader = (struct r600_pipe_shader *)state; -} - -static void r600_bind_vs_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - /* TODO delete old shader */ - rctx->vs_shader = (struct r600_pipe_shader *)state; -} - -static void r600_delete_ps_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; - - if (rctx->ps_shader == shader) { - rctx->ps_shader = NULL; - } - /* TODO proper delete */ - free(shader); -} - -static void r600_delete_vs_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; - - if (rctx->vs_shader == shader) { - rctx->vs_shader = NULL; - } - /* TODO proper delete */ - free(shader); -} - -static void r600_init_state_functions2(struct r600_pipe_context *rctx) -{ - rctx->context.create_blend_state = r600_create_blend_state; - rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state; - rctx->context.create_fs_state = r600_create_shader_state; - rctx->context.create_rasterizer_state = r600_create_rs_state; - rctx->context.create_sampler_state = r600_create_sampler_state; - rctx->context.create_sampler_view = r600_create_sampler_view; - rctx->context.create_vertex_elements_state = r600_create_vertex_elements; - rctx->context.create_vs_state = r600_create_shader_state; - rctx->context.bind_blend_state = r600_bind_blend_state; - rctx->context.bind_depth_stencil_alpha_state = r600_bind_state; - rctx->context.bind_fragment_sampler_states = r600_bind_ps_sampler; - rctx->context.bind_fs_state = r600_bind_ps_shader; - rctx->context.bind_rasterizer_state = r600_bind_rs_state; - rctx->context.bind_vertex_elements_state = r600_bind_vertex_elements; - rctx->context.bind_vertex_sampler_states = r600_bind_vs_sampler; - rctx->context.bind_vs_state = r600_bind_vs_shader; - rctx->context.delete_blend_state = r600_delete_state; - rctx->context.delete_depth_stencil_alpha_state = r600_delete_state; - rctx->context.delete_fs_state = r600_delete_ps_shader; - rctx->context.delete_rasterizer_state = r600_delete_rs_state; - rctx->context.delete_sampler_state = r600_delete_state; - rctx->context.delete_vertex_elements_state = r600_delete_vertex_element; - rctx->context.delete_vs_state = r600_delete_vs_shader; - rctx->context.set_blend_color = r600_set_blend_color; - rctx->context.set_clip_state = r600_set_clip_state; - rctx->context.set_constant_buffer = r600_set_constant_buffer; - rctx->context.set_fragment_sampler_views = r600_set_ps_sampler_view; - rctx->context.set_framebuffer_state = r600_set_framebuffer_state; - rctx->context.set_polygon_stipple = r600_set_polygon_stipple; - rctx->context.set_sample_mask = r600_set_sample_mask; - rctx->context.set_scissor_state = r600_set_scissor_state; - rctx->context.set_stencil_ref = r600_set_stencil_ref; - rctx->context.set_vertex_buffers = r600_set_vertex_buffers; - rctx->context.set_index_buffer = r600_set_index_buffer; - rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view; - rctx->context.set_viewport_state = r600_set_viewport_state; - rctx->context.sampler_view_destroy = r600_sampler_view_destroy; -} - -static void r600_init_config2(struct r600_pipe_context *rctx) -{ - int ps_prio; - int vs_prio; - int gs_prio; - int es_prio; - int num_ps_gprs; - int num_vs_gprs; - int num_gs_gprs; - int num_es_gprs; - int num_temp_gprs; - int num_ps_threads; - int num_vs_threads; - int num_gs_threads; - int num_es_threads; - int num_ps_stack_entries; - int num_vs_stack_entries; - int num_gs_stack_entries; - int num_es_stack_entries; - enum radeon_family family; - struct r600_pipe_state *rstate = &rctx->config; - u32 tmp; - - family = r600_get_family(rctx->radeon); - ps_prio = 0; - vs_prio = 1; - gs_prio = 2; - es_prio = 3; - switch (family) { - case CHIP_R600: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV630: - case CHIP_RV635: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 144; - num_vs_threads = 40; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV610: - case CHIP_RV620: - case CHIP_RS780: - case CHIP_RS880: - default: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV670: - num_ps_gprs = 144; - num_vs_gprs = 40; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV770: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 188; - num_vs_threads = 60; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 256; - num_vs_stack_entries = 256; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV730: - case CHIP_RV740: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 188; - num_vs_threads = 60; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV710: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 144; - num_vs_threads = 48; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - } - - rstate->id = R600_PIPE_STATE_CONFIG; - - /* SQ_CONFIG */ - tmp = 0; - switch (family) { - case CHIP_RV610: - case CHIP_RV620: - case CHIP_RS780: - case CHIP_RS880: - case CHIP_RV710: - break; - default: - tmp |= S_008C00_VC_ENABLE(1); - break; - } - tmp |= S_008C00_DX9_CONSTS(1); - tmp |= S_008C00_ALU_INST_PREFER_VECTOR(1); - tmp |= S_008C00_PS_PRIO(ps_prio); - tmp |= S_008C00_VS_PRIO(vs_prio); - tmp |= S_008C00_GS_PRIO(gs_prio); - tmp |= S_008C00_ES_PRIO(es_prio); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); - - /* SQ_GPR_RESOURCE_MGMT_1 */ - tmp = 0; - tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); - - /* SQ_GPR_RESOURCE_MGMT_2 */ - tmp = 0; - tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - tmp |= S_008C08_NUM_GS_GPRS(num_es_gprs); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); - - /* SQ_THREAD_RESOURCE_MGMT */ - tmp = 0; - tmp |= S_008C0C_NUM_PS_THREADS(num_ps_threads); - tmp |= S_008C0C_NUM_VS_THREADS(num_vs_threads); - tmp |= S_008C0C_NUM_GS_THREADS(num_gs_threads); - tmp |= S_008C0C_NUM_ES_THREADS(num_es_threads); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL); - - /* SQ_STACK_RESOURCE_MGMT_1 */ - tmp = 0; - tmp |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); - tmp |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); - - /* SQ_STACK_RESOURCE_MGMT_2 */ - tmp = 0; - tmp |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); - tmp |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL); - - if (family >= CHIP_RV770) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009508_TA_CNTL_AUX, 0x07000002, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A4C_PA_SC_MODE_CNTL, 0x00514000, 0xFFFFFFFF, NULL); - } else { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009508_TA_CNTL_AUX, 0x07000003, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A4C_PA_SC_MODE_CNTL, 0x00004010, 0xFFFFFFFF, NULL); - } - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028400_VGT_MAX_VTX_INDX, 0x00FFFFFF, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028404_VGT_MIN_VTX_INDX, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static struct pipe_context *r600_create_context2(struct pipe_screen *screen, void *priv) -{ - struct r600_pipe_context *rctx = CALLOC_STRUCT(r600_pipe_context); - struct r600_screen* rscreen = (struct r600_screen *)screen; - - if (rctx == NULL) - return NULL; - rctx->context.winsys = rscreen->screen.winsys; - rctx->context.screen = screen; - rctx->context.priv = priv; - rctx->context.destroy = r600_destroy_context; - rctx->context.draw_vbo = r600_draw_vbo2; - rctx->context.flush = r600_flush2; - - /* Easy accessing of screen/winsys. */ - rctx->screen = rscreen; - rctx->radeon = rscreen->radeon; - - r600_init_blit_functions2(rctx); - r600_init_state_functions2(rctx); - r600_init_context_resource_functions2(rctx); - - rctx->blitter = util_blitter_create(&rctx->context); - if (rctx->blitter == NULL) { - FREE(rctx); - return NULL; - } - - if (r600_context_init(&rctx->ctx, rctx->radeon)) { - r600_destroy_context(&rctx->context); - return NULL; - } - - r600_init_config2(rctx); - - return &rctx->context; -} - -static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param) -{ - switch(shader) - { - case PIPE_SHADER_FRAGMENT: - case PIPE_SHADER_VERTEX: - break; - case PIPE_SHADER_GEOMETRY: - /* TODO: support and enable geometry programs */ - return 0; - default: - /* TODO: support tessellation on Evergreen */ - return 0; - } - - /* TODO: all these should be fixed, since r600 surely supports much more! */ - switch (param) { - case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: - return 16384; - case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: - return 8; /* FIXME */ - case PIPE_SHADER_CAP_MAX_INPUTS: - if(shader == PIPE_SHADER_FRAGMENT) - return 10; - else - return 16; - case PIPE_SHADER_CAP_MAX_TEMPS: - return 256; //max native temporaries - case PIPE_SHADER_CAP_MAX_ADDRS: - return 1; //max native address registers/* FIXME Isn't this equal to TEMPS? */ - case PIPE_SHADER_CAP_MAX_CONSTS: - return 256; //max native parameters - case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return 1; - case PIPE_SHADER_CAP_MAX_PREDS: - return 0; /* FIXME */ - case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: - /* TODO: support this! */ - return 0; - default: - return 0; - } -} - -void r600_init_screen_texture_functions(struct pipe_screen *screen); -struct pipe_screen *r600_screen_create2(struct radeon *radeon) -{ - struct r600_screen *rscreen; - enum radeon_family family = r600_get_family(radeon); - - rscreen = CALLOC_STRUCT(r600_screen); - if (rscreen == NULL) { - return NULL; - } - - switch (family) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - rscreen->chip_class = R600; - break; - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - rscreen->chip_class = R700; - break; - default: - FREE(rscreen); - return NULL; - } - rscreen->radeon = radeon; - rscreen->screen.winsys = (struct pipe_winsys*)radeon; - rscreen->screen.destroy = r600_destroy_screen; - rscreen->screen.get_name = r600_get_name; - rscreen->screen.get_vendor = r600_get_vendor; - rscreen->screen.get_param = r600_get_param; - rscreen->screen.get_shader_param = r600_get_shader_param; - rscreen->screen.get_paramf = r600_get_paramf; - rscreen->screen.is_format_supported = r600_is_format_supported; - rscreen->screen.context_create = r600_create_context2; - r600_init_screen_texture_functions(&rscreen->screen); - r600_init_screen_resource_functions(&rscreen->screen); - - return &rscreen->screen; -} diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index b4c21d9e12..1c1978f8ab 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "r600d.h" +#include "r600_formats.h" static INLINE uint32_t r600_translate_blend_function(int blend_func) { @@ -123,6 +124,21 @@ static INLINE uint32_t r600_translate_stencil_op(int s_op) return 0; } +static INLINE uint32_t r600_translate_fill(uint32_t func) +{ + switch(func) { + case PIPE_POLYGON_MODE_FILL: + return 2; + case PIPE_POLYGON_MODE_LINE: + return 1; + case PIPE_POLYGON_MODE_POINT: + return 0; + default: + assert(0); + return 0; + } +} + /* translates straight */ static INLINE uint32_t r600_translate_ds_func(int func) { @@ -136,17 +152,17 @@ static inline unsigned r600_tex_wrap(unsigned wrap) case PIPE_TEX_WRAP_REPEAT: return V_03C000_SQ_TEX_WRAP; case PIPE_TEX_WRAP_CLAMP: - return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return V_03C000_SQ_TEX_CLAMP_HALF_BORDER; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return V_03C000_SQ_TEX_CLAMP_BORDER; case PIPE_TEX_WRAP_MIRROR_REPEAT: return V_03C000_SQ_TEX_MIRROR; case PIPE_TEX_WRAP_MIRROR_CLAMP: - return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return V_03C000_SQ_TEX_MIRROR_ONCE_BORDER; } @@ -283,6 +299,17 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_B4G4R4A4_UNORM: case PIPE_FORMAT_B4G4R4X4_UNORM: return V_0280A0_SWAP_ALT; + + case PIPE_FORMAT_Z16_UNORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_R16_UNORM: + return V_0280A0_SWAP_STD; + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: @@ -310,22 +337,29 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_0280A0_SWAP_STD; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_0280A0_SWAP_STD; + case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: return V_0280A0_SWAP_STD_REV; + case PIPE_FORMAT_R16G16_UNORM: + return V_0280A0_SWAP_STD; + /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: - // return V_0280A0_COLOR_16_16_16_16; + // return FMT_16_16_16_16; case PIPE_FORMAT_R16G16B16A16_FLOAT: - // return V_0280A0_COLOR_16_16_16_16_FLOAT; + // return FMT_16_16_16_16_FLOAT; /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: - // return V_0280A0_COLOR_32_32_32_32_FLOAT; + // return FMT_32_32_32_32_FLOAT; return 0; default: R600_ERR("unsupported colorswap format %d\n", format); @@ -357,6 +391,16 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_B4G4R4X4_UNORM: return V_0280A0_COLOR_4_4_4_4; + case PIPE_FORMAT_Z16_UNORM: + return V_0280A0_COLOR_16; + + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + return V_0280A0_COLOR_8_8; + + case PIPE_FORMAT_R16_UNORM: + return V_0280A0_COLOR_16; + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: @@ -383,18 +427,41 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_0280A0_COLOR_8_24; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_0280A0_COLOR_24_8; + case PIPE_FORMAT_R32_FLOAT: return V_0280A0_COLOR_32_FLOAT; + case PIPE_FORMAT_R16G16_FLOAT: + return V_0280A0_COLOR_16_16_FLOAT; + + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16_UNORM: + return V_0280A0_COLOR_16_16; + + /* 64-bit buffers. */ + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16B16A16_USCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: return V_0280A0_COLOR_16_16_16_16; + + case PIPE_FORMAT_R16G16B16_FLOAT: case PIPE_FORMAT_R16G16B16A16_FLOAT: return V_0280A0_COLOR_16_16_16_16_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: return V_0280A0_COLOR_32_32_FLOAT; + case PIPE_FORMAT_R32G32_USCALED: + case PIPE_FORMAT_R32G32_SSCALED: + return V_0280A0_COLOR_32_32; + /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32_FLOAT: return V_0280A0_COLOR_32_32_32_FLOAT; @@ -405,7 +472,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_UYVY: case PIPE_FORMAT_YUYV: default: - R600_ERR("unsupported color format %d\n", format); + R600_ERR("unsupported color format %d %s\n", format, util_format_name(format)); return ~0; /* Unsupported. */ } } @@ -431,4 +498,139 @@ static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) return r600_translate_colorformat(format) != ~0; } +static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) +{ + uint32_t result = 0; + const struct util_format_description *desc; + unsigned i; + + desc = util_format_description(format); + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { + goto out_unknown; + } + + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + switch (desc->channel[i].type) { + /* Half-floats, floats, doubles */ + case UTIL_FORMAT_TYPE_FLOAT: + switch (desc->channel[i].size) { + case 16: + switch (desc->nr_channels) { + case 1: + result = FMT_16_FLOAT; + break; + case 2: + result = FMT_16_16_FLOAT; + break; + case 3: + result = FMT_16_16_16_FLOAT; + break; + case 4: + result = FMT_16_16_16_16_FLOAT; + break; + } + break; + case 32: + switch (desc->nr_channels) { + case 1: + result = FMT_32_FLOAT; + break; + case 2: + result = FMT_32_32_FLOAT; + break; + case 3: + result = FMT_32_32_32_FLOAT; + break; + case 4: + result = FMT_32_32_32_32_FLOAT; + break; + } + break; + default: + goto out_unknown; + } + break; + /* Unsigned ints */ + case UTIL_FORMAT_TYPE_UNSIGNED: + /* Signed ints */ + case UTIL_FORMAT_TYPE_SIGNED: + switch (desc->channel[i].size) { + case 8: + switch (desc->nr_channels) { + case 1: + result = FMT_8; + break; + case 2: + result = FMT_8_8; + break; + case 3: + // result = FMT_8_8_8; /* fails piglit draw-vertices test */ + // break; + case 4: + result = FMT_8_8_8_8; + break; + } + break; + case 16: + switch (desc->nr_channels) { + case 1: + result = FMT_16; + break; + case 2: + result = FMT_16_16; + break; + case 3: + // result = FMT_16_16_16; /* fails piglit draw-vertices test */ + // break; + case 4: + result = FMT_16_16_16_16; + break; + } + break; + case 32: + switch (desc->nr_channels) { + case 1: + result = FMT_32; + break; + case 2: + result = FMT_32_32; + break; + case 3: + result = FMT_32_32_32; + break; + case 4: + result = FMT_32_32_32_32; + break; + } + break; + default: + goto out_unknown; + } + break; + default: + goto out_unknown; + } + + result = S_038008_DATA_FORMAT(result); + + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { + result |= S_038008_FORMAT_COMP_ALL(1); + } + if (desc->channel[i].normalized) { + result |= S_038008_NUM_FORMAT_ALL(0); + } else { + result |= S_038008_NUM_FORMAT_ALL(2); + } + return result; +out_unknown: + R600_ERR("unsupported vertex format %s\n", util_format_name(format)); + return ~0; +} + #endif diff --git a/src/gallium/drivers/r600/r600_states_inc.h b/src/gallium/drivers/r600/r600_states_inc.h index de717f3536..1c8075ebdb 100644 --- a/src/gallium/drivers/r600/r600_states_inc.h +++ b/src/gallium/drivers/r600/r600_states_inc.h @@ -15,35 +15,34 @@ #define R600_CONFIG__DB_WATERMARKS 10 #define R600_CONFIG__SX_MISC 11 #define R600_CONFIG__SPI_THREAD_GROUPING 12 -#define R600_CONFIG__CB_SHADER_CONTROL 13 -#define R600_CONFIG__SQ_ESGS_RING_ITEMSIZE 14 -#define R600_CONFIG__SQ_GSVS_RING_ITEMSIZE 15 -#define R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE 16 -#define R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE 17 -#define R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE 18 -#define R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE 19 -#define R600_CONFIG__SQ_FBUF_RING_ITEMSIZE 20 -#define R600_CONFIG__SQ_REDUC_RING_ITEMSIZE 21 -#define R600_CONFIG__SQ_GS_VERT_ITEMSIZE 22 -#define R600_CONFIG__VGT_OUTPUT_PATH_CNTL 23 -#define R600_CONFIG__VGT_HOS_CNTL 24 -#define R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL 25 -#define R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL 26 -#define R600_CONFIG__VGT_HOS_REUSE_DEPTH 27 -#define R600_CONFIG__VGT_GROUP_PRIM_TYPE 28 -#define R600_CONFIG__VGT_GROUP_FIRST_DECR 29 -#define R600_CONFIG__VGT_GROUP_DECR 30 -#define R600_CONFIG__VGT_GROUP_VECT_0_CNTL 31 -#define R600_CONFIG__VGT_GROUP_VECT_1_CNTL 32 -#define R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL 33 -#define R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL 34 -#define R600_CONFIG__VGT_GS_MODE 35 -#define R600_CONFIG__PA_SC_MODE_CNTL 36 -#define R600_CONFIG__VGT_STRMOUT_EN 37 -#define R600_CONFIG__VGT_REUSE_OFF 38 -#define R600_CONFIG__VGT_VTX_CNT_EN 39 -#define R600_CONFIG__VGT_STRMOUT_BUFFER_EN 40 -#define R600_CONFIG_SIZE 41 +#define R600_CONFIG__SQ_ESGS_RING_ITEMSIZE 13 +#define R600_CONFIG__SQ_GSVS_RING_ITEMSIZE 14 +#define R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE 15 +#define R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE 16 +#define R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE 17 +#define R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE 18 +#define R600_CONFIG__SQ_FBUF_RING_ITEMSIZE 19 +#define R600_CONFIG__SQ_REDUC_RING_ITEMSIZE 20 +#define R600_CONFIG__SQ_GS_VERT_ITEMSIZE 21 +#define R600_CONFIG__VGT_OUTPUT_PATH_CNTL 22 +#define R600_CONFIG__VGT_HOS_CNTL 23 +#define R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL 24 +#define R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL 25 +#define R600_CONFIG__VGT_HOS_REUSE_DEPTH 26 +#define R600_CONFIG__VGT_GROUP_PRIM_TYPE 27 +#define R600_CONFIG__VGT_GROUP_FIRST_DECR 28 +#define R600_CONFIG__VGT_GROUP_DECR 29 +#define R600_CONFIG__VGT_GROUP_VECT_0_CNTL 30 +#define R600_CONFIG__VGT_GROUP_VECT_1_CNTL 31 +#define R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL 32 +#define R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL 33 +#define R600_CONFIG__VGT_GS_MODE 34 +#define R600_CONFIG__PA_SC_MODE_CNTL 35 +#define R600_CONFIG__VGT_STRMOUT_EN 36 +#define R600_CONFIG__VGT_REUSE_OFF 37 +#define R600_CONFIG__VGT_VTX_CNT_EN 38 +#define R600_CONFIG__VGT_STRMOUT_BUFFER_EN 39 +#define R600_CONFIG_SIZE 40 #define R600_CONFIG_PM4 128 /* R600_CB_CNTL */ @@ -65,7 +64,8 @@ #define R600_CB_CNTL__CB_CLRCMP_DST 15 #define R600_CB_CNTL__CB_CLRCMP_MSK 16 #define R600_CB_CNTL__PA_SC_AA_MASK 17 -#define R600_CB_CNTL_SIZE 18 +#define R600_CB_CNTL__CB_SHADER_CONTROL 18 +#define R600_CB_CNTL_SIZE 19 #define R600_CB_CNTL_PM4 128 /* R600_RASTERIZER */ diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 274679d127..152cd9210f 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -31,11 +31,11 @@ #include <util/u_inlines.h> #include <util/u_memory.h> #include "state_tracker/drm_driver.h" -#include "r600_screen.h" -#include "r600_context.h" +#include "r600_pipe.h" #include "r600_resource.h" #include "r600_state_inlines.h" #include "r600d.h" +#include "r600_formats.h" extern struct u_resource_vtbl r600_texture_vtbl; @@ -54,11 +54,30 @@ static void r600_copy_from_tiled_texture(struct pipe_context *ctx, struct r600_t transfer->box.width, transfer->box.height); } -static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, + +/* Copy from a detiled texture to a tiled one. */ +static void r600_copy_into_tiled_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) +{ + struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; + struct pipe_resource *texture = transfer->resource; + struct pipe_subresource subsrc; + + subsrc.face = 0; + subsrc.level = 0; + ctx->resource_copy_region(ctx, texture, transfer->sr, + transfer->box.x, transfer->box.y, transfer->box.z, + rtransfer->linear_texture, subsrc, + 0, 0, 0, + transfer->box.width, transfer->box.height); + + ctx->flush(ctx, 0, NULL); +} + +static unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, unsigned level, unsigned zslice, unsigned face) { - unsigned long offset = rtex->offset[level]; + unsigned offset = rtex->offset[level]; switch (rtex->resource.base.b.target) { case PIPE_TEXTURE_3D: @@ -73,73 +92,128 @@ static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, } } -static void r600_setup_miptree(struct r600_resource_texture *rtex) +static unsigned r600_texture_get_stride(struct pipe_screen *screen, + struct r600_resource_texture *rtex, + unsigned level) +{ + struct pipe_resource *ptex = &rtex->resource.base.b; + struct radeon *radeon = (struct radeon *)screen->winsys; + enum chip_class chipc = r600_get_family_class(radeon); + unsigned width, stride; + + if (rtex->pitch_override) + return rtex->pitch_override; + + width = u_minify(ptex->width0, level); + + stride = util_format_get_stride(ptex->format, align(width, 64)); + if (chipc == EVERGREEN) + stride = align(stride, 512); + else + stride = align(stride, 256); + return stride; +} + +static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen, + struct r600_resource_texture *rtex, + unsigned level) +{ + struct pipe_resource *ptex = &rtex->resource.base.b; + unsigned height; + + height = u_minify(ptex->height0, level); + height = util_next_power_of_two(height); + return util_format_get_nblocksy(ptex->format, height); +} + +/* Get a width in pixels from a stride in bytes. */ +static unsigned pitch_to_width(enum pipe_format format, + unsigned pitch_in_bytes) +{ + return (pitch_in_bytes / util_format_get_blocksize(format)) * + util_format_get_blockwidth(format); +} + +static void r600_setup_miptree(struct pipe_screen *screen, + struct r600_resource_texture *rtex) { struct pipe_resource *ptex = &rtex->resource.base.b; - unsigned long w, h, pitch, size, layer_size, i, offset; + struct radeon *radeon = (struct radeon *)screen->winsys; + enum chip_class chipc = r600_get_family_class(radeon); + unsigned pitch, size, layer_size, i, offset; + unsigned nblocksy; - rtex->bpt = util_format_get_blocksize(ptex->format); for (i = 0, offset = 0; i <= ptex->last_level; i++) { - w = u_minify(ptex->width0, i); - h = u_minify(ptex->height0, i); - h = util_next_power_of_two(h); - pitch = util_format_get_stride(ptex->format, align(w, 64)); - pitch = align(pitch, 256); - layer_size = pitch * h; - if (ptex->target == PIPE_TEXTURE_CUBE) - size = layer_size * 6; + pitch = r600_texture_get_stride(screen, rtex, i); + nblocksy = r600_texture_get_nblocksy(screen, rtex, i); + + layer_size = pitch * nblocksy; + + if (ptex->target == PIPE_TEXTURE_CUBE) { + if (chipc >= R700) + size = layer_size * 8; + else + size = layer_size * 6; + } else size = layer_size * u_minify(ptex->depth0, i); rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; - rtex->pitch[i] = pitch; - rtex->width[i] = w; - rtex->height[i] = h; + rtex->pitch_in_bytes[i] = pitch; + rtex->pitch_in_pixels[i] = pitch_to_width(ptex->format, pitch); offset += size; } rtex->size = offset; } -struct pipe_resource *r600_texture_create(struct pipe_screen *screen, - const struct pipe_resource *templ) +static struct r600_resource_texture * +r600_texture_create_object(struct pipe_screen *screen, + const struct pipe_resource *base, + unsigned array_mode, + unsigned pitch_in_bytes_override, + unsigned max_buffer_size, + struct r600_bo *bo) { struct r600_resource_texture *rtex; struct r600_resource *resource; struct radeon *radeon = (struct radeon *)screen->winsys; rtex = CALLOC_STRUCT(r600_resource_texture); - if (!rtex) { + if (rtex == NULL) return NULL; - } + resource = &rtex->resource; - resource->base.b = *templ; + resource->base.b = *base; resource->base.vtbl = &r600_texture_vtbl; pipe_reference_init(&resource->base.b.reference, 1); resource->base.b.screen = screen; - r600_setup_miptree(rtex); - - /* FIXME alignment 4096 enought ? too much ? */ + resource->bo = bo; resource->domain = r600_domain_from_usage(resource->base.b.bind); + rtex->pitch_override = pitch_in_bytes_override; + rtex->array_mode = array_mode; + + r600_setup_miptree(screen, rtex); + resource->size = rtex->size; - resource->bo = radeon_ws_bo(radeon, rtex->size, 4096, 0); - if (resource->bo == NULL) { - FREE(rtex); - return NULL; + + if (!resource->bo) { + resource->bo = r600_bo(radeon, rtex->size, 4096, 0); + if (!resource->bo) { + FREE(rtex); + return NULL; + } } - return &resource->base.b; + return rtex; } -static void r600_texture_destroy_state(struct pipe_resource *ptexture) +struct pipe_resource *r600_texture_create(struct pipe_screen *screen, + const struct pipe_resource *templ) { - struct r600_resource_texture *rtexture = (struct r600_resource_texture*)ptexture; + unsigned array_mode = 0; + + return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, + 0, 0, NULL); - for (int i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) { - radeon_state_fini(&rtexture->scissor[i]); - radeon_state_fini(&rtexture->db[i]); - for (int j = 0; j < 8; j++) { - radeon_state_fini(&rtexture->cb[j][i]); - } - } } static void r600_texture_destroy(struct pipe_screen *screen, @@ -149,13 +223,12 @@ static void r600_texture_destroy(struct pipe_screen *screen, struct r600_resource *resource = &rtex->resource; struct radeon *radeon = (struct radeon *)screen->winsys; + if (rtex->flushed_depth_texture) + pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); + if (resource->bo) { - radeon_ws_bo_reference(radeon, &resource->bo, NULL); + r600_bo_reference(radeon, &resource->bo, NULL); } - if (rtex->uncompressed) { - radeon_ws_bo_reference(radeon, &rtex->uncompressed, NULL); - } - r600_texture_destroy_state(ptex); FREE(rtex); } @@ -166,7 +239,7 @@ static struct pipe_surface *r600_get_tex_surface(struct pipe_screen *screen, { struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); - unsigned long offset; + unsigned offset; if (surface == NULL) return NULL; @@ -191,46 +264,28 @@ static void r600_tex_surface_destroy(struct pipe_surface *surface) FREE(surface); } + struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, const struct pipe_resource *templ, struct winsys_handle *whandle) { struct radeon *rw = (struct radeon*)screen->winsys; - struct r600_resource_texture *rtex; - struct r600_resource *resource; - struct radeon_ws_bo *bo = NULL; + struct r600_bo *bo = NULL; /* Support only 2D textures without mipmaps */ if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) || templ->depth0 != 1 || templ->last_level != 0) return NULL; - rtex = CALLOC_STRUCT(r600_resource_texture); - if (rtex == NULL) - return NULL; - - bo = radeon_ws_bo_handle(rw, whandle->handle); + bo = r600_bo_handle(rw, whandle->handle); if (bo == NULL) { - FREE(rtex); return NULL; } - resource = &rtex->resource; - resource->base.b = *templ; - resource->base.vtbl = &r600_texture_vtbl; - pipe_reference_init(&resource->base.b.reference, 1); - resource->base.b.screen = screen; - resource->bo = bo; - rtex->depth = 0; - rtex->pitch_override = whandle->stride; - rtex->bpt = util_format_get_blocksize(templ->format); - rtex->pitch[0] = whandle->stride; - rtex->width[0] = templ->width0; - rtex->height[0] = templ->height0; - rtex->offset[0] = 0; - rtex->size = align(rtex->pitch[0] * templ->height0, 64); - - return &resource->base.b; + return (struct pipe_resource *)r600_texture_create_object(screen, templ, 0, + whandle->stride, + 0, + bo); } static unsigned int r600_texture_is_referenced(struct pipe_context *context, @@ -241,6 +296,41 @@ static unsigned int r600_texture_is_referenced(struct pipe_context *context, return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; } +int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture); + +int r600_texture_depth_flush(struct pipe_context *ctx, + struct pipe_resource *texture) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; + struct pipe_resource resource; + + if (rtex->flushed_depth_texture) + goto out; + + resource.target = PIPE_TEXTURE_2D; + resource.format = texture->format; + resource.width0 = texture->width0; + resource.height0 = texture->height0; + resource.depth0 = 1; + resource.last_level = 0; + resource.nr_samples = 0; + resource.usage = PIPE_USAGE_DYNAMIC; + resource.bind = 0; + resource.flags = 0; + + resource.bind |= PIPE_BIND_RENDER_TARGET; + + rtex->flushed_depth_texture = (struct r600_resource_texture *)ctx->screen->resource_create(ctx->screen, &resource); + if (rtex->flushed_depth_texture == NULL) { + R600_ERR("failed to create temporary texture to hold untiled copy\n"); + return -ENOMEM; + } + +out: + r600_blit_uncompress_depth_ptr(ctx, rtex); + return 0; +} + struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, struct pipe_resource *texture, struct pipe_subresource sr, @@ -250,6 +340,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; struct pipe_resource resource; struct r600_transfer *trans; + int r; trans = CALLOC_STRUCT(r600_transfer); if (trans == NULL) @@ -258,14 +349,22 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, trans->transfer.sr = sr; trans->transfer.usage = usage; trans->transfer.box = *box; - trans->transfer.stride = rtex->pitch[sr.level]; + trans->transfer.stride = rtex->pitch_in_bytes[sr.level]; trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); - if (rtex->tilled && !rtex->depth) { + if (rtex->depth) { + r = r600_texture_depth_flush(ctx, texture); + if (r < 0) { + R600_ERR("failed to create temporary texture to hold untiled copy\n"); + pipe_resource_reference(&trans->transfer.resource, NULL); + FREE(trans); + return NULL; + } + } else if (rtex->tiled) { resource.target = PIPE_TEXTURE_2D; resource.format = texture->format; resource.width0 = box->width; resource.height0 = box->height; - resource.depth0 = 0; + resource.depth0 = 1; resource.last_level = 0; resource.nr_samples = 0; resource.usage = PIPE_USAGE_DYNAMIC; @@ -289,6 +388,9 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, FREE(trans); return NULL; } + + trans->transfer.stride = + ((struct r600_resource_texture *)trans->linear_texture)->pitch_in_bytes[0]; if (usage & PIPE_TRANSFER_READ) { /* We cannot map a tiled texture directly because the data is * in a different order, therefore we do detiling using a blit. */ @@ -304,10 +406,17 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, struct pipe_transfer *transfer) { struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; + struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource; if (rtransfer->linear_texture) { + if (transfer->usage & PIPE_TRANSFER_WRITE) { + r600_copy_into_tiled_texture(ctx, rtransfer); + } pipe_resource_reference(&rtransfer->linear_texture, NULL); } + if (rtex->flushed_depth_texture) { + pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); + } pipe_resource_reference(&transfer->resource, NULL); FREE(transfer); } @@ -315,40 +424,31 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, void* r600_texture_transfer_map(struct pipe_context *ctx, struct pipe_transfer* transfer) { - struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - struct radeon_ws_bo *bo; + struct r600_bo *bo; enum pipe_format format = transfer->resource->format; struct radeon *radeon = (struct radeon *)ctx->screen->winsys; - struct r600_resource_texture *rtex; - unsigned long offset = 0; + unsigned offset = 0; char *map; - int r; - ctx->flush(ctx, 0, NULL); if (rtransfer->linear_texture) { bo = ((struct r600_resource *)rtransfer->linear_texture)->bo; } else { - rtex = (struct r600_resource_texture*)transfer->resource; - if (rtex->depth && rscreen->chip_class != EVERGREEN) { - r = r600_texture_from_depth(ctx, rtex, transfer->sr.level); - if (r) { - return NULL; - } - r600_flush(ctx, 0, NULL); - bo = rtex->uncompressed; - } else { + struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource; + + if (rtex->flushed_depth_texture) + bo = ((struct r600_resource *)rtex->flushed_depth_texture)->bo; + else bo = ((struct r600_resource *)transfer->resource)->bo; - } + offset = rtransfer->offset + transfer->box.y / util_format_get_blockheight(format) * transfer->stride + transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); } - map = radeon_ws_bo_map(radeon, bo, 0, r600_context(ctx)); + map = r600_bo_map(radeon, bo, 0, ctx); if (!map) { return NULL; } - radeon_ws_bo_wait(radeon, bo); return map + offset; } @@ -358,20 +458,20 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx, { struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; struct radeon *radeon = (struct radeon *)ctx->screen->winsys; - struct r600_resource_texture *rtex; - struct radeon_ws_bo *bo; + struct r600_bo *bo; if (rtransfer->linear_texture) { bo = ((struct r600_resource *)rtransfer->linear_texture)->bo; } else { - rtex = (struct r600_resource_texture*)transfer->resource; - if (rtex->depth) { - bo = rtex->uncompressed; + struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource; + + if (rtex->flushed_depth_texture) { + bo = ((struct r600_resource *)rtex->flushed_depth_texture)->bo; } else { bo = ((struct r600_resource *)transfer->resource)->bo; } } - radeon_ws_bo_unmap(radeon, bo); + r600_bo_unmap(radeon, bo); } struct u_resource_vtbl r600_texture_vtbl = @@ -466,13 +566,23 @@ uint32_t r600_translate_texformat(enum pipe_format format, case UTIL_FORMAT_COLORSPACE_ZS: switch (format) { case PIPE_FORMAT_Z16_UNORM: - result = V_0280A0_COLOR_16; + result = FMT_16; goto out_word4; + case PIPE_FORMAT_X24S8_USCALED: + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); case PIPE_FORMAT_Z24X8_UNORM: - result = V_0280A0_COLOR_8_24; - goto out_word4; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - result = V_0280A0_COLOR_8_24; + result = FMT_8_24; + goto out_word4; + case PIPE_FORMAT_S8X24_USCALED: + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + result = FMT_24_8; + goto out_word4; + case PIPE_FORMAT_S8_USCALED: + result = V_0280A0_COLOR_8; + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); goto out_word4; default: goto out_unknown; @@ -526,7 +636,7 @@ uint32_t r600_translate_texformat(enum pipe_format format, if (desc->channel[0].size == 5 && desc->channel[1].size == 6 && desc->channel[2].size == 5) { - result = V_0280A0_COLOR_5_6_5; + result = FMT_5_6_5; goto out_word4; } goto out_unknown; @@ -535,14 +645,14 @@ uint32_t r600_translate_texformat(enum pipe_format format, desc->channel[1].size == 5 && desc->channel[2].size == 5 && desc->channel[3].size == 1) { - result = V_0280A0_COLOR_1_5_5_5; + result = FMT_1_5_5_5; goto out_word4; } if (desc->channel[0].size == 10 && desc->channel[1].size == 10 && desc->channel[2].size == 10 && desc->channel[3].size == 2) { - result = V_0280A0_COLOR_10_10_10_2; + result = FMT_10_10_10_2; goto out_word4; } goto out_unknown; @@ -550,79 +660,89 @@ uint32_t r600_translate_texformat(enum pipe_format format, goto out_unknown; } + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + if (i == 4) + goto out_unknown; + /* uniform formats */ - switch (desc->channel[0].type) { + switch (desc->channel[i].type) { case UTIL_FORMAT_TYPE_UNSIGNED: case UTIL_FORMAT_TYPE_SIGNED: - if (!desc->channel[0].normalized && + if (!desc->channel[i].normalized && desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) { goto out_unknown; } - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 4: switch (desc->nr_channels) { case 2: - result = V_0280A0_COLOR_4_4; + result = FMT_4_4; goto out_word4; case 4: - result = V_0280A0_COLOR_4_4_4_4; + result = FMT_4_4_4_4; goto out_word4; } goto out_unknown; case 8: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_8; + result = FMT_8; goto out_word4; case 2: - result = V_0280A0_COLOR_8_8; + result = FMT_8_8; goto out_word4; case 4: - result = V_0280A0_COLOR_8_8_8_8; + result = FMT_8_8_8_8; goto out_word4; } goto out_unknown; case 16: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_16; + result = FMT_16; goto out_word4; case 2: - result = V_0280A0_COLOR_16_16; + result = FMT_16_16; goto out_word4; case 4: - result = V_0280A0_COLOR_16_16_16_16; + result = FMT_16_16_16_16; goto out_word4; } } goto out_unknown; case UTIL_FORMAT_TYPE_FLOAT: - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 16: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_16_FLOAT; + result = FMT_16_FLOAT; goto out_word4; case 2: - result = V_0280A0_COLOR_16_16_FLOAT; + result = FMT_16_16_FLOAT; goto out_word4; case 4: - result = V_0280A0_COLOR_16_16_16_16_FLOAT; + result = FMT_16_16_16_16_FLOAT; goto out_word4; } goto out_unknown; case 32: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_32_FLOAT; + result = FMT_32_FLOAT; goto out_word4; case 2: - result = V_0280A0_COLOR_32_32_FLOAT; + result = FMT_32_32_FLOAT; goto out_word4; case 4: - result = V_0280A0_COLOR_32_32_32_32_FLOAT; + result = FMT_32_32_32_32_FLOAT; goto out_word4; } } @@ -638,81 +758,3 @@ out_unknown: // R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); return ~0; } - -int r600_texture_from_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - int r; - - if (!rtexture->depth) { - /* This shouldn't happen maybe print a warning */ - return 0; - } - if (rtexture->uncompressed && !rtexture->dirty) { - /* Uncompressed bo already in good state */ - return 0; - } - - /* allocate uncompressed texture */ - if (rtexture->uncompressed == NULL) { - rtexture->uncompressed = radeon_ws_bo(rscreen->rw, rtexture->size, 4096, 0); - if (rtexture->uncompressed == NULL) { - return -ENOMEM; - } - } - - /* render a rectangle covering whole buffer to uncompress depth */ - r = r600_blit_uncompress_depth(ctx, rtexture, level); - if (r) { - return r; - } - - rtexture->dirty = 0; - return 0; -} - - - -int r600_texture_scissor(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - - if (!rtexture->scissor[level].cpm4) { - rctx->vtbl->texture_state_scissor(rscreen, rtexture, level); - } - return 0; -} - -int r600_texture_cb(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned cb, unsigned level) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - - if (!rtexture->cb[cb][level].cpm4) { - rctx->vtbl->texture_state_cb(rscreen, rtexture, cb, level); - } - return 0; -} - -int r600_texture_db(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - - if (!rtexture->db[level].cpm4) { - rctx->vtbl->texture_state_db(rscreen, rtexture, level); - } - return 0; -} - -int r600_texture_viewport(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - - if (!rtexture->viewport[level].cpm4) { - rctx->vtbl->texture_state_viewport(rscreen, rtexture, level); - } - return 0; -} diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 07bfc0593e..a3cb5b8600 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -607,8 +607,15 @@ #define G_028D34_DEPTH_HEIGHT_TILE_MAX(x) (((x) >> 0) & 0x3FF) #define C_028D34_DEPTH_HEIGHT_TILE_MAX 0xFFFFFC00 #define R_028D0C_DB_RENDER_CONTROL 0x028D0C +#define S_028D0C_DEPTH_CLEAR_ENABLE(x) (((x) & 0x1) << 0) +#define S_028D0C_STENCIL_CLEAR_ENABLE(x) (((x) & 0x1) << 1) +#define S_028D0C_DEPTH_COPY_ENABLE(x) (((x) & 0x1) << 2) +#define S_028D0C_STENCIL_COPY_ENABLE(x) (((x) & 0x1) << 3) +#define S_028D0C_RESUMMARIZE_ENABLE(x) (((x) & 0x1) << 4) #define S_028D0C_STENCIL_COMPRESS_DISABLE(x) (((x) & 0x1) << 5) #define S_028D0C_DEPTH_COMPRESS_DISABLE(x) (((x) & 0x1) << 6) +#define S_028D0C_COPY_CENTROID(x) (((x) & 0x1) << 7) +#define S_028D0C_COPY_SAMPLE(x) (((x) & 0x1) << 8) #define S_028D0C_R700_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 15) #define R_028D10_DB_RENDER_OVERRIDE 0x028D10 #define V_028D10_FORCE_OFF 0 @@ -660,6 +667,9 @@ #define S_02880C_Z_EXPORT_ENABLE(x) (((x) & 0x1) << 0) #define G_02880C_Z_EXPORT_ENABLE(x) (((x) >> 0) & 0x1) #define C_02880C_Z_EXPORT_ENABLE 0xFFFFFFFE +#define S_02880C_STENCIL_REF_EXPORT_ENABLE(x) (((x) & 0x1) << 1) +#define G_02880C_STENCIL_REF_EXPORT_ENABLE(x) (((x) >> 1) & 0x1) +#define C_02880C_STENCIL_REF_EXPORT_ENABLE 0xFFFFFFFD #define S_02880C_Z_ORDER(x) (((x) & 0x3) << 4) #define G_02880C_Z_ORDER(x) (((x) >> 4) & 0x3) #define C_02880C_Z_ORDER 0xFFFFFCFF @@ -894,6 +904,10 @@ #define S_038000_TILE_MODE(x) (((x) & 0xF) << 3) #define G_038000_TILE_MODE(x) (((x) >> 3) & 0xF) #define C_038000_TILE_MODE 0xFFFFFF87 +#define V_038000_ARRAY_LINEAR_GENERAL 0x00000000 +#define V_038000_ARRAY_LINEAR_ALIGNED 0x00000001 +#define V_038000_ARRAY_1D_TILED_THIN1 0x00000002 +#define V_038000_ARRAY_2D_TILED_THIN1 0x00000004 #define S_038000_TILE_TYPE(x) (((x) & 0x1) << 7) #define G_038000_TILE_TYPE(x) (((x) >> 7) & 0x1) #define C_038000_TILE_TYPE 0xFFFFFF7F @@ -1018,43 +1032,13 @@ #define S_038008_DATA_FORMAT(x) (((x) & 0x3F) << 20) #define G_038008_DATA_FORMAT(x) (((x) >> 20) & 0x3F) #define C_038008_DATA_FORMAT 0xFC0FFFFF -#define V_038008_COLOR_INVALID 0x00000000 -#define V_038008_COLOR_8 0x00000001 -#define V_038008_COLOR_4_4 0x00000002 -#define V_038008_COLOR_3_3_2 0x00000003 -#define V_038008_COLOR_16 0x00000005 -#define V_038008_COLOR_16_FLOAT 0x00000006 -#define V_038008_COLOR_8_8 0x00000007 -#define V_038008_COLOR_5_6_5 0x00000008 -#define V_038008_COLOR_6_5_5 0x00000009 -#define V_038008_COLOR_1_5_5_5 0x0000000A -#define V_038008_COLOR_4_4_4_4 0x0000000B -#define V_038008_COLOR_5_5_5_1 0x0000000C -#define V_038008_COLOR_32 0x0000000D -#define V_038008_COLOR_32_FLOAT 0x0000000E -#define V_038008_COLOR_16_16 0x0000000F -#define V_038008_COLOR_16_16_FLOAT 0x00000010 -#define V_038008_COLOR_8_24 0x00000011 -#define V_038008_COLOR_8_24_FLOAT 0x00000012 -#define V_038008_COLOR_24_8 0x00000013 -#define V_038008_COLOR_24_8_FLOAT 0x00000014 -#define V_038008_COLOR_10_11_11 0x00000015 -#define V_038008_COLOR_10_11_11_FLOAT 0x00000016 -#define V_038008_COLOR_11_11_10 0x00000017 -#define V_038008_COLOR_11_11_10_FLOAT 0x00000018 -#define V_038008_COLOR_2_10_10_10 0x00000019 -#define V_038008_COLOR_8_8_8_8 0x0000001A -#define V_038008_COLOR_10_10_10_2 0x0000001B -#define V_038008_COLOR_X24_8_32_FLOAT 0x0000001C -#define V_038008_COLOR_32_32 0x0000001D -#define V_038008_COLOR_32_32_FLOAT 0x0000001E -#define V_038008_COLOR_16_16_16_16 0x0000001F -#define V_038008_COLOR_16_16_16_16_FLOAT 0x00000020 -#define V_038008_COLOR_32_32_32_32 0x00000022 -#define V_038008_COLOR_32_32_32_32_FLOAT 0x00000023 + #define S_038008_NUM_FORMAT_ALL(x) (((x) & 0x3) << 26) #define G_038008_NUM_FORMAT_ALL(x) (((x) >> 26) & 0x3) #define C_038008_NUM_FORMAT_ALL 0xF3FFFFFF +#define V_038008_SQ_NUM_FORMAT_NORM 0x00000000 +#define V_038008_SQ_NUM_FORMAT_INT 0x00000001 +#define V_038008_SQ_NUM_FORMAT_SCALED 0x00000002 #define S_038008_FORMAT_COMP_ALL(x) (((x) & 0x1) << 28) #define G_038008_FORMAT_COMP_ALL(x) (((x) >> 28) & 0x1) #define C_038008_FORMAT_COMP_ALL 0xEFFFFFFF @@ -2112,6 +2096,9 @@ #define R_0286D4_SPI_INTERP_CONTROL_0 0x0286D4 #define R_028A48_PA_SC_MPASS_PS_CNTL 0x028A48 #define R_028C00_PA_SC_LINE_CNTL 0x028C00 +#define S_028C00_LAST_PIXEL(x) (((x) & 0x1) << 10) +#define G_028C00_LAST_PIXEL(x) (((x) >> 10) & 0x1) +#define C_028C00_LAST_PIXEL 0xFFFFFBFF #define R_028C04_PA_SC_AA_CONFIG 0x028C04 #define R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX 0x028C1C #define R_028C48_PA_SC_AA_MASK 0x028C48 @@ -2125,7 +2112,16 @@ #define R_028814_PA_SU_SC_MODE_CNTL 0x028814 #define R_028A00_PA_SU_POINT_SIZE 0x028A00 #define R_028A04_PA_SU_POINT_MINMAX 0x028A04 +#define S_028A04_MIN_SIZE(x) (((x) & 0xFFFF) << 0) +#define G_028A04_MIN_SIZE(x) (((x) >> 0) & 0xFFFF) +#define C_028A04_MIN_SIZE 0xFFFF0000 +#define S_028A04_MAX_SIZE(x) (((x) & 0xFFFF) << 16) +#define G_028A04_MAX_SIZE(x) (((x) >> 16) & 0xFFFF) +#define C_028A04_MAX_SIZE 0x0000FFFF #define R_028A08_PA_SU_LINE_CNTL 0x028A08 +#define S_028A08_WIDTH(x) (((x) & 0xFFFF) << 0) +#define G_028A08_WIDTH(x) (((x) >> 0) & 0xFFFF) +#define C_028A08_WIDTH 0xFFFF0000 #define R_028A0C_PA_SC_LINE_STIPPLE 0x028A0C #define R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL 0x028DF8 #define R_028DFC_PA_SU_POLY_OFFSET_CLAMP 0x028DFC @@ -2134,6 +2130,27 @@ #define R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE 0x028E08 #define R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET 0x028E0C #define R_028818_PA_CL_VTE_CNTL 0x028818 +#define S_028818_VPORT_X_SCALE_ENA(x) (((x) & 0x1) << 0) +#define G_028818_VPORT_X_SCALE_ENA(x) (((x) >> 0 & 0x1) +#define C_028818_VPORT_X_SCALE_ENA 0xFFFFFFFE +#define S_028818_VPORT_X_OFFSET_ENA(x) (((x) & 0x1) << 1) +#define G_028818_VPORT_X_OFFSET_ENA(x) (((x) >> 1 & 0x1) +#define C_028818_VPORT_X_OFFSET_ENA 0xFFFFFFFD +#define S_028818_VPORT_Y_SCALE_ENA(x) (((x) & 0x1) << 2) +#define G_028818_VPORT_Y_SCALE_ENA(x) (((x) >> 2 & 0x1) +#define C_028818_VPORT_Y_SCALE_ENA 0xFFFFFFFB +#define S_028818_VPORT_Y_OFFSET_ENA(x) (((x) & 0x1) << 3) +#define G_028818_VPORT_Y_OFFSET_ENA(x) (((x) >> 3 & 0x1) +#define C_028818_VPORT_Y_OFFSET_ENA 0xFFFFFFF7 +#define S_028818_VPORT_Z_SCALE_ENA(x) (((x) & 0x1) << 4) +#define G_028818_VPORT_Z_SCALE_ENA(x) (((x) >> 4 & 0x1) +#define C_028818_VPORT_Z_SCALE_ENA 0xFFFFFFEF +#define S_028818_VPORT_Z_OFFSET_ENA(x) (((x) & 0x1) << 5) +#define G_028818_VPORT_Z_OFFSET_ENA(x) (((x) >> 5 & 0x1) +#define C_028818_VPORT_Z_OFFSET_ENA 0xFFFFFFDF +#define S_028818_VTX_W0_FMT(x) (((x) & 0x1) << 10) +#define G_028818_VTX_W0_FMT(x) (((x) >> 10) & 0x1) +#define C_028818_VTX_W0_FMT 0xFFFFFBFF #define R_02843C_PA_CL_VPORT_XSCALE_0 0x02843C #define R_028444_PA_CL_VPORT_YSCALE_0 0x028444 #define R_02844C_PA_CL_VPORT_ZSCALE_0 0x02844C @@ -2199,6 +2216,12 @@ #define R_0286C0_SPI_PS_INPUT_CNTL_31 0x0286C0 #define R_028850_SQ_PGM_RESOURCES_PS 0x028850 #define R_028854_SQ_PGM_EXPORTS_PS 0x028854 +#define S_028854_EXPORT_COLORS(x) (((x) & 0xF) << 1) +#define G_028854_EXPORT_COLORS(x) (((x) >> 1) & 0xF) +#define C_028854_EXPORT_COLORS 0xFFFFFFE1 +#define S_028854_EXPORT_Z(x) (((x) & 0x1) << 0) +#define G_028854_EXPORT_Z(x) (((x) >> 0) & 0x1) +#define C_028854_EXPORT_Z 0xFFFFFFFE #define R_008958_VGT_PRIMITIVE_TYPE 0x008958 #define R_028A7C_VGT_DMA_INDEX_TYPE 0x028A7C #define R_028A88_VGT_DMA_NUM_INSTANCES 0x028A88 @@ -3435,6 +3458,16 @@ #define R_038014_RESOURCE0_WORD5 0x038014 #define R_038018_RESOURCE0_WORD6 0x038018 +#define R_028140_ALU_CONST_BUFFER_SIZE_PS_0 0x00028140 +#define R_028180_ALU_CONST_BUFFER_SIZE_VS_0 0x00028180 +#define R_028940_ALU_CONST_CACHE_PS_0 0x00028940 +#define R_028980_ALU_CONST_CACHE_VS_0 0x00028980 + +#define R_03CFF0_SQ_VTX_BASE_VTX_LOC 0x03CFF0 +#define R_03CFF4_SQ_VTX_START_INST_LOC 0x03CFF4 + +#define R_03E200_SQ_LOOP_CONST_0 0x3E200 + #define SQ_TEX_INST_LD 0x03 #define SQ_TEX_INST_GET_GRADIENTS_H 0x7 #define SQ_TEX_INST_GET_GRADIENTS_V 0x8 diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index a78cf0ce95..892dee86ba 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -20,12 +20,11 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "radeon.h" -#include "r600_context.h" -#include "r600_asm.h" +#include <stdio.h> #include "util/u_memory.h" +#include "r600_pipe.h" +#include "r600_asm.h" #include "r700_sq.h" -#include <stdio.h> int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) @@ -37,7 +36,7 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | - S_SQ_ALU_WORD0_SRC0_REL(alu->src[1].rel) | + S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) | S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | S_SQ_ALU_WORD0_LAST(alu->last); diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h deleted file mode 100644 index 5f9f21db1b..0000000000 --- a/src/gallium/drivers/r600/radeon.h +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright © 2009 Jerome Glisse <glisse@freedesktop.org> - * - * This file is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. - */ -#ifndef RADEON_H -#define RADEON_H - -#define RADEON_CTX_MAX_PM4 (64 * 1024 / 4) - -#include <stdint.h> - -#include <pipe/p_compiler.h> - -typedef uint64_t u64; -typedef uint32_t u32; -typedef uint16_t u16; -typedef uint8_t u8; - -struct radeon; - -enum radeon_family { - CHIP_UNKNOWN, - CHIP_R100, - CHIP_RV100, - CHIP_RS100, - CHIP_RV200, - CHIP_RS200, - CHIP_R200, - CHIP_RV250, - CHIP_RS300, - CHIP_RV280, - CHIP_R300, - CHIP_R350, - CHIP_RV350, - CHIP_RV380, - CHIP_R420, - CHIP_R423, - CHIP_RV410, - CHIP_RS400, - CHIP_RS480, - CHIP_RS600, - CHIP_RS690, - CHIP_RS740, - CHIP_RV515, - CHIP_R520, - CHIP_RV530, - CHIP_RV560, - CHIP_RV570, - CHIP_R580, - CHIP_R600, - CHIP_RV610, - CHIP_RV630, - CHIP_RV670, - CHIP_RV620, - CHIP_RV635, - CHIP_RS780, - CHIP_RS880, - CHIP_RV770, - CHIP_RV730, - CHIP_RV710, - CHIP_RV740, - CHIP_CEDAR, - CHIP_REDWOOD, - CHIP_JUNIPER, - CHIP_CYPRESS, - CHIP_HEMLOCK, - CHIP_LAST, -}; - -enum { - R600_SHADER_PS = 1, - R600_SHADER_VS, - R600_SHADER_GS, - R600_SHADER_FS, - R600_SHADER_MAX = R600_SHADER_FS, -}; - -enum radeon_family radeon_get_family(struct radeon *rw); -void radeon_set_mem_constant(struct radeon *radeon, boolean state); - -/* lowlevel WS bo */ -struct radeon_ws_bo; -struct radeon_ws_bo *radeon_ws_bo(struct radeon *radeon, - unsigned size, unsigned alignment, unsigned usage); -struct radeon_ws_bo *radeon_ws_bo_handle(struct radeon *radeon, - unsigned handle); -void *radeon_ws_bo_map(struct radeon *radeon, struct radeon_ws_bo *bo, unsigned usage, void *ctx); -void radeon_ws_bo_unmap(struct radeon *radeon, struct radeon_ws_bo *bo); -void radeon_ws_bo_reference(struct radeon *radeon, struct radeon_ws_bo **dst, - struct radeon_ws_bo *src); -int radeon_ws_bo_wait(struct radeon *radeon, struct radeon_ws_bo *bo); - -struct radeon_stype_info; -/* - * states functions - */ -struct radeon_state { - struct radeon *radeon; - unsigned refcount; - struct radeon_stype_info *stype; - unsigned state_id; - unsigned id; - unsigned shader_index; - unsigned nstates; - u32 states[64]; - unsigned npm4; - unsigned cpm4; - u32 pm4_crc; - u32 pm4[128]; - unsigned nbo; - struct radeon_ws_bo *bo[4]; - unsigned nreloc; - unsigned reloc_pm4_id[8]; - unsigned reloc_bo_id[8]; - u32 placement[8]; - unsigned bo_dirty[4]; -}; - -int radeon_state_init(struct radeon_state *rstate, struct radeon *radeon, u32 type, u32 id, u32 shader_class); -void radeon_state_fini(struct radeon_state *state); -int radeon_state_pm4(struct radeon_state *state); -int radeon_state_convert(struct radeon_state *state, u32 stype, u32 id, u32 shader_type); - -/* - * draw functions - */ -struct radeon_draw { - struct radeon *radeon; - struct radeon_state **state; -}; - -int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon); -void radeon_draw_bind(struct radeon_draw *draw, struct radeon_state *state); -void radeon_draw_unbind(struct radeon_draw *draw, struct radeon_state *state); - -/* - * radeon context functions - */ -#pragma pack(1) -struct radeon_cs_reloc { - uint32_t handle; - uint32_t read_domain; - uint32_t write_domain; - uint32_t flags; -}; -#pragma pack() - -struct radeon_ctx; - -struct radeon_ctx *radeon_ctx_init(struct radeon *radeon); -void radeon_ctx_fini(struct radeon_ctx *ctx); -void radeon_ctx_clear(struct radeon_ctx *ctx); -int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw); -int radeon_ctx_submit(struct radeon_ctx *ctx); -void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file); -int radeon_ctx_set_query_state(struct radeon_ctx *ctx, struct radeon_state *state); - -/* - * R600/R700 - */ - -enum r600_stype { - R600_STATE_CONFIG, - R600_STATE_CB_CNTL, - R600_STATE_RASTERIZER, - R600_STATE_VIEWPORT, - R600_STATE_SCISSOR, - R600_STATE_BLEND, - R600_STATE_DSA, - R600_STATE_SHADER, /* has PS,VS,GS,FS variants */ - R600_STATE_CONSTANT, /* has PS,VS,GS,FS variants */ - R600_STATE_CBUF, /* has PS,VS,GS,FS variants */ - R600_STATE_RESOURCE, /* has PS,VS,GS,FS variants */ - R600_STATE_SAMPLER, /* has PS,VS,GS,FS variants */ - R600_STATE_SAMPLER_BORDER, /* has PS,VS,GS,FS variants */ - R600_STATE_CB0, - R600_STATE_CB1, - R600_STATE_CB2, - R600_STATE_CB3, - R600_STATE_CB4, - R600_STATE_CB5, - R600_STATE_CB6, - R600_STATE_CB7, - R600_STATE_DB, - R600_STATE_QUERY_BEGIN, - R600_STATE_QUERY_END, - R600_STATE_UCP, - R600_STATE_VGT, - R600_STATE_DRAW, - R600_STATE_CB_FLUSH, - R600_STATE_DB_FLUSH, - R600_STATE_MAX, -}; - -#include "r600_states_inc.h" -#include "eg_states_inc.h" - -/* R600 QUERY BEGIN/END */ -#define R600_QUERY__OFFSET 0 -#define R600_QUERY_SIZE 1 -#define R600_QUERY_PM4 128 - -void r600_flush_ctx(void *data); -#endif diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index 3ffda87520..413da59e55 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -43,6 +43,7 @@ rbug_destroy(struct pipe_context *_pipe) struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + remove_from_list(&rb_pipe->list); pipe->destroy(pipe); FREE(rb_pipe); diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 35d426aa3e..28953582f0 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -23,8 +23,8 @@ C_SOURCES = \ sp_state_blend.c \ sp_state_clip.c \ sp_state_derived.c \ - sp_state_fs.c \ sp_state_sampler.c \ + sp_state_shader.c \ sp_state_so.c \ sp_state_rasterizer.c \ sp_state_surface.c \ diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index be5917a688..d5f4d28aef 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -24,9 +24,9 @@ softpipe = env.ConvenienceLibrary( 'sp_state_blend.c', 'sp_state_clip.c', 'sp_state_derived.c', - 'sp_state_fs.c', 'sp_state_rasterizer.c', 'sp_state_sampler.c', + 'sp_state_shader.c', 'sp_state_so.c', 'sp_state_surface.c', 'sp_state_vertex.c', diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index a7c9959b3e..b5d30bc6fc 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -228,61 +228,17 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.priv = priv; /* state setters */ - softpipe->pipe.create_blend_state = softpipe_create_blend_state; - softpipe->pipe.bind_blend_state = softpipe_bind_blend_state; - softpipe->pipe.delete_blend_state = softpipe_delete_blend_state; - - softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; - softpipe->pipe.bind_fragment_sampler_states = softpipe_bind_sampler_states; - softpipe->pipe.bind_vertex_sampler_states = softpipe_bind_vertex_sampler_states; - softpipe->pipe.bind_geometry_sampler_states = softpipe_bind_geometry_sampler_states; - softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; - - softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; - softpipe->pipe.bind_depth_stencil_alpha_state = softpipe_bind_depth_stencil_state; - softpipe->pipe.delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state; - - softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state; - softpipe->pipe.bind_rasterizer_state = softpipe_bind_rasterizer_state; - softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state; - - softpipe->pipe.create_fs_state = softpipe_create_fs_state; - softpipe->pipe.bind_fs_state = softpipe_bind_fs_state; - softpipe->pipe.delete_fs_state = softpipe_delete_fs_state; - - softpipe->pipe.create_vs_state = softpipe_create_vs_state; - softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; - softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; - - softpipe->pipe.create_gs_state = softpipe_create_gs_state; - softpipe->pipe.bind_gs_state = softpipe_bind_gs_state; - softpipe->pipe.delete_gs_state = softpipe_delete_gs_state; - - softpipe->pipe.create_vertex_elements_state = softpipe_create_vertex_elements_state; - softpipe->pipe.bind_vertex_elements_state = softpipe_bind_vertex_elements_state; - softpipe->pipe.delete_vertex_elements_state = softpipe_delete_vertex_elements_state; - - softpipe->pipe.create_stream_output_state = softpipe_create_stream_output_state; - softpipe->pipe.bind_stream_output_state = softpipe_bind_stream_output_state; - softpipe->pipe.delete_stream_output_state = softpipe_delete_stream_output_state; - - softpipe->pipe.set_blend_color = softpipe_set_blend_color; - softpipe->pipe.set_stencil_ref = softpipe_set_stencil_ref; - softpipe->pipe.set_clip_state = softpipe_set_clip_state; - softpipe->pipe.set_sample_mask = softpipe_set_sample_mask; - softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; + softpipe_init_blend_funcs(&softpipe->pipe); + softpipe_init_clip_funcs(&softpipe->pipe); + softpipe_init_query_funcs( softpipe ); + softpipe_init_rasterizer_funcs(&softpipe->pipe); + softpipe_init_sampler_funcs(&softpipe->pipe); + softpipe_init_shader_funcs(&softpipe->pipe); + softpipe_init_streamout_funcs(&softpipe->pipe); + softpipe_init_texture_funcs( &softpipe->pipe ); + softpipe_init_vertex_funcs(&softpipe->pipe); + softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state; - softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple; - softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; - softpipe->pipe.set_fragment_sampler_views = softpipe_set_sampler_views; - softpipe->pipe.set_vertex_sampler_views = softpipe_set_vertex_sampler_views; - softpipe->pipe.set_geometry_sampler_views = softpipe_set_geometry_sampler_views; - softpipe->pipe.create_sampler_view = softpipe_create_sampler_view; - softpipe->pipe.sampler_view_destroy = softpipe_sampler_view_destroy; - softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; - softpipe->pipe.set_stream_output_buffers = softpipe_set_stream_output_buffers; - softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers; - softpipe->pipe.set_index_buffer = softpipe_set_index_buffer; softpipe->pipe.draw_vbo = softpipe_draw_vbo; softpipe->pipe.draw_stream_output = softpipe_draw_stream_output; @@ -292,9 +248,6 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.is_resource_referenced = softpipe_is_resource_referenced; - softpipe_init_query_funcs( softpipe ); - softpipe_init_texture_funcs( &softpipe->pipe ); - softpipe->pipe.render_condition = softpipe_render_condition; /* diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 67e2c8f8bc..346e1b402b 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -158,9 +158,17 @@ exec_run( const struct sp_fragment_shader *base, case TGSI_SEMANTIC_POSITION: { uint j; - for (j = 0; j < 4; j++) { + + for (j = 0; j < 4; j++) quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j]; - } + } + break; + case TGSI_SEMANTIC_STENCIL: + { + uint j; + + for (j = 0; j < 4; j++) + quad->output.stencil[j] = (unsigned)machine->Outputs[i].xyzw[1].f[j]; } break; } diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index daa158df7c..5b18cd035e 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -169,9 +169,15 @@ fs_sse_run( const struct sp_fragment_shader *base, case TGSI_SEMANTIC_POSITION: { uint j; - for (j = 0; j < 4; j++) { - quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j]; - } + for (j = 0; j < 4; j++) + quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j]; + } + break; + case TGSI_SEMANTIC_STENCIL: + { + uint j; + for (j = 0; j < 4; j++) + quad->output.stencil[j] = machine->Outputs[i].xyzw[1].f[j]; } break; } diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h index a3236bd116..e745aa8061 100644 --- a/src/gallium/drivers/softpipe/sp_quad.h +++ b/src/gallium/drivers/softpipe/sp_quad.h @@ -85,6 +85,7 @@ struct quad_header_output /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; float depth[QUAD_SIZE]; + uint8_t stencil[QUAD_SIZE]; }; diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 5590d40892..c8f5f89568 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -47,6 +47,8 @@ struct depth_data { unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */ unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */ ubyte stencilVals[QUAD_SIZE]; + boolean use_shader_stencil_refs; + ubyte shader_stencil_refs[QUAD_SIZE]; struct softpipe_cached_tile *tile; }; @@ -186,6 +188,33 @@ convert_quad_depth( struct depth_data *data, } +/** + * Compute the depth_data::shader_stencil_refs[] values from the float fragment stencil values. + */ +static void +convert_quad_stencil( struct depth_data *data, + const struct quad_header *quad ) +{ + unsigned j; + + data->use_shader_stencil_refs = TRUE; + /* Copy quads stencil values + */ + switch (data->format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + { + for (j = 0; j < QUAD_SIZE; j++) { + data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j])); + } + } + break; + default: + assert(0); + } +} /** * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer. @@ -272,8 +301,14 @@ do_stencil_test(struct depth_data *data, { unsigned passMask = 0x0; unsigned j; + ubyte refs[QUAD_SIZE]; - ref &= valMask; + for (j = 0; j < QUAD_SIZE; j++) { + if (data->use_shader_stencil_refs) + refs[j] = data->shader_stencil_refs[j] & valMask; + else + refs[j] = ref & valMask; + } switch (func) { case PIPE_FUNC_NEVER: @@ -281,42 +316,42 @@ do_stencil_test(struct depth_data *data, break; case PIPE_FUNC_LESS: for (j = 0; j < QUAD_SIZE; j++) { - if (ref < (data->stencilVals[j] & valMask)) { + if (refs[j] < (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_EQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref == (data->stencilVals[j] & valMask)) { + if (refs[j] == (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_LEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref <= (data->stencilVals[j] & valMask)) { + if (refs[j] <= (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_GREATER: for (j = 0; j < QUAD_SIZE; j++) { - if (ref > (data->stencilVals[j] & valMask)) { + if (refs[j] > (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_NOTEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref != (data->stencilVals[j] & valMask)) { + if (refs[j] != (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_GEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref >= (data->stencilVals[j] & valMask)) { + if (refs[j] >= (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } @@ -348,9 +383,14 @@ apply_stencil_op(struct depth_data *data, { unsigned j; ubyte newstencil[QUAD_SIZE]; + ubyte refs[QUAD_SIZE]; for (j = 0; j < QUAD_SIZE; j++) { newstencil[j] = data->stencilVals[j]; + if (data->use_shader_stencil_refs) + refs[j] = data->shader_stencil_refs[j]; + else + refs[j] = ref; } switch (op) { @@ -367,7 +407,7 @@ apply_stencil_op(struct depth_data *data, case PIPE_STENCIL_OP_REPLACE: for (j = 0; j < QUAD_SIZE; j++) { if (mask & (1 << j)) { - newstencil[j] = ref; + newstencil[j] = refs[j]; } } break; @@ -688,15 +728,18 @@ depth_test_quads_fallback(struct quad_stage *qs, unsigned i, pass = 0; const struct sp_fragment_shader *fs = qs->softpipe->fs; boolean interp_depth = !fs->info.writes_z; + boolean shader_stencil_ref = fs->info.writes_stencil; struct depth_data data; + data.use_shader_stencil_refs = FALSE; if (qs->softpipe->depth_stencil->alpha.enabled) { nr = alpha_test_quads(qs, quads, nr); } - if (qs->softpipe->depth_stencil->depth.enabled || - qs->softpipe->depth_stencil->stencil[0].enabled) { + if (qs->softpipe->framebuffer.zsbuf && + (qs->softpipe->depth_stencil->depth.enabled || + qs->softpipe->depth_stencil->stencil[0].enabled)) { data.ps = qs->softpipe->framebuffer.zsbuf; data.format = data.ps->format; @@ -715,6 +758,9 @@ depth_test_quads_fallback(struct quad_stage *qs, } if (qs->softpipe->depth_stencil->stencil[0].enabled) { + if (shader_stencil_ref) + convert_quad_stencil(&data, quads[i]); + depth_stencil_test_quad(qs, &data, quads[i]); write_depth_stencil_values(&data, quads[i]); } @@ -805,6 +851,9 @@ choose_depth_test(struct quad_stage *qs, boolean occlusion = qs->softpipe->active_query_count; + if(!qs->softpipe->framebuffer.zsbuf) + depth = depthwrite = stencil = FALSE; + /* default */ qs->run = depth_test_quads_fallback; diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index 43b8e88e33..2cfd02a22c 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -47,7 +47,8 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp->framebuffer.zsbuf && !sp->depth_stencil->alpha.enabled && !sp->fs->info.uses_kill && - !sp->fs->info.writes_z; + !sp->fs->info.writes_z && + !sp->fs->info.writes_stencil; sp->quad.first = sp->quad.blend; diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 2053d02f62..37557d1194 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -114,6 +114,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 0; + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return 1; default: return 0; } diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 39d204de8a..525bf23734 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -70,6 +70,8 @@ struct sp_fragment_shader { struct tgsi_shader_info info; + struct draw_fragment_shader *draw_shader; + boolean origin_lower_left; /**< fragment shader uses lower left position origin? */ boolean pixel_center_integer; /**< fragment shader uses integer pixel center? */ @@ -113,126 +115,40 @@ struct sp_so_state { }; -void * -softpipe_create_blend_state(struct pipe_context *, - const struct pipe_blend_state *); -void softpipe_bind_blend_state(struct pipe_context *, - void *); -void softpipe_delete_blend_state(struct pipe_context *, - void *); - -void * -softpipe_create_sampler_state(struct pipe_context *, - const struct pipe_sampler_state *); -void softpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); void -softpipe_bind_vertex_sampler_states(struct pipe_context *, - unsigned num_samplers, - void **samplers); -void -softpipe_bind_geometry_sampler_states(struct pipe_context *, - unsigned num_samplers, - void **samplers); -void softpipe_delete_sampler_state(struct pipe_context *, void *); - -void * -softpipe_create_depth_stencil_state(struct pipe_context *, - const struct pipe_depth_stencil_alpha_state *); -void softpipe_bind_depth_stencil_state(struct pipe_context *, void *); -void softpipe_delete_depth_stencil_state(struct pipe_context *, void *); - -void * -softpipe_create_rasterizer_state(struct pipe_context *, - const struct pipe_rasterizer_state *); -void softpipe_bind_rasterizer_state(struct pipe_context *, void *); -void softpipe_delete_rasterizer_state(struct pipe_context *, void *); - -void softpipe_set_framebuffer_state( struct pipe_context *, - const struct pipe_framebuffer_state * ); - -void softpipe_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ); - -void softpipe_set_stencil_ref( struct pipe_context *pipe, - const struct pipe_stencil_ref *stencil_ref ); - -void softpipe_set_clip_state( struct pipe_context *, - const struct pipe_clip_state * ); - -void softpipe_set_sample_mask( struct pipe_context *, - unsigned sample_mask ); - -void softpipe_set_constant_buffer(struct pipe_context *, - uint shader, uint index, - struct pipe_resource *buf); - -void *softpipe_create_fs_state(struct pipe_context *, - const struct pipe_shader_state *); -void softpipe_bind_fs_state(struct pipe_context *, void *); -void softpipe_delete_fs_state(struct pipe_context *, void *); -void *softpipe_create_vs_state(struct pipe_context *, - const struct pipe_shader_state *); -void softpipe_bind_vs_state(struct pipe_context *, void *); -void softpipe_delete_vs_state(struct pipe_context *, void *); -void *softpipe_create_gs_state(struct pipe_context *, - const struct pipe_shader_state *); -void softpipe_bind_gs_state(struct pipe_context *, void *); -void softpipe_delete_gs_state(struct pipe_context *, void *); - -void *softpipe_create_vertex_elements_state(struct pipe_context *, - unsigned count, - const struct pipe_vertex_element *); -void softpipe_bind_vertex_elements_state(struct pipe_context *, void *); -void softpipe_delete_vertex_elements_state(struct pipe_context *, void *); - -void softpipe_set_polygon_stipple( struct pipe_context *, - const struct pipe_poly_stipple * ); - -void softpipe_set_scissor_state( struct pipe_context *, - const struct pipe_scissor_state * ); - -void softpipe_set_sampler_views( struct pipe_context *, - unsigned num, - struct pipe_sampler_view ** ); +softpipe_init_blend_funcs(struct pipe_context *pipe); void -softpipe_set_vertex_sampler_views(struct pipe_context *, - unsigned num, - struct pipe_sampler_view **); +softpipe_init_clip_funcs(struct pipe_context *pipe); void -softpipe_set_geometry_sampler_views(struct pipe_context *, - unsigned num, - struct pipe_sampler_view **); - -struct pipe_sampler_view * -softpipe_create_sampler_view(struct pipe_context *pipe, - struct pipe_resource *texture, - const struct pipe_sampler_view *templ); +softpipe_init_sampler_funcs(struct pipe_context *pipe); void -softpipe_sampler_view_destroy(struct pipe_context *pipe, - struct pipe_sampler_view *view); +softpipe_init_rasterizer_funcs(struct pipe_context *pipe); -void softpipe_set_viewport_state( struct pipe_context *, - const struct pipe_viewport_state * ); - -void softpipe_set_vertex_buffers(struct pipe_context *, - unsigned count, - const struct pipe_vertex_buffer *); +void +softpipe_init_shader_funcs(struct pipe_context *pipe); -void softpipe_set_index_buffer(struct pipe_context *, - const struct pipe_index_buffer *); +void +softpipe_init_streamout_funcs(struct pipe_context *pipe); +void +softpipe_init_vertex_funcs(struct pipe_context *pipe); -void softpipe_update_derived( struct softpipe_context *softpipe ); +void +softpipe_set_framebuffer_state(struct pipe_context *, + const struct pipe_framebuffer_state *); +void +softpipe_update_derived( struct softpipe_context *softpipe ); void softpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info); -void softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode); +void +softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode); void softpipe_map_transfers(struct softpipe_context *sp); @@ -253,20 +169,5 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe); struct vertex_info * softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); -void * -softpipe_create_stream_output_state( - struct pipe_context *pipe, - const struct pipe_stream_output_state *templ); -void -softpipe_bind_stream_output_state(struct pipe_context *pipe, - void *so); -void -softpipe_delete_stream_output_state(struct pipe_context *pipe, void *so); - -void -softpipe_set_stream_output_buffers(struct pipe_context *pipe, - struct pipe_resource **buffers, - int *offsets, - int num_buffers); #endif diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c index 2a203f44e5..12863824b8 100644 --- a/src/gallium/drivers/softpipe/sp_state_blend.c +++ b/src/gallium/drivers/softpipe/sp_state_blend.c @@ -34,15 +34,17 @@ #include "sp_state.h" -void * +static void * softpipe_create_blend_state(struct pipe_context *pipe, const struct pipe_blend_state *blend) { return mem_dup(blend, sizeof(*blend)); } -void softpipe_bind_blend_state( struct pipe_context *pipe, - void *blend ) + +static void +softpipe_bind_blend_state(struct pipe_context *pipe, + void *blend) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -53,15 +55,18 @@ void softpipe_bind_blend_state( struct pipe_context *pipe, softpipe->dirty |= SP_NEW_BLEND; } -void softpipe_delete_blend_state(struct pipe_context *pipe, - void *blend) + +static void +softpipe_delete_blend_state(struct pipe_context *pipe, + void *blend) { FREE( blend ); } -void softpipe_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ) +static void +softpipe_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *blend_color) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -73,19 +78,15 @@ void softpipe_set_blend_color( struct pipe_context *pipe, } -/** XXX move someday? Or consolidate all these simple state setters - * into one file. - */ - - -void * +static void * softpipe_create_depth_stencil_state(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *depth_stencil) { return mem_dup(depth_stencil, sizeof(*depth_stencil)); } -void + +static void softpipe_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil) { @@ -96,14 +97,17 @@ softpipe_bind_depth_stencil_state(struct pipe_context *pipe, softpipe->dirty |= SP_NEW_DEPTH_STENCIL_ALPHA; } -void + +static void softpipe_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) { FREE( depth ); } -void softpipe_set_stencil_ref( struct pipe_context *pipe, - const struct pipe_stencil_ref *stencil_ref ) + +static void +softpipe_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *stencil_ref) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -112,9 +116,28 @@ void softpipe_set_stencil_ref( struct pipe_context *pipe, softpipe->dirty |= SP_NEW_DEPTH_STENCIL_ALPHA; } -void + +static void softpipe_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) { } + +void +softpipe_init_blend_funcs(struct pipe_context *pipe) +{ + pipe->create_blend_state = softpipe_create_blend_state; + pipe->bind_blend_state = softpipe_bind_blend_state; + pipe->delete_blend_state = softpipe_delete_blend_state; + + pipe->set_blend_color = softpipe_set_blend_color; + + pipe->create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; + pipe->bind_depth_stencil_alpha_state = softpipe_bind_depth_stencil_state; + pipe->delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state; + + pipe->set_stencil_ref = softpipe_set_stencil_ref; + + pipe->set_sample_mask = softpipe_set_sample_mask; +} diff --git a/src/gallium/drivers/softpipe/sp_state_clip.c b/src/gallium/drivers/softpipe/sp_state_clip.c index 4946c776e3..f3a4c234e2 100644 --- a/src/gallium/drivers/softpipe/sp_state_clip.c +++ b/src/gallium/drivers/softpipe/sp_state_clip.c @@ -32,8 +32,9 @@ #include "draw/draw_context.h" -void softpipe_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) +static void +softpipe_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -42,8 +43,9 @@ void softpipe_set_clip_state( struct pipe_context *pipe, } -void softpipe_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) +static void +softpipe_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *viewport) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -55,8 +57,9 @@ void softpipe_set_viewport_state( struct pipe_context *pipe, } -void softpipe_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) +static void +softpipe_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *scissor) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -67,8 +70,9 @@ void softpipe_set_scissor_state( struct pipe_context *pipe, } -void softpipe_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) +static void +softpipe_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -77,3 +81,13 @@ void softpipe_set_polygon_stipple( struct pipe_context *pipe, softpipe->poly_stipple = *stipple; /* struct copy */ softpipe->dirty |= SP_NEW_STIPPLE; } + + +void +softpipe_init_clip_funcs(struct pipe_context *pipe) +{ + pipe->set_clip_state = softpipe_set_clip_state; + pipe->set_viewport_state = softpipe_set_viewport_state; + pipe->set_scissor_state = softpipe_set_scissor_state; + pipe->set_polygon_stipple = softpipe_set_polygon_stipple; +} diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c index c9ede09f26..3cd4acd743 100644 --- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -33,15 +33,17 @@ -void * +static void * softpipe_create_rasterizer_state(struct pipe_context *pipe, const struct pipe_rasterizer_state *rast) { return mem_dup(rast, sizeof(*rast)); } -void softpipe_bind_rasterizer_state(struct pipe_context *pipe, - void *rasterizer) + +static void +softpipe_bind_rasterizer_state(struct pipe_context *pipe, + void *rasterizer) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -56,10 +58,19 @@ void softpipe_bind_rasterizer_state(struct pipe_context *pipe, softpipe->dirty |= SP_NEW_RASTERIZER; } -void softpipe_delete_rasterizer_state(struct pipe_context *pipe, - void *rasterizer) + +static void +softpipe_delete_rasterizer_state(struct pipe_context *pipe, + void *rasterizer) { FREE( rasterizer ); } +void +softpipe_init_rasterizer_funcs(struct pipe_context *pipe) +{ + pipe->create_rasterizer_state = softpipe_create_rasterizer_state; + pipe->bind_rasterizer_state = softpipe_bind_rasterizer_state; + pipe->delete_rasterizer_state = softpipe_delete_rasterizer_state; +} diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 79d9516ad9..b59fbc33ed 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -33,7 +33,6 @@ #include "util/u_inlines.h" #include "draw/draw_context.h" -#include "draw/draw_context.h" #include "sp_context.h" #include "sp_state.h" @@ -54,7 +53,7 @@ static struct sp_sampler *sp_sampler( struct pipe_sampler_state *sampler ) } -void * +static void * softpipe_create_sampler_state(struct pipe_context *pipe, const struct pipe_sampler_state *sampler) { @@ -67,7 +66,7 @@ softpipe_create_sampler_state(struct pipe_context *pipe, } -void +static void softpipe_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **sampler) { @@ -94,7 +93,7 @@ softpipe_bind_sampler_states(struct pipe_context *pipe, } -void +static void softpipe_bind_vertex_sampler_states(struct pipe_context *pipe, unsigned num_samplers, void **samplers) @@ -118,10 +117,14 @@ softpipe_bind_vertex_sampler_states(struct pipe_context *pipe, softpipe->num_vertex_samplers = num_samplers; + draw_set_samplers(softpipe->draw, + softpipe->vertex_samplers, + softpipe->num_vertex_samplers); + softpipe->dirty |= SP_NEW_SAMPLER; } -void +static void softpipe_bind_geometry_sampler_states(struct pipe_context *pipe, unsigned num_samplers, void **samplers) @@ -149,7 +152,7 @@ softpipe_bind_geometry_sampler_states(struct pipe_context *pipe, } -struct pipe_sampler_view * +static struct pipe_sampler_view * softpipe_create_sampler_view(struct pipe_context *pipe, struct pipe_resource *resource, const struct pipe_sampler_view *templ) @@ -168,7 +171,7 @@ softpipe_create_sampler_view(struct pipe_context *pipe, } -void +static void softpipe_sampler_view_destroy(struct pipe_context *pipe, struct pipe_sampler_view *view) { @@ -177,7 +180,7 @@ softpipe_sampler_view_destroy(struct pipe_context *pipe, } -void +static void softpipe_set_sampler_views(struct pipe_context *pipe, unsigned num, struct pipe_sampler_view **views) @@ -207,7 +210,7 @@ softpipe_set_sampler_views(struct pipe_context *pipe, } -void +static void softpipe_set_vertex_sampler_views(struct pipe_context *pipe, unsigned num, struct pipe_sampler_view **views) @@ -234,10 +237,15 @@ softpipe_set_vertex_sampler_views(struct pipe_context *pipe, softpipe->num_vertex_sampler_views = num; + draw_set_sampler_views(softpipe->draw, + softpipe->vertex_sampler_views, + softpipe->num_vertex_sampler_views); + softpipe->dirty |= SP_NEW_TEXTURE; } -void + +static void softpipe_set_geometry_sampler_views(struct pipe_context *pipe, unsigned num, struct pipe_sampler_view **views) @@ -319,8 +327,6 @@ get_sampler_varient( unsigned unit, } - - void softpipe_reset_sampler_varients(struct softpipe_context *softpipe) { @@ -395,9 +401,7 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe) } } - - -void +static void softpipe_delete_sampler_state(struct pipe_context *pipe, void *sampler) { @@ -413,4 +417,20 @@ softpipe_delete_sampler_state(struct pipe_context *pipe, } +void +softpipe_init_sampler_funcs(struct pipe_context *pipe) +{ + pipe->create_sampler_state = softpipe_create_sampler_state; + pipe->bind_fragment_sampler_states = softpipe_bind_sampler_states; + pipe->bind_vertex_sampler_states = softpipe_bind_vertex_sampler_states; + pipe->bind_geometry_sampler_states = softpipe_bind_geometry_sampler_states; + pipe->delete_sampler_state = softpipe_delete_sampler_state; + + pipe->set_fragment_sampler_views = softpipe_set_sampler_views; + pipe->set_vertex_sampler_views = softpipe_set_vertex_sampler_views; + pipe->set_geometry_sampler_views = softpipe_set_geometry_sampler_views; + + pipe->create_sampler_view = softpipe_create_sampler_view; + pipe->sampler_view_destroy = softpipe_sampler_view_destroy; +} diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_shader.c index ded242d3dc..7fff338cce 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_shader.c @@ -42,7 +42,7 @@ #include "tgsi/tgsi_parse.h" -void * +static void * softpipe_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { @@ -60,7 +60,15 @@ softpipe_create_fs_state(struct pipe_context *pipe, state = softpipe_create_fs_exec( softpipe, templ ); } - assert(state); + if (!state) + return NULL; + + /* draw's fs state */ + state->draw_shader = draw_create_fragment_shader(softpipe->draw, templ); + if (!state->draw_shader) { + state->delete( state ); + return NULL; + } /* get/save the summary info for this shader */ tgsi_scan_shader(templ->tokens, &state->info); @@ -76,7 +84,7 @@ softpipe_create_fs_state(struct pipe_context *pipe, } -void +static void softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -90,11 +98,14 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) softpipe->fs = fs; + draw_bind_fragment_shader(softpipe->draw, + (softpipe->fs ? softpipe->fs->draw_shader : NULL)); + softpipe->dirty |= SP_NEW_FS; } -void +static void softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -109,11 +120,13 @@ softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) tgsi_exec_machine_bind_shader(softpipe->fs_machine, NULL, 0, NULL); } + draw_delete_fragment_shader(softpipe->draw, state->draw_shader); + state->delete( state ); } -void * +static void * softpipe_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { @@ -148,7 +161,7 @@ fail: } -void +static void softpipe_bind_vs_state(struct pipe_context *pipe, void *vs) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -162,7 +175,7 @@ softpipe_bind_vs_state(struct pipe_context *pipe, void *vs) } -void +static void softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -174,34 +187,8 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) FREE( state ); } -void -softpipe_set_constant_buffer(struct pipe_context *pipe, - uint shader, uint index, - struct pipe_resource *constants) -{ - struct softpipe_context *softpipe = softpipe_context(pipe); - unsigned size = constants ? constants->width0 : 0; - const void *data = constants ? softpipe_resource(constants)->data : NULL; - - assert(shader < PIPE_SHADER_TYPES); - draw_flush(softpipe->draw); - - /* note: reference counting */ - pipe_resource_reference(&softpipe->constants[shader][index], constants); - - if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) { - draw_set_mapped_constant_buffer(softpipe->draw, shader, index, data, size); - } - - softpipe->mapped_constants[shader][index] = data; - softpipe->const_buffer_size[shader][index] = size; - - softpipe->dirty |= SP_NEW_CONSTANTS; -} - - -void * +static void * softpipe_create_gs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { @@ -240,7 +227,7 @@ fail: } -void +static void softpipe_bind_gs_state(struct pipe_context *pipe, void *gs) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -254,7 +241,7 @@ softpipe_bind_gs_state(struct pipe_context *pipe, void *gs) } -void +static void softpipe_delete_gs_state(struct pipe_context *pipe, void *gs) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -266,3 +253,49 @@ softpipe_delete_gs_state(struct pipe_context *pipe, void *gs) (state) ? state->draw_data : 0); FREE(state); } + + +static void +softpipe_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + struct pipe_resource *constants) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + unsigned size = constants ? constants->width0 : 0; + const void *data = constants ? softpipe_resource(constants)->data : NULL; + + assert(shader < PIPE_SHADER_TYPES); + + draw_flush(softpipe->draw); + + /* note: reference counting */ + pipe_resource_reference(&softpipe->constants[shader][index], constants); + + if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) { + draw_set_mapped_constant_buffer(softpipe->draw, shader, index, data, size); + } + + softpipe->mapped_constants[shader][index] = data; + softpipe->const_buffer_size[shader][index] = size; + + softpipe->dirty |= SP_NEW_CONSTANTS; +} + + +void +softpipe_init_shader_funcs(struct pipe_context *pipe) +{ + pipe->create_fs_state = softpipe_create_fs_state; + pipe->bind_fs_state = softpipe_bind_fs_state; + pipe->delete_fs_state = softpipe_delete_fs_state; + + pipe->create_vs_state = softpipe_create_vs_state; + pipe->bind_vs_state = softpipe_bind_vs_state; + pipe->delete_vs_state = softpipe_delete_vs_state; + + pipe->create_gs_state = softpipe_create_gs_state; + pipe->bind_gs_state = softpipe_bind_gs_state; + pipe->delete_gs_state = softpipe_delete_gs_state; + + pipe->set_constant_buffer = softpipe_set_constant_buffer; +} diff --git a/src/gallium/drivers/softpipe/sp_state_so.c b/src/gallium/drivers/softpipe/sp_state_so.c index cfe23f9e84..ddfa3ef765 100644 --- a/src/gallium/drivers/softpipe/sp_state_so.c +++ b/src/gallium/drivers/softpipe/sp_state_so.c @@ -34,7 +34,7 @@ #include "draw/draw_context.h" -void * +static void * softpipe_create_stream_output_state(struct pipe_context *pipe, const struct pipe_stream_output_state *templ) { @@ -57,7 +57,8 @@ softpipe_create_stream_output_state(struct pipe_context *pipe, return so; } -void + +static void softpipe_bind_stream_output_state(struct pipe_context *pipe, void *so) { @@ -72,13 +73,15 @@ softpipe_bind_stream_output_state(struct pipe_context *pipe, draw_set_so_state(softpipe->draw, &sp_so->base); } -void + +static void softpipe_delete_stream_output_state(struct pipe_context *pipe, void *so) { FREE( so ); } -void + +static void softpipe_set_stream_output_buffers(struct pipe_context *pipe, struct pipe_resource **buffers, int *offsets, @@ -122,3 +125,16 @@ softpipe_set_stream_output_buffers(struct pipe_context *pipe, draw_set_mapped_so_buffers(softpipe->draw, map_buffers, num_buffers); } + + + +void +softpipe_init_streamout_funcs(struct pipe_context *pipe) +{ + pipe->create_stream_output_state = softpipe_create_stream_output_state; + pipe->bind_stream_output_state = softpipe_bind_stream_output_state; + pipe->delete_stream_output_state = softpipe_delete_stream_output_state; + + pipe->set_stream_output_buffers = softpipe_set_stream_output_buffers; +} + diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index b650fcaea5..7d8055f2ba 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -36,7 +36,7 @@ #include "draw/draw_context.h" -void * +static void * softpipe_create_vertex_elements_state(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_element *attribs) @@ -51,7 +51,8 @@ softpipe_create_vertex_elements_state(struct pipe_context *pipe, return velems; } -void + +static void softpipe_bind_vertex_elements_state(struct pipe_context *pipe, void *velems) { @@ -66,13 +67,15 @@ softpipe_bind_vertex_elements_state(struct pipe_context *pipe, draw_set_vertex_elements(softpipe->draw, sp_velems->count, sp_velems->velem); } -void + +static void softpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) { FREE( velems ); } -void + +static void softpipe_set_vertex_buffers(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_buffer *buffers) @@ -89,7 +92,8 @@ softpipe_set_vertex_buffers(struct pipe_context *pipe, draw_set_vertex_buffers(softpipe->draw, count, buffers); } -void + +static void softpipe_set_index_buffer(struct pipe_context *pipe, const struct pipe_index_buffer *ib) { @@ -102,3 +106,15 @@ softpipe_set_index_buffer(struct pipe_context *pipe, draw_set_index_buffer(softpipe->draw, ib); } + + +void +softpipe_init_vertex_funcs(struct pipe_context *pipe) +{ + pipe->create_vertex_elements_state = softpipe_create_vertex_elements_state; + pipe->bind_vertex_elements_state = softpipe_bind_vertex_elements_state; + pipe->delete_vertex_elements_state = softpipe_delete_vertex_elements_state; + + pipe->set_vertex_buffers = softpipe_set_vertex_buffers; + pipe->set_index_buffer = softpipe_set_index_buffer; +} diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index e654bb77c2..088e48f81f 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -105,14 +105,14 @@ lerp_3d(float a, float b, float c, /** * Compute coord % size for repeat wrap modes. - * Note that if coord is a signed integer, coord % size doesn't give - * the right value for coord < 0 (in terms of texture repeat). Just - * casting to unsigned fixes that. + * Note that if coord is negative, coord % size doesn't give the right + * value. To avoid that problem we add a large multiple of the size + * (rather than using a conditional). */ static INLINE int repeat(int coord, unsigned size) { - return (int) ((unsigned) coord % size); + return (coord + size * 1024) % size; } @@ -656,7 +656,8 @@ get_texel_2d(const struct sp_sampler_varient *samp, if (x < 0 || x >= (int) u_minify(texture->width0, level) || y < 0 || y >= (int) u_minify(texture->height0, level)) { - return samp->sampler->border_color; + return sp_tex_tile_cache_border_color(samp->cache, + samp->sampler->border_color); } else { return get_texel_2d_no_border( samp, addr, x, y ); @@ -750,7 +751,8 @@ get_texel_3d(const struct sp_sampler_varient *samp, if (x < 0 || x >= (int) u_minify(texture->width0, level) || y < 0 || y >= (int) u_minify(texture->height0, level) || z < 0 || z >= (int) u_minify(texture->depth0, level)) { - return samp->sampler->border_color; + return sp_tex_tile_cache_border_color(samp->cache, + samp->sampler->border_color); } else { return get_texel_3d_no_border( samp, addr, x, y, z ); diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index eb74f14a7b..e817c0c8cf 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -298,3 +298,23 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, +/** + * Return the swizzled border color. + */ +const float * +sp_tex_tile_cache_border_color(struct softpipe_tex_tile_cache *tc, + const float border_color[4]) +{ + float rgba01[6]; + + COPY_4V(rgba01, border_color); + rgba01[PIPE_SWIZZLE_ZERO] = 0.0f; + rgba01[PIPE_SWIZZLE_ONE] = 1.0f; + + tc->swz_border_color[0] = rgba01[tc->swizzle_r]; + tc->swz_border_color[1] = rgba01[tc->swizzle_g]; + tc->swz_border_color[2] = rgba01[tc->swizzle_b]; + tc->swz_border_color[3] = rgba01[tc->swizzle_a]; + + return tc->swz_border_color; +} diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h index 0794ffa0c5..05f25133da 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h @@ -90,6 +90,8 @@ struct softpipe_tex_tile_cache unsigned format; struct softpipe_tex_cached_tile *last_tile; /**< most recently retrieved tile */ + + float swz_border_color[4]; /**< swizzled border color */ }; @@ -154,7 +156,9 @@ sp_get_cached_tile_tex(struct softpipe_tex_tile_cache *tc, } - +const float * +sp_tex_tile_cache_border_color(struct softpipe_tex_tile_cache *tc, + const float border_color[4]); #endif /* SP_TEX_TILE_CACHE_H */ diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index bf33fd9417..aa76b8aa1e 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -38,6 +38,8 @@ #include "util/u_tile.h" #include "sp_tile_cache.h" +static struct softpipe_cached_tile * +sp_alloc_tile(struct softpipe_tile_cache *tc); /** @@ -94,9 +96,19 @@ sp_create_tile_cache( struct pipe_context *pipe ) if (tc) { tc->pipe = pipe; for (pos = 0; pos < NUM_ENTRIES; pos++) { - tc->entries[pos].addr.bits.invalid = 1; + tc->tile_addrs[pos].bits.invalid = 1; + } + tc->last_tile_addr.bits.invalid = 1; + + /* this allocation allows us to guarantee that allocation + * failures are never fatal later + */ + tc->tile = MALLOC_STRUCT( softpipe_cached_tile ); + if (!tc->tile) + { + FREE(tc); + return NULL; } - tc->last_tile = &tc->entries[0]; /* any tile */ /* XXX this code prevents valgrind warnings about use of uninitialized * memory in programs that don't clear the surface before rendering. @@ -120,7 +132,10 @@ sp_destroy_tile_cache(struct softpipe_tile_cache *tc) for (pos = 0; pos < NUM_ENTRIES; pos++) { /*assert(tc->entries[pos].x < 0);*/ + FREE( tc->entries[pos] ); } + FREE( tc->tile ); + if (tc->transfer) { tc->pipe->transfer_destroy(tc->pipe, tc->transfer); } @@ -285,11 +300,14 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc) uint numCleared = 0; assert(pt->resource); + if (!tc->tile) + tc->tile = sp_alloc_tile(tc); + /* clear the scratch tile to the clear value */ if (tc->depth_stencil) { - clear_tile(&tc->tile, pt->resource->format, tc->clear_val); + clear_tile(tc->tile, pt->resource->format, tc->clear_val); } else { - clear_tile_rgba(&tc->tile, pt->resource->format, tc->clear_color); + clear_tile_rgba(tc->tile, pt->resource->format, tc->clear_color); } /* push the tile to all positions marked as clear */ @@ -303,12 +321,12 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc) pipe_put_tile_raw(tc->pipe, pt, x, y, TILE_SIZE, TILE_SIZE, - tc->tile.data.any, 0/*STRIDE*/); + tc->tile->data.any, 0/*STRIDE*/); } else { pipe_put_tile_rgba(tc->pipe, pt, x, y, TILE_SIZE, TILE_SIZE, - (float *) tc->tile.data.color); + (float *) tc->tile->data.color); } numCleared++; } @@ -323,6 +341,27 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc) #endif } +static void +sp_flush_tile(struct softpipe_tile_cache* tc, unsigned pos) +{ + if (!tc->tile_addrs[pos].bits.invalid) { + if (tc->depth_stencil) { + pipe_put_tile_raw(tc->pipe, tc->transfer, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, + tc->entries[pos]->data.depth32, 0/*STRIDE*/); + } + else { + pipe_put_tile_rgba(tc->pipe, tc->transfer, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, + (float *) tc->entries[pos]->data.color); + } + tc->tile_addrs[pos].bits.invalid = 1; /* mark as empty */ + } +} /** * Flush the tile cache: write all dirty tiles back to the transfer. @@ -337,28 +376,21 @@ sp_flush_tile_cache(struct softpipe_tile_cache *tc) if (pt) { /* caching a drawing transfer */ for (pos = 0; pos < NUM_ENTRIES; pos++) { - struct softpipe_cached_tile *tile = tc->entries + pos; - if (!tile->addr.bits.invalid) { - if (tc->depth_stencil) { - pipe_put_tile_raw(tc->pipe, pt, - tile->addr.bits.x * TILE_SIZE, - tile->addr.bits.y * TILE_SIZE, - TILE_SIZE, TILE_SIZE, - tile->data.depth32, 0/*STRIDE*/); - } - else { - pipe_put_tile_rgba(tc->pipe, pt, - tile->addr.bits.x * TILE_SIZE, - tile->addr.bits.y * TILE_SIZE, - TILE_SIZE, TILE_SIZE, - (float *) tile->data.color); - } - tile->addr.bits.invalid = 1; /* mark as empty */ - inuse++; + struct softpipe_cached_tile *tile = tc->entries[pos]; + if (!tile) + { + assert(tc->tile_addrs[pos].bits.invalid); + continue; } + + sp_flush_tile(tc, pos); + ++inuse; } sp_tile_cache_flush_clear(tc); + + + tc->last_tile_addr.bits.invalid = 1; } #if 0 @@ -366,6 +398,38 @@ sp_flush_tile_cache(struct softpipe_tile_cache *tc) #endif } +static struct softpipe_cached_tile * +sp_alloc_tile(struct softpipe_tile_cache *tc) +{ + struct softpipe_cached_tile * tile = MALLOC_STRUCT(softpipe_cached_tile); + if (!tile) + { + /* in this case, steal an existing tile */ + if (!tc->tile) + { + unsigned pos; + for (pos = 0; pos < NUM_ENTRIES; ++pos) { + if (!tc->entries[pos]) + continue; + + sp_flush_tile(tc, pos); + tc->tile = tc->entries[pos]; + tc->entries[pos] = NULL; + break; + } + + /* this should never happen */ + if (!tc->tile) + abort(); + } + + tile = tc->tile; + tc->tile = NULL; + + tc->last_tile_addr.bits.invalid = 1; + } + return tile; +} /** * Get a tile from the cache. @@ -380,30 +444,35 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, /* cache pos/entry: */ const int pos = CACHE_POS(addr.bits.x, addr.bits.y); - struct softpipe_cached_tile *tile = tc->entries + pos; + struct softpipe_cached_tile *tile = tc->entries[pos]; + + if (!tile) { + tile = sp_alloc_tile(tc); + tc->entries[pos] = tile; + } - if (addr.value != tile->addr.value) { + if (addr.value != tc->tile_addrs[pos].value) { assert(pt->resource); - if (tile->addr.bits.invalid == 0) { + if (tc->tile_addrs[pos].bits.invalid == 0) { /* put dirty tile back in framebuffer */ if (tc->depth_stencil) { pipe_put_tile_raw(tc->pipe, pt, - tile->addr.bits.x * TILE_SIZE, - tile->addr.bits.y * TILE_SIZE, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { pipe_put_tile_rgba(tc->pipe, pt, - tile->addr.bits.x * TILE_SIZE, - tile->addr.bits.y * TILE_SIZE, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } } - tile->addr = addr; + tc->tile_addrs[pos] = addr; if (is_clear_flag_set(tc->clear_flags, addr)) { /* don't get tile from framebuffer, just clear it */ @@ -419,15 +488,15 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, /* get new tile data from transfer */ if (tc->depth_stencil) { pipe_get_tile_raw(tc->pipe, pt, - tile->addr.bits.x * TILE_SIZE, - tile->addr.bits.y * TILE_SIZE, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { pipe_get_tile_rgba(tc->pipe, pt, - tile->addr.bits.x * TILE_SIZE, - tile->addr.bits.y * TILE_SIZE, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } @@ -435,6 +504,7 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, } tc->last_tile = tile; + tc->last_tile_addr = addr; return tile; } @@ -464,7 +534,7 @@ sp_tile_cache_clear(struct softpipe_tile_cache *tc, const float *rgba, memset(tc->clear_flags, 255, sizeof(tc->clear_flags)); for (pos = 0; pos < NUM_ENTRIES; pos++) { - struct softpipe_cached_tile *tile = tc->entries + pos; - tile->addr.bits.invalid = 1; + tc->tile_addrs[pos].bits.invalid = 1; } + tc->last_tile_addr.bits.invalid = 1; } diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index e03d53eb24..031c7c1ea5 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -57,7 +57,6 @@ union tile_address { struct softpipe_cached_tile { - union tile_address addr; union { float color[TILE_SIZE][TILE_SIZE][4]; uint color32[TILE_SIZE][TILE_SIZE]; @@ -83,14 +82,16 @@ struct softpipe_tile_cache struct pipe_transfer *transfer; void *transfer_map; - struct softpipe_cached_tile entries[NUM_ENTRIES]; + union tile_address tile_addrs[NUM_ENTRIES]; + struct softpipe_cached_tile *entries[NUM_ENTRIES]; uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32]; float clear_color[4]; /**< for color bufs */ uint clear_val; /**< for z+stencil, or packed color clear value */ boolean depth_stencil; /**< Is the surface a depth/stencil format? */ - struct softpipe_cached_tile tile; /**< scratch tile for clears */ + struct softpipe_cached_tile *tile; /**< scratch tile for clears */ + union tile_address last_tile_addr; struct softpipe_cached_tile *last_tile; /**< most recently retrieved tile */ }; @@ -147,7 +148,7 @@ sp_get_cached_tile(struct softpipe_tile_cache *tc, { union tile_address addr = tile_address( x, y ); - if (tc->last_tile->addr.value == addr.value) + if (tc->last_tile_addr.value == addr.value) return tc->last_tile; return sp_find_cached_tile( tc, addr ); diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c index 0cd620189b..781fe6334a 100644 --- a/src/gallium/drivers/svga/svga_tgsi.c +++ b/src/gallium/drivers/svga/svga_tgsi.c @@ -203,7 +203,7 @@ svga_tgsi_translate( const struct svga_shader *shader, emit.imm_start += key.vkey.num_zero_stride_vertex_elements; } - emit.nr_hw_const = (emit.imm_start + emit.info.file_max[TGSI_FILE_IMMEDIATE] + 1); + emit.nr_hw_float_const = (emit.imm_start + emit.info.file_max[TGSI_FILE_IMMEDIATE] + 1); emit.nr_hw_temp = emit.info.file_max[TGSI_FILE_TEMPORARY] + 1; emit.in_main_func = TRUE; diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h index b4e90a957d..63ef7f867a 100644 --- a/src/gallium/drivers/svga/svga_tgsi_emit.h +++ b/src/gallium/drivers/svga/svga_tgsi_emit.h @@ -62,7 +62,8 @@ struct svga_shader_emitter int imm_start; - int nr_hw_const; + int nr_hw_float_const; + int nr_hw_int_const; int nr_hw_temp; int insn_offset; diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 72dccdf150..f2591c5721 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -197,22 +197,37 @@ translate_src_register( const struct svga_shader_emitter *emit, break; } - /* Indirect addressing (for coninstant buffer lookups only) + /* Indirect addressing. */ - if (reg->Register.Indirect) - { - /* we shift the offset towards the minimum */ - if (svga_arl_needs_adjustment( emit )) { - src.base.num -= svga_arl_adjustment( emit ); + if (reg->Register.Indirect) { + if (emit->unit == PIPE_SHADER_FRAGMENT) { + /* Pixel shaders have only loop registers for relative + * addressing into inputs. Ignore the redundant address + * register, the contents of aL should be in sync with it. + */ + if (reg->Register.File == TGSI_FILE_INPUT) { + src.base.relAddr = 1; + src.indirect = src_token(SVGA3DREG_LOOP, 0); + } + } + else { + /* Constant buffers only. + */ + if (reg->Register.File == TGSI_FILE_CONSTANT) { + /* we shift the offset towards the minimum */ + if (svga_arl_needs_adjustment( emit )) { + src.base.num -= svga_arl_adjustment( emit ); + } + src.base.relAddr = 1; + + /* Not really sure what should go in the second token: + */ + src.indirect = src_token( SVGA3DREG_ADDR, + reg->Indirect.Index ); + + src.indirect.swizzle = SWIZZLE_XXXX; + } } - src.base.relAddr = 1; - - /* Not really sure what should go in the second token: - */ - src.indirect = src_token( SVGA3DREG_ADDR, - reg->Indirect.Index ); - - src.indirect.swizzle = SWIZZLE_XXXX; } src = swizzle( src, @@ -538,7 +553,7 @@ static boolean emit_def_const( struct svga_shader_emitter *emit, static INLINE boolean create_zero_immediate( struct svga_shader_emitter *emit ) { - unsigned idx = emit->nr_hw_const++; + unsigned idx = emit->nr_hw_float_const++; if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, 0, 0, 0, 1 )) @@ -553,7 +568,7 @@ create_zero_immediate( struct svga_shader_emitter *emit ) static INLINE boolean create_loop_const( struct svga_shader_emitter *emit ) { - unsigned idx = emit->nr_hw_const++; + unsigned idx = emit->nr_hw_int_const++; if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx, 255, /* iteration count */ @@ -571,7 +586,7 @@ create_loop_const( struct svga_shader_emitter *emit ) static INLINE boolean create_sincos_consts( struct svga_shader_emitter *emit ) { - unsigned idx = emit->nr_hw_const++; + unsigned idx = emit->nr_hw_float_const++; if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, -1.5500992e-006f, @@ -581,7 +596,7 @@ create_sincos_consts( struct svga_shader_emitter *emit ) return FALSE; emit->sincos_consts_idx = idx; - idx = emit->nr_hw_const++; + idx = emit->nr_hw_float_const++; if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, -0.020833334f, @@ -602,7 +617,7 @@ create_arl_consts( struct svga_shader_emitter *emit ) for (i = 0; i < emit->num_arl_consts; i += 4) { int j; - unsigned idx = emit->nr_hw_const++; + unsigned idx = emit->nr_hw_float_const++; float vals[4]; for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) { vals[j] = emit->arl_consts[i + j].number; @@ -1593,6 +1608,14 @@ static boolean emit_arl(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { ++emit->current_arl; + if (emit->unit == PIPE_SHADER_FRAGMENT) { + /* MOVA not present in pixel shader instruction set. + * Ignore this instruction altogether since it is + * only used for loop counters -- and for that + * we reference aL directly. + */ + return TRUE; + } if (svga_arl_needs_adjustment( emit )) { return emit_fake_arl( emit, insn ); } else { @@ -2384,7 +2407,7 @@ static boolean make_immediate( struct svga_shader_emitter *emit, float d, struct src_register *out ) { - unsigned idx = emit->nr_hw_const++; + unsigned idx = emit->nr_hw_float_const++; if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, a, b, c, d )) diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 271cd4aff5..04f30f82c3 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -92,15 +92,7 @@ trace_context_draw_vbo(struct pipe_context *_pipe, trace_dump_call_begin("pipe_context", "draw_vbo"); trace_dump_arg(ptr, pipe); - trace_dump_arg(bool, info->indexed); - trace_dump_arg(uint, info->mode); - trace_dump_arg(uint, info->start); - trace_dump_arg(uint, info->count); - trace_dump_arg(uint, info->start_instance); - trace_dump_arg(uint, info->instance_count); - trace_dump_arg(int, info->index_bias); - trace_dump_arg(uint, info->min_index); - trace_dump_arg(uint, info->max_index); + trace_dump_arg(draw_info, info); pipe->draw_vbo(pipe, info); @@ -987,24 +979,24 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe, static INLINE void trace_context_set_index_buffer(struct pipe_context *_pipe, - const struct pipe_index_buffer *_ib) + const struct pipe_index_buffer *ib) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; - struct pipe_index_buffer unwrapped_ib, *ib = NULL; - - if (_ib) { - unwrapped_ib = *_ib; - unwrapped_ib.buffer = trace_resource_unwrap(tr_ctx, _ib->buffer); - ib = &unwrapped_ib; - } trace_dump_call_begin("pipe_context", "set_index_buffer"); trace_dump_arg(ptr, pipe); - trace_dump_arg(index_buffer, _ib); + trace_dump_arg(index_buffer, ib); - pipe->set_index_buffer(pipe, ib); + if (ib) { + struct pipe_index_buffer _ib; + _ib = *ib; + _ib.buffer = trace_resource_unwrap(tr_ctx, ib->buffer); + pipe->set_index_buffer(pipe, &_ib); + } else { + pipe->set_index_buffer(pipe, NULL); + } trace_dump_call_end(); } diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index bd9a9bfaf1..8f81606032 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -573,3 +573,32 @@ void trace_dump_vertex_element(const struct pipe_vertex_element *state) trace_dump_struct_end(); } + + +void trace_dump_draw_info(const struct pipe_draw_info *state) +{ + if (!trace_dumping_enabled_locked()) + return; + + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_draw_info"); + + trace_dump_member(bool, state, indexed); + + trace_dump_member(uint, state, mode); + trace_dump_member(uint, state, start); + trace_dump_member(uint, state, count); + + trace_dump_member(uint, state, start_instance); + trace_dump_member(uint, state, instance_count); + + trace_dump_member(int, state, index_bias); + trace_dump_member(uint, state, min_index); + trace_dump_member(uint, state, max_index); + + trace_dump_struct_end(); +} diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index 2e70f4e1c7..078d208610 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -79,5 +79,7 @@ void trace_dump_index_buffer(const struct pipe_index_buffer *state); void trace_dump_vertex_element(const struct pipe_vertex_element *state); +void trace_dump_draw_info(const struct pipe_draw_info *state); + #endif /* TR_STATE_H */ diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index 5020599591..3d6b5b5c81 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -122,6 +122,27 @@ typedef unsigned char boolean; # endif #endif +/* + * Define the C99 restrict keyword. + * + * See also: + * - http://cellperformance.beyond3d.com/articles/2006/05/demystifying-the-restrict-keyword.html + */ +#ifndef restrict +# if (__STDC_VERSION__ >= 199901L) + /* C99 */ +# elif defined(__SUNPRO_C) && defined(__C99FEATURES__) + /* C99 */ +# elif defined(__GNUC__) +# define restrict __restrict__ +# elif defined(_MSC_VER) +# define restrict __restrict +# else +# define restrict /* */ +# endif +#endif + + /* Function visibility */ #ifndef PUBLIC # if defined(__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 8b4663742f..b6894c09e8 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -464,7 +464,8 @@ enum pipe_cap { PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER, - PIPE_CAP_DEPTH_CLAMP + PIPE_CAP_DEPTH_CLAMP, + PIPE_CAP_SHADER_STENCIL_EXPORT, }; /* Shader caps not specific to any single stage */ diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index 06412f4894..22cc7aa18a 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -186,6 +186,10 @@ enum pipe_format { PIPE_FORMAT_R8G8B8X8_UNORM = 134, PIPE_FORMAT_B4G4R4X4_UNORM = 135, + /* some stencil samplers formats */ + PIPE_FORMAT_X24S8_USCALED = 136, + PIPE_FORMAT_S8X24_USCALED = 137, + PIPE_FORMAT_X32_S8X24_USCALED = 138, PIPE_FORMAT_COUNT }; diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 74488de17e..ba433b2bd2 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -143,7 +143,8 @@ struct tgsi_declaration_dimension #define TGSI_SEMANTIC_EDGEFLAG 8 #define TGSI_SEMANTIC_PRIMID 9 #define TGSI_SEMANTIC_INSTANCEID 10 -#define TGSI_SEMANTIC_COUNT 11 /**< number of semantic values */ +#define TGSI_SEMANTIC_STENCIL 11 +#define TGSI_SEMANTIC_COUNT 12 /**< number of semantic values */ struct tgsi_declaration_semantic { diff --git a/src/gallium/include/state_tracker/st_api.h b/src/gallium/include/state_tracker/st_api.h index 8ea1554568..21e2165ed9 100644 --- a/src/gallium/include/state_tracker/st_api.h +++ b/src/gallium/include/state_tracker/st_api.h @@ -384,7 +384,6 @@ struct st_manager * This function is optional. */ boolean (*get_egl_image)(struct st_manager *smapi, - struct st_context_iface *stctx, void *egl_image, struct st_egl_image *out); diff --git a/src/gallium/include/state_tracker/sw_winsys.h b/src/gallium/include/state_tracker/sw_winsys.h index d461dedb90..0b11fe3beb 100644 --- a/src/gallium/include/state_tracker/sw_winsys.h +++ b/src/gallium/include/state_tracker/sw_winsys.h @@ -97,7 +97,7 @@ struct sw_winsys */ struct sw_displaytarget * (*displaytarget_from_handle)( struct sw_winsys *ws, - const struct pipe_resource *template, + const struct pipe_resource *templat, struct winsys_handle *whandle, unsigned *stride ); diff --git a/src/gallium/state_trackers/d3d1x/.gitignore b/src/gallium/state_trackers/d3d1x/.gitignore new file mode 100644 index 0000000000..f23bac7176 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/.gitignore @@ -0,0 +1,20 @@ +d3d1xshader/include/sm4_defs.h +d3d1xshader/src/sm4_text.cpp +d3d1xshader/tools/fxdis +d3dapi/*.h +docs/module_dependencies.svg +docs/module_dependencies.pdf +gd3d10/*.generated.* +gd3d1x/tools/dxbc2tgsi +gd3dapi/*.h +mstools/DXSDK +mstools/*.dll +mstools/*.exe +progs/bin/* +winedlls/*/version.res +winedlls/*/*.def +*.suo +*.vcxproj.filters +*.vcxproj.user +Debug +Release diff --git a/src/gallium/state_trackers/d3d1x/Makefile b/src/gallium/state_trackers/d3d1x/Makefile new file mode 100644 index 0000000000..75076eed72 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/Makefile @@ -0,0 +1,11 @@ +SUBDIRS=d3dapi gd3dapi docs d3d1xstutil d3d1xshader gd3d1x gd3d11 gd3d10 dxgi dxgid3d11 dxgid3d10 + +all: + @for dir in $(SUBDIRS) ; do $(MAKE) -C "$$dir" || exit $?; done + +clean: + rm -f `find . -name \*.[oa]` + rm -f `find . -name depend` + +install: + diff --git a/src/gallium/state_trackers/d3d1x/Makefile.inc b/src/gallium/state_trackers/d3d1x/Makefile.inc new file mode 100644 index 0000000000..303915d5bf --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/Makefile.inc @@ -0,0 +1,19 @@ +TOP=../../../../.. +include $(TOP)/configs/current + +IDL=$(wildcard *.idl include/*.idl) +IDL_H=$(IDL:.idl=.h) +LD=$(CXX) $(CXXFLAGS) + +include ../../../Makefile.template + +idl: $(IDL_H) + +%.h: %.idl + widl -I../gd3dapi -I../d3dapi -I../w32api -U /dev/null -H $@ $^ + +%.svg: %.dot + dot -Tsvg -o $@ $< + +%.pdf: %.dot + dot -Tpdf -o $@ $< diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/Makefile b/src/gallium/state_trackers/d3d1x/d3d1xshader/Makefile new file mode 100644 index 0000000000..f132518ccc --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/Makefile @@ -0,0 +1,16 @@ +LIBNAME=d3d1xshader +CPP_SOURCES=$(wildcard src/*.cpp) src/sm4_text.cpp +LIBRARY_INCLUDES=-Iinclude -I../d3dapi -I../w32api +PROGS=tools/fxdis +PROGS_DEPS=libd3d1xshader.a +LIBS=$(PROGS_DEPS) + +include ../Makefile.inc + +include/sm4.h: include/sm4_defs.h + +include/sm4_defs.h: $(wildcard defs/*.txt) + ./gen-header.sh $^ > $@ + +src/sm4_text.cpp: $(wildcard defs/*.txt) + ./gen-text.sh $^ > $@ diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/files.txt b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/files.txt new file mode 100644 index 0000000000..c44a46beed --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/files.txt @@ -0,0 +1,41 @@ +temp +input +output +indexable_temp +immediate32 +immediate64 +sampler +resource +constant_buffer +immediate_constant_buffer +label +input_primitiveid +output_depth +null +rasterizer +output_coverage_mask +stream +function_body +function_table +interface +function_input +function_output +output_control_point_id +input_fork_instance_id +input_join_instance_id +input_control_point +output_control_point +input_patch_constant +input_domain_point +this_pointer +unordered_access_view +thread_group_shared_memory +input_thread_id +input_thread_group_id +input_thread_id_in_group +input_coverage_mask +input_thread_id_in_group_flattened +input_gs_instance_id +output_depth_greater_equal +output_depth_less_equal +cycle_counter diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/interpolations.txt b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/interpolations.txt new file mode 100644 index 0000000000..4e52eec34f --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/interpolations.txt @@ -0,0 +1,8 @@ +undefined +constant +linear +linear centroid +linear noperspective +linear noperspective centroid +linear sample +linear noperspective sample diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/opcodes.txt b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/opcodes.txt new file mode 100644 index 0000000000..46ff28d6f9 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/opcodes.txt @@ -0,0 +1,207 @@ +add +and +break +breakc +call +callc +case +continue +continuec +cut +default +deriv_rtx +deriv_rty +discard +div +dp2 +dp3 +dp4 +else +emit +emitthencut +endif +endloop +endswitch +eq +exp +frc +ftoi +ftou +ge +iadd +if +ieq +ige +ilt +imad +imax +imin +imul +ine +ineg +ishl +ishr +itof +label +ld +ld_ms +log +loop +lt +mad +min +max +customdata +mov +movc +mul +ne +nop +not +or +resinfo +ret +retc +round_ne +round_ni +round_pi +round_z +rsq +sample +sample_c +sample_c_lz +sample_l +sample_d +sample_b +sqrt +switch +sincos +udiv +ult +uge +umul +umad +umax +umin +ushr +utof +xor +dcl_resource +dcl_constant_buffer +dcl_sampler +dcl_index_range +dcl_gs_output_primitive_topology +dcl_gs_input_primitive +dcl_max_output_vertex_count +dcl_input +dcl_input_sgv +dcl_input_siv +dcl_input_ps +dcl_input_ps_sgv +dcl_input_ps_siv +dcl_output +dcl_output_sgv +dcl_output_siv +dcl_temps +dcl_indexable_temp +dcl_global_flags +d3d10_count +lod +gather4 +sample_pos +sample_info +d3d10_1_count +hs_decls +hs_control_point_phase +hs_fork_phase +hs_join_phase +emit_stream +cut_stream +emitthencut_stream +interface_call +bufinfo +deriv_rtx_coarse +deriv_rtx_fine +deriv_rty_coarse +deriv_rty_fine +gather4_c +gather4_po +gather4_po_c +rcp +f32tof16 +f16tof32 +uaddc +usubb +countbits +firstbit_hi +firstbit_lo +firstbit_shi +ubfe +ibfe +bfi +bfrev +swapc +dcl_stream +dcl_function_body +dcl_function_table +dcl_interface +dcl_input_control_point_count +dcl_output_control_point_count +dcl_tess_domain +dcl_tess_partitioning +dcl_tess_output_primitive +dcl_hs_max_tessfactor +dcl_hs_fork_phase_instance_count +dcl_hs_join_phase_instance_count +dcl_thread_group +dcl_unordered_access_view_typed +dcl_unordered_access_view_raw +dcl_unordered_access_view_structured +dcl_thread_group_shared_memory_raw +dcl_thread_group_shared_memory_structured +dcl_resource_raw +dcl_resource_structured +ld_uav_typed +store_uav_typed +ld_raw +store_raw +ld_structured +store_structured +atomic_and +atomic_or +atomic_xor +atomic_cmp_store +atomic_iadd +atomic_imax +atomic_imin +atomic_umax +atomic_umin +imm_atomic_alloc +imm_atomic_consume +imm_atomic_iadd +imm_atomic_and +imm_atomic_or +imm_atomic_xor +imm_atomic_exch +imm_atomic_cmp_exch +imm_atomic_imax +imm_atomic_imin +imm_atomic_umax +imm_atomic_umin +sync +dadd +dmax +dmin +dmul +deq +dge +dlt +dne +dmov +dmovc +dtof +ftod +eval_snapped +eval_sample_index +eval_centroid +dcl_gs_instance_count diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/operand_compnums.txt b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/operand_compnums.txt new file mode 100644 index 0000000000..887df2b141 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/operand_compnums.txt @@ -0,0 +1,5 @@ +0 +1 +4 +n + diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/operand_index_reprs.txt b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/operand_index_reprs.txt new file mode 100644 index 0000000000..f1ce172aaf --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/operand_index_reprs.txt @@ -0,0 +1,5 @@ +imm32 +imm64 +reg +reg_imm32 +reg_imm64 diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/operand_modes.txt b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/operand_modes.txt new file mode 100644 index 0000000000..4088957e98 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/operand_modes.txt @@ -0,0 +1,4 @@ +mask +swizzle +scalar + diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/shortfiles.txt b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/shortfiles.txt new file mode 100644 index 0000000000..9e2d303ccd --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/shortfiles.txt @@ -0,0 +1,41 @@ +r +v +o +x +l +d +sampler +resource +cb +icb +label +vPrim +oDepth +null +rasterizer +oMask +stream +function_body +function_table +interface +function_input +function_output +vOutputControlPointID +vForkInstanceID +vJoinInstanceID +vicp +vocp +input_patch_constant +vDomain +this +u +g +vThreadID +vThreadGrouID +vThreadIDInGroup +vCoverage +vThreadIDInGroupFlattened +vGSInstanceID +oDepthGE +oDepthLE +vCycleCounter diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/svs.txt b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/svs.txt new file mode 100644 index 0000000000..c7148ec301 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/svs.txt @@ -0,0 +1,23 @@ +undefined +position +clip_distance +cull_distance +render_target_array_index +viewport_array_index +vertex_id +primitive_id +instance_id +is_front_face +sample_index +final_quad_u_eq_0_edge_tessfactor +final_quad_v_eq_0_edge_tessfactor +final_quad_u_eq_1_edge_tessfactor +final_quad_v_eq_1_edge_tessfactor +final_quad_u_inside_tessfactor +final_quad_v_inside_tessfactor +final_tri_u_eq_0_edge_tessfactor +final_tri_v_eq_0_edge_tessfactor +final_tri_w_eq_0_edge_tessfactor +final_tri_inside_tessfactor +final_line_detail_tessfactor +final_line_density_tessfactor diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/targets.txt b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/targets.txt new file mode 100644 index 0000000000..d3bc186c54 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/targets.txt @@ -0,0 +1,13 @@ +unknown +buffer +texture1d +texture2d +texture2dms +texture3d +texturecube +texture1darray +texture2darray +texture2dmsarray +texturecubearray +raw_buffer +structured_buffer diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/token_instruction_extended_types.txt b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/token_instruction_extended_types.txt new file mode 100644 index 0000000000..e8fd70c480 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/token_instruction_extended_types.txt @@ -0,0 +1,4 @@ +empty +sample_controls +resource_dim +resource_return_type diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/token_operand_extended_types.txt b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/token_operand_extended_types.txt new file mode 100644 index 0000000000..891fcafa67 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/token_operand_extended_types.txt @@ -0,0 +1,2 @@ +empty +modifier diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/gen-header.sh b/src/gallium/state_trackers/d3d1x/d3d1xshader/gen-header.sh new file mode 100755 index 0000000000..fcda13f907 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/gen-header.sh @@ -0,0 +1,13 @@ +#!/bin/bash +for i in "$@"; do + n=$(basename "$i" .txt|sed -e 's/s$//') + if test "$n" == "shortfile"; then continue; fi + echo "enum sm4_$n" + echo "{" + while read j; do + echo $'\t'"SM4_${n}_$j", + done < "$i" |tr '[a-z]' '[A-Z]'|tr ' ' '_' + echo $'\t'"SM4_${n}_COUNT"|tr '[a-z]' '[A-Z]' + echo "};" + echo +done diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/gen-text.sh b/src/gallium/state_trackers/d3d1x/d3d1xshader/gen-text.sh new file mode 100755 index 0000000000..4663f635d4 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/gen-text.sh @@ -0,0 +1,11 @@ +#!/bin/bash +for i in "$@"; do + n=$(basename "$i" .txt|sed -e 's/s$//') + echo "const char* sm4_${n}_names[] =" + echo "{" + while read j; do + echo $'\t'"\"$j\"", + done < "$i" + echo "};" + echo +done diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/include/dxbc.h b/src/gallium/state_trackers/d3d1x/d3d1xshader/include/dxbc.h new file mode 100644 index 0000000000..5c7c87e5e8 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/include/dxbc.h @@ -0,0 +1,112 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef DXBC_H_ +#define DXBC_H_ + +#include <stdint.h> +#include <vector> +#include <map> +#include <iostream> +#include "le32.h" + +#define FOURCC(a, b, c, d) ((uint32_t)(uint8_t)(a) | ((uint32_t)(uint8_t)(b) << 8) | ((uint32_t)(uint8_t)(c) << 16) | ((uint32_t)(uint8_t)(d) << 24 )) +#define FOURCC_DXBC FOURCC('D', 'X', 'B', 'C') +#define FOURCC_RDEF FOURCC('R', 'D', 'E', 'F') +#define FOURCC_ISGN FOURCC('I', 'S', 'G', 'N') +#define FOURCC_OSGN FOURCC('O', 'S', 'G', 'N') +#define FOURCC_SHDR FOURCC('S', 'H', 'D', 'R') +#define FOURCC_SHEX FOURCC('S', 'H', 'E', 'X') +#define FOURCC_STAT FOURCC('S', 'T', 'A', 'T') + +/* this is always little-endian! */ +struct dxbc_chunk_header +{ + unsigned fourcc; + unsigned size; +}; + +/* this is always little-endian! */ +struct dxbc_chunk_signature : public dxbc_chunk_header +{ + uint32_t count; + uint32_t unk; + struct + { + uint32_t name_offset; + uint32_t semantic_index; + uint32_t system_value_type; + uint32_t component_type; + uint32_t register_num; + uint8_t mask; + uint8_t read_write_mask; + uint8_t stream; /* TODO: guess! */ + uint8_t unused; + } elements[]; +}; + +struct dxbc_container +{ + const void* data; + std::vector<dxbc_chunk_header*> chunks; + std::map<unsigned, unsigned> chunk_map; +}; + +struct dxbc_container_header +{ + unsigned fourcc; + uint32_t unk[4]; + uint32_t one; + uint32_t total_size; + uint32_t chunk_count; +}; + +dxbc_container* dxbc_parse(const void* data, int size); +std::ostream& operator <<(std::ostream& out, const dxbc_container& container); + +dxbc_chunk_header* dxbc_find_chunk(const void* data, int size, unsigned fourcc); + +static inline dxbc_chunk_header* dxbc_find_shader_bytecode(const void* data, int size) +{ + dxbc_chunk_header* chunk; + chunk = dxbc_find_chunk(data, size, FOURCC_SHDR); + if(!chunk) + chunk = dxbc_find_chunk(data, size, FOURCC_SHEX); + return chunk; +} + +static inline dxbc_chunk_signature* dxbc_find_signature(const void* data, int size, bool output) +{ + return (dxbc_chunk_signature*)dxbc_find_chunk(data, size, output ? FOURCC_OSGN : FOURCC_ISGN); +} + +struct _D3D11_SIGNATURE_PARAMETER_DESC; +typedef struct _D3D11_SIGNATURE_PARAMETER_DESC D3D11_SIGNATURE_PARAMETER_DESC; +int dxbc_parse_signature(dxbc_chunk_signature* sig, D3D11_SIGNATURE_PARAMETER_DESC** params); + +std::pair<void*, size_t> dxbc_assemble(struct dxbc_chunk_header** chunks, unsigned num_chunks); + +#endif /* DXBC_H_ */ diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/include/le32.h b/src/gallium/state_trackers/d3d1x/d3d1xshader/include/le32.h new file mode 100644 index 0000000000..923942a778 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/include/le32.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef LE32_H_ +#define LE32_H_ + +#include <stdint.h> +#include <assert.h> + +#ifdef WORDS_BIGENDIAN +static inline uint32_t bswap_le32(uint32_t v) +{ + return ((v & 0xff) << 24) | ((v & 0xff00) << 8) | ((v & 0xff0000) >> 8) | ((v & 0xff000000) >> 24); +} +#else +static inline uint32_t bswap_le32(uint32_t v) +{ + return v; +} +#endif + +#endif /* LE32_H_ */ diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/include/sm4.h b/src/gallium/state_trackers/d3d1x/d3d1xshader/include/sm4.h new file mode 100644 index 0000000000..d3ca2742a9 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/include/sm4.h @@ -0,0 +1,410 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Header for Shader Model 4.0, 4.1 and 5.0 */ + +#ifndef SM4_H_ +#define SM4_H_ + +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <memory> +#include <vector> +#include <map> +#include <iostream> +#include "le32.h" + +#include "sm4_defs.h" + +extern const char* sm4_opcode_names[]; +extern const char* sm4_file_names[]; +extern const char* sm4_shortfile_names[]; +extern const char* sm4_target_names[]; +extern const char* sm4_interpolation_names[]; +extern const char* sm4_sv_names[]; + +struct sm4_token_version +{ + unsigned minor : 4; + unsigned major : 4; + unsigned format : 8; + unsigned type : 16; +}; + +struct sm4_token_instruction +{ + // we don't make it an union directly because unions can't be inherited from + union + { + // length and extended are always present, but they are only here to reduce duplication + struct + { + unsigned opcode : 11; + unsigned _11_23 : 13; + unsigned length : 7; + unsigned extended : 1; + }; + struct + { + unsigned opcode : 11; + unsigned resinfo_return_type : 2; + unsigned sat : 1; + unsigned _14_17 : 4; + unsigned test_nz : 1; // bit 18 + unsigned precise_mask : 4; + unsigned _23 : 1; + unsigned length : 7; + unsigned extended : 1; + } insn; + struct + { + unsigned opcode : 11; + unsigned threads_in_group : 1; + unsigned shared_memory : 1; + unsigned uav_group : 1; + unsigned uav_global : 1; + unsigned _15_17 : 3; + } sync; + struct + { + unsigned opcode : 11; + unsigned allow_refactoring : 1; + unsigned fp64 : 1; + unsigned early_depth_stencil : 1; + unsigned enable_raw_and_structured_in_non_cs : 1; + } dcl_global_flags; + struct + { + unsigned opcode : 11; + unsigned target : 5; + unsigned nr_samples : 7; + } dcl_resource; + struct + { + unsigned opcode : 11; + unsigned shadow : 1; + unsigned mono : 1; + } dcl_sampler; + struct + { + unsigned opcode : 11; + unsigned interpolation : 5; + } dcl_input_ps; + struct + { + unsigned opcode : 11; + unsigned dynamic : 1; + } dcl_constant_buffer; + struct + { + unsigned opcode : 11; + unsigned primitive : 6; + } dcl_gs_input_primitive; + struct + { + unsigned opcode : 11; + unsigned primitive_topology : 7; + } dcl_gs_output_primitive_topology; + struct + { + unsigned opcode : 11; + unsigned control_points : 6; + } dcl_input_control_point_count; + struct + { + unsigned opcode : 11; + unsigned control_points : 6; + } dcl_output_control_point_count; + struct + { + unsigned opcode : 11; + unsigned domain : 3; /* D3D_TESSELLATOR_DOMAIN */ + } dcl_tess_domain; + struct + { + unsigned opcode : 11; + unsigned partitioning : 3; /* D3D_TESSELLATOR_PARTITIONING */ + } dcl_tess_partitioning; + struct + { + unsigned opcode : 11; + unsigned primitive : 3; /* D3D_TESSELLATOR_OUTPUT_PRIMITIVE */ + } dcl_tess_output_primitive; + }; +}; + +union sm4_token_instruction_extended +{ + struct + { + unsigned type : 6; + unsigned _6_30 : 25; + unsigned extended :1; + }; + struct + { + unsigned type : 6; + unsigned _6_8 : 3; + int offset_u : 4; + int offset_v : 4; + int offset_w : 4; + } sample_controls; + struct + { + unsigned type : 6; + unsigned target : 5; + } resource_target; + struct + { + unsigned type : 6; + unsigned x : 4; + unsigned y : 4; + unsigned z : 4; + unsigned w : 4; + } resource_return_type; +}; + +struct sm4_token_resource_return_type +{ + unsigned x : 4; + unsigned y : 4; + unsigned z : 4; + unsigned w : 4; +}; + +struct sm4_token_operand +{ + unsigned comps_enum : 2; /* sm4_operands_comps */ + unsigned mode : 2; /* sm4_operand_mode */ + unsigned sel : 8; + unsigned file : 8; /* sm4_file */ + unsigned num_indices : 2; + unsigned index0_repr : 3; /* sm4_operand_index_repr */ + unsigned index1_repr : 3; /* sm4_operand_index_repr */ + unsigned index2_repr : 3; /* sm4_operand_index_repr */ + unsigned extended : 1; +}; + +#define SM4_OPERAND_SEL_MASK(sel) ((sel) & 0xf) +#define SM4_OPERAND_SEL_SWZ(sel, i) (((sel) >> ((i) * 2)) & 3) +#define SM4_OPERAND_SEL_SCALAR(sel) ((sel) & 3) + +struct sm4_token_operand_extended +{ + unsigned type : 6; + unsigned neg : 1; + unsigned abs : 1; +}; + +union sm4_any +{ + double f64; + float f32; + int64_t i64; + int32_t i32; + uint64_t u64; + int64_t u32; +}; + +struct sm4_op; +struct sm4_insn; +struct sm4_dcl; +struct sm4_program; +std::ostream& operator <<(std::ostream& out, const sm4_op& op); +std::ostream& operator <<(std::ostream& out, const sm4_insn& op); +std::ostream& operator <<(std::ostream& out, const sm4_dcl& op); +std::ostream& operator <<(std::ostream& out, const sm4_program& op); + +struct sm4_op +{ + uint8_t mode; + uint8_t comps; + uint8_t mask; + uint8_t num_indices; + uint8_t swizzle[4]; + sm4_file file; + sm4_any imm_values[4]; + bool neg; + bool abs; + struct + { + int64_t disp; + std::auto_ptr<sm4_op> reg; + } indices[3]; + + bool is_index_simple(unsigned i) const + { + return !indices[i].reg.get() && indices[i].disp >= 0 && (int64_t)(int32_t)indices[i].disp == indices[i].disp; + } + + bool has_simple_index() const + { + return num_indices == 1 && is_index_simple(0); + } + + sm4_op() + { + memset(this, 0, sizeof(*this)); + } + + void dump(); + +private: + sm4_op(const sm4_op& op) + {} +}; + +/* for sample_d */ +#define SM4_MAX_OPS 6 + +struct sm4_insn : public sm4_token_instruction +{ + int8_t sample_offset[3]; + uint8_t resource_target; + uint8_t resource_return_type[4]; + + unsigned num; + unsigned num_ops; + std::auto_ptr<sm4_op> ops[SM4_MAX_OPS]; + + sm4_insn() + { + memset(this, 0, sizeof(*this)); + } + + void dump(); + +private: + sm4_insn(const sm4_insn& op) + {} +}; + +struct sm4_dcl : public sm4_token_instruction +{ + std::auto_ptr<sm4_op> op; + union + { + unsigned num; + float f32; + sm4_sv sv; + struct + { + unsigned id; + unsigned expected_function_table_length; + unsigned table_length; + unsigned array_length; + } intf; + unsigned thread_group_size[3]; + sm4_token_resource_return_type rrt; + struct + { + unsigned num; + unsigned comps; + } indexable_temp; + struct + { + unsigned stride; + unsigned count; + } structured; + }; + + void* data; + + sm4_dcl() + { + memset(this, 0, sizeof(*this)); + } + + ~sm4_dcl() + { + free(data); + } + + void dump(); + +private: + sm4_dcl(const sm4_dcl& op) + {} +}; + +struct sm4_program +{ + sm4_token_version version; + std::vector<sm4_dcl*> dcls; + std::vector<sm4_insn*> insns; + + /* for ifs, the insn number of the else or endif if there is no else + * for elses, the insn number of the endif + * for endifs, the insn number of the if + * for loops, the insn number of the endloop + * for endloops, the insn number of the loop + * for all others, -1 + */ + std::vector<int> cf_insn_linked; + + /* NOTE: sampler 0 is the unnormalized nearest sampler for LD/LD_MS, while + * sampler 1 is user-specified sampler 0 + */ + bool resource_sampler_slots_assigned; + std::vector<int> slot_to_resource; + std::vector<int> slot_to_sampler; + std::map<std::pair<int, int>, int> resource_sampler_to_slot; + std::map<int, int> resource_to_slot; + + bool labels_found; + std::vector<int> label_to_insn_num; + + sm4_program() + { + memset(&version, 0, sizeof(version)); + labels_found = false; + resource_sampler_slots_assigned = false; + } + + ~sm4_program() + { + for(std::vector<sm4_dcl*>::iterator i = dcls.begin(), e = dcls.end(); i != e; ++i) + delete *i; + for(std::vector<sm4_insn*>::iterator i = insns.begin(), e = insns.end(); i != e; ++i) + delete *i; + } + + void dump(); + +private: + sm4_program(const sm4_dcl& op) + {} +}; + +sm4_program* sm4_parse(void* tokens, int size); + +bool sm4_link_cf_insns(sm4_program& program); +bool sm4_find_labels(sm4_program& program); +bool sm4_allocate_resource_sampler_pairs(sm4_program& program); + +#endif /* SM4_H_ */ + diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/src/dxbc_assemble.cpp b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/dxbc_assemble.cpp new file mode 100644 index 0000000000..1021a8a1bd --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/dxbc_assemble.cpp @@ -0,0 +1,59 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include "dxbc.h" + +std::pair<void*, size_t> dxbc_assemble(struct dxbc_chunk_header** chunks, unsigned num_chunks) +{ + size_t data_size = 0; + for(unsigned i = 0; i < num_chunks; ++i) + data_size += sizeof(uint32_t) + sizeof(dxbc_chunk_header) + bswap_le32(chunks[i]->size); + + size_t total_size = sizeof(dxbc_container_header) + data_size; + dxbc_container_header* header = (dxbc_container_header*)malloc(total_size); + if(!header) + return std::make_pair((void*)0, 0); + + header->fourcc = bswap_le32(FOURCC_DXBC); + memset(header->unk, 0, sizeof(header->unk)); + header->one = bswap_le32(1); + header->total_size = bswap_le32(total_size); + header->chunk_count = num_chunks; + + uint32_t* chunk_offsets = (uint32_t*)(header + 1); + uint32_t off = sizeof(struct dxbc_container_header) + num_chunks * sizeof(uint32_t); + for(unsigned i = 0; i < num_chunks; ++i) + { + chunk_offsets[i] = bswap_le32(off); + unsigned chunk_full_size = sizeof(dxbc_chunk_header) + bswap_le32(chunks[i]->size); + memcpy((char*)header + off, chunks[i], chunk_full_size); + off += chunk_full_size; + } + + return std::make_pair((void*)header, total_size); +} diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/src/dxbc_dump.cpp b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/dxbc_dump.cpp new file mode 100644 index 0000000000..a3feec7446 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/dxbc_dump.cpp @@ -0,0 +1,43 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <memory> +#include <string.h> +#include <iomanip> +#include "dxbc.h" + +std::ostream& operator <<(std::ostream& out, const dxbc_container& container) +{ + for(unsigned i = 0; i < container.chunks.size(); ++i) + { + struct dxbc_chunk_header* chunk = container.chunks[i]; + char fourcc_str[5]; + memcpy(fourcc_str, &chunk->fourcc, 4); + fourcc_str[4] = 0; + out << "# DXBC chunk " << std::setw(2) << i << ": " << fourcc_str << " offset " << ((char*)chunk - (char*)container.data) << " size " << bswap_le32(chunk->size) << "\n"; + } + return out; +} diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/src/dxbc_parse.cpp b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/dxbc_parse.cpp new file mode 100644 index 0000000000..4903e2c3b9 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/dxbc_parse.cpp @@ -0,0 +1,86 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <memory> +#include "dxbc.h" +#include <d3d11shader.h> +#include <d3dcommon.h> + +dxbc_container* dxbc_parse(const void* data, int size) +{ + std::auto_ptr<dxbc_container> container(new dxbc_container()); + container->data = data; + dxbc_container_header* header = (dxbc_container_header*)data; + uint32_t* chunk_offsets = (uint32_t*)(header + 1); + if(bswap_le32(header->fourcc) != FOURCC_DXBC) + return 0; + unsigned num_chunks = bswap_le32(header->chunk_count); + for(unsigned i = 0; i < num_chunks; ++i) + { + unsigned offset = bswap_le32(chunk_offsets[i]); + dxbc_chunk_header* chunk = (dxbc_chunk_header*)((char*)data + offset); + unsigned fourcc = bswap_le32(chunk->fourcc); + container->chunk_map[fourcc] = i; + container->chunks.push_back(chunk); + } + return container.release(); +} + +dxbc_chunk_header* dxbc_find_chunk(const void* data, int size, unsigned fourcc) +{ + dxbc_container_header* header = (dxbc_container_header*)data; + uint32_t* chunk_offsets = (uint32_t*)(header + 1); + if(bswap_le32(header->fourcc) != FOURCC_DXBC) + return 0; + unsigned num_chunks = bswap_le32(header->chunk_count); + for(unsigned i = 0; i < num_chunks; ++i) + { + unsigned offset = bswap_le32(chunk_offsets[i]); + dxbc_chunk_header* chunk = (dxbc_chunk_header*)((char*)data + offset); + if(bswap_le32(chunk->fourcc) == fourcc) + return chunk; + } + return 0; +} + +int dxbc_parse_signature(dxbc_chunk_signature* sig, D3D11_SIGNATURE_PARAMETER_DESC** params) +{ + unsigned count = bswap_le32(sig->count); + *params = (D3D11_SIGNATURE_PARAMETER_DESC*)malloc(sizeof(D3D11_SIGNATURE_PARAMETER_DESC) * count); + + for (unsigned i = 0; i < count; ++i) + { + D3D11_SIGNATURE_PARAMETER_DESC& param = (*params)[i]; + param.SemanticName = (char*)&sig->count + bswap_le32(sig->elements[i].name_offset); + param.SemanticIndex = bswap_le32(sig->elements[i].semantic_index); + param.SystemValueType = (D3D_NAME)bswap_le32(sig->elements[i].system_value_type); + param.ComponentType = (D3D_REGISTER_COMPONENT_TYPE)bswap_le32(sig->elements[i].component_type); + param.Mask = sig->elements[i].mask; + param.ReadWriteMask = sig->elements[i].read_write_mask; + param.Stream = sig->elements[i].stream; + } + return count; +} diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/src/sm4_analyze.cpp b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/sm4_analyze.cpp new file mode 100644 index 0000000000..7903d547f1 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/sm4_analyze.cpp @@ -0,0 +1,186 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <vector> +#include <set> +#include "sm4.h" + +#define check(x) do {if(!(x)) return false;} while(0) + +bool sm4_link_cf_insns(sm4_program& program) +{ + if(program.cf_insn_linked.size()) + return true; + + std::vector<int> cf_insn_linked; + cf_insn_linked.resize(program.insns.size()); + memset(&cf_insn_linked[0], 0xff, cf_insn_linked.size() * sizeof(int)); + std::vector<unsigned> cf_stack; + for(unsigned insn_num = 0; insn_num < program.insns.size(); ++insn_num) + { + unsigned v; + switch(program.insns[insn_num]->opcode) + { + case SM4_OPCODE_LOOP: + cf_stack.push_back(insn_num); + break; + case SM4_OPCODE_ENDLOOP: + check(!cf_stack.empty()); + v = cf_stack.back(); + check(program.insns[v]->opcode == SM4_OPCODE_LOOP); + cf_insn_linked[v] = insn_num; + cf_insn_linked[insn_num] = v; + cf_stack.pop_back(); + break; + case SM4_OPCODE_IF: + case SM4_OPCODE_SWITCH: + cf_insn_linked[insn_num] = insn_num; // later changed + cf_stack.push_back(insn_num); + break; + case SM4_OPCODE_ELSE: + case SM4_OPCODE_CASE: + check(!cf_stack.empty()); + v = cf_stack.back(); + if(program.insns[insn_num]->opcode == SM4_OPCODE_ELSE) + check(program.insns[v]->opcode == SM4_OPCODE_IF); + else + check(program.insns[v]->opcode == SM4_OPCODE_SWITCH || program.insns[v]->opcode == SM4_OPCODE_CASE); + cf_insn_linked[insn_num] = cf_insn_linked[v]; // later changed + cf_insn_linked[v] = insn_num; + cf_stack.back() = insn_num; + break; + case SM4_OPCODE_ENDSWITCH: + case SM4_OPCODE_ENDIF: + check(!cf_stack.empty()); + v = cf_stack.back(); + if(program.insns[insn_num]->opcode == SM4_OPCODE_ENDIF) + check(program.insns[v]->opcode == SM4_OPCODE_IF || program.insns[v]->opcode == SM4_OPCODE_ELSE); + else + check(program.insns[v]->opcode == SM4_OPCODE_SWITCH || program.insns[v]->opcode == SM4_OPCODE_CASE); + cf_insn_linked[insn_num] = cf_insn_linked[v]; + cf_insn_linked[v] = insn_num; + cf_stack.pop_back(); + break; + } + } + check(cf_stack.empty()); + program.cf_insn_linked.swap(cf_insn_linked); + return true; +} + +bool sm4_find_labels(sm4_program& program) +{ + if(program.labels_found) + return true; + + std::vector<int> labels; + for(unsigned insn_num = 0; insn_num < program.insns.size(); ++insn_num) + { + switch(program.insns[insn_num]->opcode) + { + case SM4_OPCODE_LABEL: + if(program.insns[insn_num]->num_ops > 0) + { + sm4_op& op = *program.insns[insn_num]->ops[0]; + if(op.file == SM4_FILE_LABEL && op.has_simple_index()) + { + unsigned idx = (unsigned)op.indices[0].disp; + if(idx >= labels.size()) + labels.resize(idx + 1); + labels[idx] = insn_num; + } + } + break; + } + } + program.label_to_insn_num.swap(labels); + program.labels_found = true; + return true; +} + +bool sm4_allocate_resource_sampler_pairs(sm4_program& program) +{ + if(program.resource_sampler_slots_assigned) + return true; + + std::set<std::pair<int, int> > pairs; + std::set<int> resinfos; + + for(unsigned insn_num = 0; insn_num < program.insns.size(); ++insn_num) + { + int resource = -1; + int sampler = -2; + for(unsigned i = 0; i < program.insns[insn_num]->num_ops; ++i) + { + sm4_op* op = program.insns[insn_num]->ops[i].get(); + if(op) + { + if(op->file == SM4_FILE_RESOURCE) + { + if(!op->has_simple_index() || resource >= 0) + return false; + resource = (int)op->indices[0].disp; + } + if(op->file == SM4_FILE_SAMPLER) + { + if(!op->has_simple_index() || sampler >= 0) + return false; + sampler = (int)op->indices[0].disp; + } + } + } + + unsigned opcode = program.insns[insn_num]->opcode; + if(opcode == SM4_OPCODE_LD || opcode == SM4_OPCODE_LD_MS) + sampler = -1; + if(sampler >= -1 && resource >= 0) + pairs.insert(std::make_pair(resource, sampler)); + if(opcode == SM4_OPCODE_RESINFO) + resinfos.insert(resource); + } + + for(std::set<std::pair<int, int> >::iterator i = pairs.begin(); i != pairs.end(); ++i) + { + program.resource_sampler_to_slot[*i] = program.slot_to_resource.size(); + if(!program.resource_to_slot.count(i->first)) + { + program.resource_to_slot[i->first] = program.slot_to_resource.size(); + resinfos.erase(i->first); + } + program.slot_to_resource.push_back(i->first); + program.slot_to_sampler.push_back(i->second); + } + + for(std::set<int>::iterator i = resinfos.begin(); i != resinfos.end(); ++i) + { + program.resource_sampler_to_slot[std::make_pair(*i, -1)] = program.slot_to_resource.size(); + program.resource_to_slot[*i] = program.slot_to_resource.size(); + program.slot_to_resource.push_back(*i); + program.slot_to_sampler.push_back(-1); + } + program.resource_sampler_slots_assigned = true; + return true; +} diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/src/sm4_dump.cpp b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/sm4_dump.cpp new file mode 100644 index 0000000000..746d7c8927 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/sm4_dump.cpp @@ -0,0 +1,222 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "sm4.h" + +// TODO: we should fix this to output the same syntax as fxc, if sm4_dump_short_syntax is set + +bool sm4_dump_short_syntax = true; + +std::ostream& operator <<(std::ostream& out, const sm4_op& op) +{ + if(op.neg) + out << '-'; + if(op.abs) + out << '|'; + if(op.file == SM4_FILE_IMMEDIATE32) + { + out << "l("; + for(unsigned i = 0; i < op.comps; ++i) + { + if(i) + out << ", "; + out << op.imm_values[i].f32; + } + out << ")"; + } + else if(op.file == SM4_FILE_IMMEDIATE64) + { + out << "d("; + for(unsigned i = 0; i < op.comps; ++i) + { + if(i) + out << ", "; + out << op.imm_values[i].f64; + } + out << ")"; + return out; + } + else + { + bool naked = false; + if(sm4_dump_short_syntax) + { + switch(op.file) + { + case SM4_FILE_TEMP: + case SM4_FILE_INPUT: + case SM4_FILE_OUTPUT: + case SM4_FILE_CONSTANT_BUFFER: + case SM4_FILE_INDEXABLE_TEMP: + case SM4_FILE_UNORDERED_ACCESS_VIEW: + case SM4_FILE_THREAD_GROUP_SHARED_MEMORY: + naked = true; + break; + default: + naked = false; + break; + } + } + + out << (sm4_dump_short_syntax ? sm4_shortfile_names : sm4_file_names)[op.file]; + + if(op.indices[0].reg.get()) + naked = false; + + for(unsigned i = 0; i < op.num_indices; ++i) + { + if(!naked || i) + out << '['; + if(op.indices[i].reg.get()) + { + out << *op.indices[i].reg; + if(op.indices[i].disp) + out << '+' << op.indices[i].disp; + } + else + out << op.indices[i].disp; + if(!naked || i) + out << ']'; + } + if(op.comps) + { + switch(op.mode) + { + case SM4_OPERAND_MODE_MASK: + out << (sm4_dump_short_syntax ? '.' : '!'); + for(unsigned i = 0; i < op.comps; ++i) + { + if(op.mask & (1 << i)) + out << "xyzw"[i]; + } + break; + case SM4_OPERAND_MODE_SWIZZLE: + out << '.'; + for(unsigned i = 0; i < op.comps; ++i) + out << "xyzw"[op.swizzle[i]]; + break; + case SM4_OPERAND_MODE_SCALAR: + out << (sm4_dump_short_syntax ? '.' : ':'); + out << "xyzw"[op.swizzle[0]]; + break; + } + } + } + if(op.abs) + out << '|'; + return out; +} + +std::ostream& operator <<(std::ostream& out, const sm4_dcl& dcl) +{ + out << sm4_opcode_names[dcl.opcode]; + switch(dcl.opcode) + { + case SM4_OPCODE_DCL_GLOBAL_FLAGS: + if(dcl.dcl_global_flags.allow_refactoring) + out << " refactoringAllowed"; + if(dcl.dcl_global_flags.early_depth_stencil) + out << " forceEarlyDepthStencil"; + if(dcl.dcl_global_flags.fp64) + out << " enableDoublePrecisionFloatOps"; + if(dcl.dcl_global_flags.enable_raw_and_structured_in_non_cs) + out << " enableRawAndStructuredBuffers"; + break; + case SM4_OPCODE_DCL_INPUT_PS: + case SM4_OPCODE_DCL_INPUT_PS_SIV: + case SM4_OPCODE_DCL_INPUT_PS_SGV: + out << ' ' << sm4_interpolation_names[dcl.dcl_input_ps.interpolation]; + break; + case SM4_OPCODE_DCL_TEMPS: + out << ' ' << dcl.num; + break; + default: + break; + } + if(dcl.op.get()) + out << ' ' << *dcl.op; + switch(dcl.opcode) + { + case SM4_OPCODE_DCL_CONSTANT_BUFFER: + out << ", " << (dcl.dcl_constant_buffer.dynamic ? "dynamicIndexed" : "immediateIndexed"); + break; + case SM4_OPCODE_DCL_INPUT_SIV: + case SM4_OPCODE_DCL_INPUT_SGV: + case SM4_OPCODE_DCL_OUTPUT_SIV: + case SM4_OPCODE_DCL_OUTPUT_SGV: + case SM4_OPCODE_DCL_INPUT_PS_SIV: + case SM4_OPCODE_DCL_INPUT_PS_SGV: + out << ", " << sm4_sv_names[dcl.num]; + break; + } + + return out; +} + +std::ostream& operator <<(std::ostream& out, const sm4_insn& insn) +{ + out << sm4_opcode_names[insn.opcode]; + if(insn.insn.sat) + out << "_sat"; + for(unsigned i = 0; i < insn.num_ops; ++i) + { + if(i) + out << ','; + out << ' ' << *insn.ops[i]; + } + return out; +} + +std::ostream& operator <<(std::ostream& out, const sm4_program& program) +{ + out << "pvghdc"[program.version.type] << "s_" << program.version.major << "_" << program.version.minor << "\n"; + for(unsigned i = 0; i < program.dcls.size(); ++i) + out << *program.dcls[i] << "\n"; + + for(unsigned i = 0; i < program.insns.size(); ++i) + out << *program.insns[i] << "\n"; + return out; +} + +void sm4_op::dump() +{ + std::cout << *this; +} + +void sm4_insn::dump() +{ + std::cout << *this; +} + +void sm4_dcl::dump() +{ + std::cout << *this; +} + +void sm4_program::dump() +{ + std::cout << *this; +} diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/src/sm4_parse.cpp b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/sm4_parse.cpp new file mode 100644 index 0000000000..2c0f8269af --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/sm4_parse.cpp @@ -0,0 +1,424 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "sm4.h" +#include "utils.h" + +#if 1 +#define check(x) assert(x) +#define fail(x) assert(0 && (x)) +#else +#define check(x) do {if(!(x)) throw(#x);} while(0) +#define fail(x) throw(x) +#endif + +struct sm4_parser +{ + unsigned* tokens; + unsigned* tokens_end; + sm4_program& program; + + sm4_parser(sm4_program& program, void* p_tokens, unsigned size) + : program(program) + { + tokens = (unsigned*)p_tokens; + tokens_end = (unsigned*)((char*)p_tokens + size); + } + + /* TODO: byteswap if machine is big endian */ + uint32_t read32() + { + check(tokens < tokens_end); + return bswap_le32(*tokens++); + } + + template<typename T> + void read_token(T* tok) + { + *(unsigned*)tok = read32(); + } + + uint64_t read64() + { + unsigned a = read32(); + unsigned b = read32(); + return (uint64_t)a | ((uint64_t)b << 32); + } + + void skip(unsigned toskip) + { + tokens += toskip; + } + + void read_op(sm4_op* pop) + { + sm4_op& op = *pop; + sm4_token_operand optok; + read_token(&optok); + assert(optok.file < SM4_FILE_COUNT); + op.swizzle[0] = 0; + op.swizzle[1] = 1; + op.swizzle[2] = 2; + op.swizzle[3] = 3; + op.mask = 0xf; + switch(optok.comps_enum) + { + case SM4_OPERAND_COMPNUM_0: + op.comps = 0; + break; + case SM4_OPERAND_COMPNUM_1: + op.comps = 1; + break; + case SM4_OPERAND_COMPNUM_4: + op.comps = 4; + op.mode = optok.mode; + switch(optok.mode) + { + case SM4_OPERAND_MODE_MASK: + op.mask = SM4_OPERAND_SEL_MASK(optok.sel); + break; + case SM4_OPERAND_MODE_SWIZZLE: + op.swizzle[0] = SM4_OPERAND_SEL_SWZ(optok.sel, 0); + op.swizzle[1] = SM4_OPERAND_SEL_SWZ(optok.sel, 1); + op.swizzle[2] = SM4_OPERAND_SEL_SWZ(optok.sel, 2); + op.swizzle[3] = SM4_OPERAND_SEL_SWZ(optok.sel, 3); + break; + case SM4_OPERAND_MODE_SCALAR: + op.swizzle[0] = op.swizzle[1] = op.swizzle[2] = op.swizzle[3] = SM4_OPERAND_SEL_SCALAR(optok.sel); + break; + } + break; + case SM4_OPERAND_COMPNUM_N: + fail("Unhandled operand component type"); + } + op.file = (sm4_file)optok.file; + op.num_indices = optok.num_indices; + + if(optok.extended) + { + sm4_token_operand_extended optokext; + read_token(&optokext); + if(optokext.type == 0) + {} + else if(optokext.type == 1) + { + op.neg = optokext.neg; + op.abs= optokext.abs; + } + else + fail("Unhandled extended operand token type"); + } + + for(unsigned i = 0; i < op.num_indices; ++i) + { + unsigned repr; + if(i == 0) + repr = optok.index0_repr; + else if(i == 1) + repr = optok.index1_repr; + else if(i == 2) + repr = optok.index2_repr; + else + fail("Unhandled operand index representation"); + op.indices[0].disp = 0; + // TODO: is disp supposed to be signed here?? + switch(repr) + { + case SM4_OPERAND_INDEX_REPR_IMM32: + op.indices[i].disp = (int32_t)read32(); + break; + case SM4_OPERAND_INDEX_REPR_IMM64: + op.indices[i].disp = read64(); + break; + case SM4_OPERAND_INDEX_REPR_REG: +relative: + op.indices[i].reg.reset(new sm4_op()); + read_op(&*op.indices[0].reg); + break; + case SM4_OPERAND_INDEX_REPR_REG_IMM32: + op.indices[i].disp = (int32_t)read32(); + goto relative; + case SM4_OPERAND_INDEX_REPR_REG_IMM64: + op.indices[i].disp = read64(); + goto relative; + } + } + + if(op.file == SM4_FILE_IMMEDIATE32) + { + for(unsigned i = 0; i < op.comps; ++i) + op.imm_values[i].i32 = read32(); + } + else if(op.file == SM4_FILE_IMMEDIATE64) + { + for(unsigned i = 0; i < op.comps; ++i) + op.imm_values[i].i64 = read64(); + } + } + + void do_parse() + { + read_token(&program.version); + + unsigned lentok = read32(); + tokens_end = tokens - 2 + lentok; + + while(tokens != tokens_end) + { + sm4_token_instruction insntok; + read_token(&insntok); + unsigned* insn_end = tokens - 1 + insntok.length; + sm4_opcode opcode = (sm4_opcode)insntok.opcode; + check(opcode < SM4_OPCODE_COUNT); + + if(opcode == SM4_OPCODE_CUSTOMDATA) + { + unsigned customlen = read32() - 2; + skip(customlen); + continue; + } + + if((opcode >= SM4_OPCODE_DCL_RESOURCE && opcode <= SM4_OPCODE_DCL_GLOBAL_FLAGS) + || (opcode >= SM4_OPCODE_DCL_STREAM && opcode <= SM4_OPCODE_DCL_RESOURCE_STRUCTURED)) + { + sm4_dcl& dcl = *new sm4_dcl; + program.dcls.push_back(&dcl); + (sm4_token_instruction&)dcl = insntok; + + sm4_token_instruction_extended exttok; + memcpy(&exttok, &insntok, sizeof(exttok)); + while(exttok.extended) + { + read_token(&exttok); + } + +#define READ_OP_ANY dcl.op.reset(new sm4_op()); read_op(&*dcl.op); +#define READ_OP(FILE) READ_OP_ANY + //check(dcl.op->file == SM4_FILE_##FILE); + + switch(opcode) + { + case SM4_OPCODE_DCL_GLOBAL_FLAGS: + break; + case SM4_OPCODE_DCL_RESOURCE: + READ_OP(RESOURCE); + read_token(&dcl.rrt); + break; + case SM4_OPCODE_DCL_SAMPLER: + READ_OP(SAMPLER); + break; + case SM4_OPCODE_DCL_INPUT: + case SM4_OPCODE_DCL_INPUT_PS: + READ_OP(INPUT); + break; + case SM4_OPCODE_DCL_INPUT_SIV: + case SM4_OPCODE_DCL_INPUT_SGV: + case SM4_OPCODE_DCL_INPUT_PS_SIV: + case SM4_OPCODE_DCL_INPUT_PS_SGV: + READ_OP(INPUT); + dcl.sv = (sm4_sv)(uint16_t)read32(); + break; + case SM4_OPCODE_DCL_OUTPUT: + READ_OP(OUTPUT); + break; + case SM4_OPCODE_DCL_OUTPUT_SIV: + case SM4_OPCODE_DCL_OUTPUT_SGV: + READ_OP(OUTPUT); + dcl.sv = (sm4_sv)(uint16_t)read32(); + break; + case SM4_OPCODE_DCL_INDEX_RANGE: + READ_OP_ANY; + check(dcl.op->file == SM4_FILE_INPUT || dcl.op->file == SM4_FILE_OUTPUT); + dcl.num = read32(); + break; + case SM4_OPCODE_DCL_TEMPS: + dcl.num = read32(); + break; + case SM4_OPCODE_DCL_INDEXABLE_TEMP: + READ_OP(INDEXABLE_TEMP); + dcl.indexable_temp.num = read32(); + dcl.indexable_temp.comps = read32(); + break; + case SM4_OPCODE_DCL_CONSTANT_BUFFER: + READ_OP(CONSTANT_BUFFER); + break; + case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE: + case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: + break; + case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: + dcl.num = read32(); + break; + case SM4_OPCODE_DCL_GS_INSTANCE_COUNT: + dcl.num = read32(); + break; + case SM4_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: + case SM4_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: + case SM4_OPCODE_DCL_TESS_DOMAIN: + case SM4_OPCODE_DCL_TESS_PARTITIONING: + case SM4_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: + break; + case SM4_OPCODE_DCL_HS_MAX_TESSFACTOR: + dcl.f32 = read32(); + break; + case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + dcl.num = read32(); + break; + case SM4_OPCODE_DCL_FUNCTION_BODY: + dcl.num = read32(); + break; + case SM4_OPCODE_DCL_FUNCTION_TABLE: + dcl.num = read32(); + dcl.data = malloc(dcl.num * sizeof(uint32_t)); + for(unsigned i = 0; i < dcl.num; ++i) + ((uint32_t*)dcl.data)[i] = read32(); + break; + case SM4_OPCODE_DCL_INTERFACE: + dcl.intf.id = read32(); + dcl.intf.expected_function_table_length = read32(); + { + uint32_t v = read32(); + dcl.intf.table_length = v & 0xffff; + dcl.intf.array_length = v >> 16; + } + dcl.data = malloc(dcl.intf.table_length * sizeof(uint32_t)); + for(unsigned i = 0; i < dcl.intf.table_length; ++i) + ((uint32_t*)dcl.data)[i] = read32(); + break; + case SM4_OPCODE_DCL_THREAD_GROUP: + dcl.thread_group_size[0] = read32(); + dcl.thread_group_size[1] = read32(); + dcl.thread_group_size[2] = read32(); + break; + case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: + READ_OP(UNORDERED_ACCESS_VIEW); + read_token(&dcl.rrt); + break; + case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: + READ_OP(UNORDERED_ACCESS_VIEW); + break; + case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: + READ_OP(UNORDERED_ACCESS_VIEW); + dcl.structured.stride = read32(); + break; + case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: + READ_OP(THREAD_GROUP_SHARED_MEMORY); + dcl.num = read32(); + break; + case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: + READ_OP(THREAD_GROUP_SHARED_MEMORY); + dcl.structured.stride = read32(); + dcl.structured.count = read32(); + break; + case SM4_OPCODE_DCL_RESOURCE_RAW: + READ_OP(RESOURCE); + break; + case SM4_OPCODE_DCL_RESOURCE_STRUCTURED: + READ_OP(RESOURCE); + dcl.structured.stride = read32(); + break; + case SM4_OPCODE_DCL_STREAM: + /* TODO: dcl_stream is undocumented: what is it? */ + fail("Unhandled dcl_stream since it's undocumented"); + default: + fail("Unhandled declaration"); + } + + check(tokens == insn_end); + } + else + { + sm4_insn& insn = *new sm4_insn; + program.insns.push_back(&insn); + (sm4_token_instruction&)insn = insntok; + + sm4_token_instruction_extended exttok; + memcpy(&exttok, &insntok, sizeof(exttok)); + while(exttok.extended) + { + read_token(&exttok); + if(exttok.type == SM4_TOKEN_INSTRUCTION_EXTENDED_TYPE_SAMPLE_CONTROLS) + { + insn.sample_offset[0] = exttok.sample_controls.offset_u; + insn.sample_offset[1] = exttok.sample_controls.offset_v; + insn.sample_offset[2] = exttok.sample_controls.offset_w; + } + else if(exttok.type == SM4_TOKEN_INSTRUCTION_EXTENDED_TYPE_RESOURCE_DIM) + insn.resource_target = exttok.resource_target.target; + else if(exttok.type == SM4_TOKEN_INSTRUCTION_EXTENDED_TYPE_RESOURCE_RETURN_TYPE) + { + insn.resource_return_type[0] = exttok.resource_return_type.x; + insn.resource_return_type[1] = exttok.resource_return_type.y; + insn.resource_return_type[2] = exttok.resource_return_type.z; + insn.resource_return_type[3] = exttok.resource_return_type.w; + } + } + + switch(opcode) + { + case SM4_OPCODE_INTERFACE_CALL: + insn.num = read32(); + break; + default: + break; + } + + unsigned op_num = 0; + while(tokens != insn_end) + { + check(tokens < insn_end); + check(op_num < SM4_MAX_OPS); + insn.ops[op_num].reset(new sm4_op); + read_op(&*insn.ops[op_num]); + ++op_num; + } + insn.num_ops = op_num; + } + } + } + + const char* parse() + { + try + { + do_parse(); + return 0; + } + catch(const char* error) + { + return error; + } + } +}; + +sm4_program* sm4_parse(void* tokens, int size) +{ + sm4_program* program = new sm4_program; + sm4_parser parser(*program, tokens, size); + if(!parser.parse()) + return program; + delete program; + return 0; +} diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/src/utils.h b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/utils.h new file mode 100644 index 0000000000..6e77b51175 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/utils.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BYTESWAP_H_ +#define BYTESWAP_H_ + +#include <stdint.h> +#include <assert.h> + +#ifdef WORDS_BIGENDIAN +static inline uint32_t le32_to_cpu(uint32_t v) +{ + return ((v & 0xff) << 24) | ((v & 0xff00) << 8) | ((v & 0xff0000) >> 8) | ((v & 0xff000000) >> 24); +} +#else +static inline uint32_t le32_to_cpu(uint32_t v) +{ + return v; +} +#endif + +#endif /* BYTESWAP_H_ */ diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/tools/fxdis.cpp b/src/gallium/state_trackers/d3d1x/d3d1xshader/tools/fxdis.cpp new file mode 100644 index 0000000000..20a7cbd1c3 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/tools/fxdis.cpp @@ -0,0 +1,75 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "dxbc.h" +#include "sm4.h" +#include <iostream> +#include <fstream> + +void usage() +{ + std::cerr << "Gallium Direct3D10/11 Shader Disassembler\n"; + std::cerr << "This program is free software, released under a MIT-like license\n"; + std::cerr << "Not affiliated with or endorsed by Microsoft in any way\n"; + std::cerr << "Latest version available from http://cgit.freedesktop.org/mesa/mesa/\n"; + std::cerr << "\n"; + std::cerr << "Usage: fxdis FILE\n"; + std::cerr << std::endl; +} + +int main(int argc, char** argv) +{ + if(argc < 2) + { + usage(); + return 1; + } + + std::vector<char> data; + std::ifstream in(argv[1]); + char c; + in >> std::noskipws; + while(in >> c) + data.push_back(c); + in.close(); + + dxbc_container* dxbc = dxbc_parse(&data[0], data.size()); + if(dxbc) + { + std::cout << *dxbc; + dxbc_chunk_header* sm4_chunk = dxbc_find_shader_bytecode(&data[0], data.size()); + if(sm4_chunk) + { + sm4_program* sm4 = sm4_parse(sm4_chunk + 1, bswap_le32(sm4_chunk->size)); + if(sm4) + { + std::cout << *sm4; + delete sm4; + } + } + delete dxbc; + } +} diff --git a/src/gallium/state_trackers/d3d1x/d3d1xstutil/Makefile b/src/gallium/state_trackers/d3d1x/d3d1xstutil/Makefile new file mode 100644 index 0000000000..f986f8e5f1 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xstutil/Makefile @@ -0,0 +1,5 @@ +LIBNAME=d3d1xstutil +CPP_SOURCES=$(wildcard src/*.cpp) +LIBRARY_INCLUDES=-Iinclude -I../gd3dapi -I../d3dapi -I../w32api -I../../../include -I../../../auxiliary + +include ../Makefile.inc diff --git a/src/gallium/state_trackers/d3d1x/d3d1xstutil/include/d3d1xstutil.h b/src/gallium/state_trackers/d3d1x/d3d1xstutil/include/d3d1xstutil.h new file mode 100644 index 0000000000..af355f0227 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xstutil/include/d3d1xstutil.h @@ -0,0 +1,1110 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef D3D1XSTUTIL_H_ +#define D3D1XSTUTIL_H_ + +#ifdef _MSC_VER +#include <unordered_map> +#include <unordered_set> +#else +#include <tr1/unordered_map> +#include <tr1/unordered_set> +namespace std +{ + using namespace tr1; +} +#endif +#include <map> +#include <utility> + +#define WIN32_LEAN_AND_MEAN +#include <objbase.h> + +#include "galliumdxgi.h" +#include <d3dcommon.h> + +extern "C" +{ +#include <util/u_atomic.h> +#include <pipe/p_format.h> +#include <os/os_thread.h> +} + +#include <assert.h> +#ifdef min +#undef min +#endif +#ifdef max +#undef max +#endif + +#define D3D_PRIMITIVE_TOPOLOGY_COUNT 65 +extern unsigned d3d_to_pipe_prim[D3D_PRIMITIVE_TOPOLOGY_COUNT]; + +#define D3D_PRIMITIVE_COUNT 40 +extern unsigned d3d_to_pipe_prim_type[D3D_PRIMITIVE_COUNT]; + +/* NOTE: this _depends_ on the vtable layout of the C++ compiler to be + * binary compatible with Windows. + * Furthermore some absurd vtable layout likely won't work at all, since + * we perform some casts which are probably not safe by the C++ standard. + * + * In particular, the GNU/Linux/Itanium/clang ABI and Microsoft ABIs will work, + * but others may not. + * If in doubt, just switch to the latest version of a widely used C++ compiler. + * + * DESIGN of the Gallium COM implementation + * + * This state tracker uses somewhat unusual C++ coding patterns, + * to implement the COM interfaces required by Direct3D. + * + * While it may seem complicated, the effect is that the result + * generally behaves as intuitively as possible: in particular pointer + * casts very rarely change the pointer value (only for secondary + * DXGI/Gallium interfaces) + * + * Implementing COM is on first sight very easy: after all, it just + * consists of a reference count, and a dynamic_cast<> equivalent. + * + * However, implementing objects with multiple interfaces is actually + * quite tricky. + * The issue is that the interface pointers can't be equal, since this + * would place incompatible constraints on the vtable layout and thus + * multiple inheritance (and the subobjects the C++ compiler creates + * with it) must be correctly used. + * + * Furthermore, we must have a single reference count, which means + * that a naive implementation won't work, and it's necessary to either + * use virtual inheritance, or the "mixin inheritance" model we use. + * + * This solution aims to achieve the following object layout: + * 0: pointer to vtable for primary interface + * 1: reference count + * ... main class + * ... vtable pointers for secondary interfaces + * ... implementation of subclasses assuming secondary interfaces + * + * This allows us to cast pointers by just reinterpreting the value in + * almost all cases. + * + * To achieve this, *all* non-leaf classes must have their parent + * or the base COM interface as a template parameter, since derived + * classes may need to change that to support an interface derived + * from the one implemented by the superclass. + * + * Note however, that you can cast without regard to the template + * parameter, because only the vtable layout depends on it, since + * interfaces have no data members. + * + * For this to work, DON'T USE VIRTUAL FUNCTIONS except to implement + * interfaces, since the vtable layouts would otherwise be mismatched. + * An exception are virtual functions called only from other virtual functions, + * which is currently only used for the virtual destructor. + * + * The base class is GalliumComObject<IFoo>, which implements the + * IUnknown interface, and inherits IFoo. + * + * To support multiple inheritance, we insert GalliumMultiComObject, + * which redirects the secondary interfaces to the GalliumComObject + * superclass. + * + * Gallium(Multi)PrivateDataComObject is like ComObject but also + * implements the Get/SetPrivateData functions present on several + * D3D/DXGI interfaces. + * + * Example class hierarchy: + * + * IUnknown + * (pure interface) + * | + * V + * IAnimal + * (pure interface) + * | + * V + * IDuck + * (pure interface) + * | + * V + * GalliumComObject<IDuck> + * (non-instantiable, only implements IUnknown) + * | + * V + * GalliumAnimal<IDuck> + * (non-instantiable, only implements IAnimal) + * | + * V + * GalliumDuck + * (concrete) + * | + * V + * GalliumMultiComObject<GalliumDuck, IWheeledVehicle> <- IWheeledVehicle <- IVehicle <- IUnknown (second version) + * (non-instantiable, only implements IDuck and the IUnknown of IWheeledVehicle) + * | + * V + * GalliumDuckOnWheels + * (concrete) + * + * This will produce the desired layout. + * Note that GalliumAnimal<IFoo>* is safely castable to GalliumAnimal<IBar>* + * by reinterpreting, as long as non-interface virtual functions are not used, + * and that you only call interface functions for the superinterface of IBar + * that the object actually implements. + * + * Instead, if GalliumDuck where to inherit both from GalliumAnimal + * and IDuck, then (IDuck*)gallium_duck and (IAnimal*)gallium_duck would + * have different pointer values, which the "base class as template parameter" + * trick avoids. + * + * The price we pay is that you MUST NOT have virtual functions other than those + * implementing interfaces (except for leaf classes) since the position of these + * would depend on the base interface. + * As mentioned above, virtual functions only called from interface functions + * are an exception, currently used only for the virtual destructor. + * If you want virtual functions anyway , put them in a separate interface class, + * multiply inherit from that and cast the pointer to that interface. + * + * You CAN however have virtual functions on any class which does not specify + * his base as a template parameter, or where you don't need to change the + * template base interface parameter by casting. + * + * --- The magic QueryInterface "delete this" trick --- + * + * When the reference count drops to 0, we must delete the class. + * The problem is, that we must call the right virtual destructor (i.e. on the right class). + * However, we would like to be able to call release() and nonatomic_release() + * non-virtually for performance (also, the latter cannot be called virtually at all, since + * IUnknown does not offer it). + * + * The naive solution would be to just add a virtual destructor and rely on it. + * However, this doesn't work due to the fact that as described above we perform casets + * with are unsafe regarding vtable layout. + * In particular, consider the case where we try to delete GalliumComObject<ID3D11Texture2D> + * with a pointer to GalliumComObject<ID3D11Resource>. + * Since we think that this is a GalliumComObject<ID3D11Resource>, we'll look for the + * destructor in the vtable slot immediately after the ID3D11Resource vtable, but this is + * actually an ID3D11Texture2D function implemented by the object! + * + * So, we must put the destructor somewhere else. + * We could add it as a data member, but it would be awkward and it would bloat the + * class. + * Thus, we use this trick: we reuse the vtable slot for QueryInterface, which is always at the + * same position. + * To do so, we define a special value for the first pointer argument, that triggers a + * "delete this". + * In addition to that, we add a virtual destructor to GalliumComObject. + * That virtual destructor will be called by QueryInterface, and since that is a virtual + * function, it will know the correct place for the virtual destructor. + * + * QueryInterface is already slow due to the need to compare several GUIDs, so the + * additional pointer test should not be significant. + * + * Of course the ideal solution would be telling the C++ compiler to put the + * destructor it in a negative vtable slot, but unfortunately GCC doesn't support that + * yet, and this method is almost as good as that. + */ + +template<typename T> +struct com_traits; + +#define COM_INTERFACE(intf, base) \ +template<> \ +struct com_traits<intf> \ +{ \ + static REFIID iid() {return IID_##intf;} \ + static inline bool is_self_or_ancestor(REFIID riid) {return riid == iid() || com_traits<base>::is_self_or_ancestor(riid);} \ +}; + +template<> +struct com_traits<IUnknown> +{ + static REFIID iid() {return IID_IUnknown;} + static inline bool is_self_or_ancestor(REFIID riid) {return riid == iid();} +}; + +#ifndef _MSC_VER +#define __uuidof(T) (com_traits<T>::iid()) +#endif + +struct refcnt_t +{ + uint32_t refcnt; + + refcnt_t(unsigned v = 1) + : refcnt(v) + {} + + unsigned add_ref() + { + p_atomic_inc((int32_t*)&refcnt); + return refcnt; + } + + unsigned release() + { + if(p_atomic_dec_zero((int32_t*)&refcnt)) + return 0; + return refcnt; + } + + void nonatomic_add_ref() + { + p_atomic_inc((int32_t*)&refcnt); + } + + unsigned nonatomic_release() + { + if(p_atomic_dec_zero((int32_t*)&refcnt)) + return 0; + else + return 1; + } +}; + +#if defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) +/* this should be safe because atomic ops are full memory barriers, and thus a sequence that does: + * ++one_refcnt; + * --other_refcnt; + * should never be reorderable (as seen from another CPU) to: + * --other_refcnt + * ++one_refcnt + * + * since one of the ops is atomic. + * If this weren't the case, a CPU could incorrectly destroy an object manipulated in that way by another one. + */ +struct dual_refcnt_t +{ + union + { + uint64_t refcnt; + struct + { + uint32_t atomic_refcnt; + uint32_t nonatomic_refcnt; + }; + }; + + dual_refcnt_t(unsigned v = 1) + { + atomic_refcnt = v; + nonatomic_refcnt = 0; + } + + bool is_zero() + { + if(sizeof(void*) == 8) + return *(volatile uint64_t*)&refcnt == 0ULL; + else + { + uint64_t v; + do + { + v = refcnt; + } + while(!__sync_bool_compare_and_swap(&refcnt, v, v)); + return v == 0ULL; + } + } + + unsigned add_ref() + { + //printf("%p add_ref at %u %u\n", this, atomic_refcnt, nonatomic_refcnt); + p_atomic_inc((int32_t*)&atomic_refcnt); + return atomic_refcnt + nonatomic_refcnt; + } + + unsigned release() + { + //printf("%p release at %u %u\n", this, atomic_refcnt, nonatomic_refcnt); + if(p_atomic_dec_zero((int32_t*)&atomic_refcnt) && !nonatomic_refcnt && is_zero()) + return 0; + unsigned v = atomic_refcnt + nonatomic_refcnt; + return v ? v : 1; + } + + void nonatomic_add_ref() + { + //printf("%p nonatomic_add_ref at %u %u\n", this, atomic_refcnt, nonatomic_refcnt); + ++nonatomic_refcnt; + } + + unsigned nonatomic_release() + { + //printf("%p nonatomic_release at %u %u\n", this, atomic_refcnt, nonatomic_refcnt); + if(!--nonatomic_refcnt) + { + __sync_synchronize(); + if(!atomic_refcnt && is_zero()) + return 0; + } + return 1; + } +}; +#else +// this will result in atomic operations being used while they could have been avoided +#ifdef __i386__ +#warning Compile for 586+ using GCC to improve the performance of the Direct3D 10/11 state tracker +#endif +typedef refcnt_t dual_refcnt_t; +#endif + +#define IID_MAGIC_DELETE_THIS (*(const IID*)((intptr_t)-(int)(sizeof(IID) - 1))) + +template<typename Base = IUnknown, typename RefCnt = refcnt_t> +struct GalliumComObject : public Base +{ + RefCnt refcnt; + + GalliumComObject() + {} + + /* DO NOT CALL this from externally called non-virtual functions in derived classes, since + * the vtable position depends on the COM interface being implemented + */ + virtual ~GalliumComObject() + {} + + inline ULONG add_ref() + { + return refcnt.add_ref(); + } + + inline ULONG release() + { + ULONG v = refcnt.release(); + if(!v) + { + /* this will call execute "delete this", using the correct vtable slot for the destructor */ + /* see the initial comment for an explaination of this magic trick */ + this->QueryInterface(IID_MAGIC_DELETE_THIS, 0); + return 0; + } + return v; + } + + inline void nonatomic_add_ref() + { + refcnt.nonatomic_add_ref(); + } + + inline void nonatomic_release() + { + if(!refcnt.nonatomic_release()) + { + /* this will execute "delete this", using the correct vtable slot for the destructor */ + /* see the initial comment for an explaination of this magic trick */ + this->QueryInterface(IID_MAGIC_DELETE_THIS, 0); + } + } + + inline HRESULT query_interface(REFIID riid, void **ppvObject) + { + if(com_traits<Base>::is_self_or_ancestor(riid)) + { + // must be the virtual AddRef, since it is overridden by some classes + this->AddRef(); + *ppvObject = this; + return S_OK; + } + else + return E_NOINTERFACE; + } + + virtual ULONG STDMETHODCALLTYPE AddRef() + { + return add_ref(); + } + + virtual ULONG STDMETHODCALLTYPE Release() + { + return release(); + } + + virtual HRESULT STDMETHODCALLTYPE QueryInterface( + REFIID riid, + void **ppvObject) + { + /* see the initial comment for an explaination of this magic trick */ + if(&riid == &IID_MAGIC_DELETE_THIS) + { + delete this; + return 0; + } + if(!this) + return E_INVALIDARG; + if(!ppvObject) + return E_POINTER; + return query_interface(riid, ppvObject); + } +}; + +template<typename BaseClass, typename SecondaryInterface> +struct GalliumMultiComObject : public BaseClass, SecondaryInterface +{ + // we could avoid this duplication, but the increased complexity to do so isn't worth it + virtual ULONG STDMETHODCALLTYPE AddRef() + { + return BaseClass::add_ref(); + } + + virtual ULONG STDMETHODCALLTYPE Release() + { + return BaseClass::release(); + } + + inline HRESULT query_interface(REFIID riid, void **ppvObject) + { + HRESULT hr = BaseClass::query_interface(riid, ppvObject); + if(SUCCEEDED(hr)) + return hr; + if(com_traits<SecondaryInterface>::is_self_or_ancestor(riid)) + { + // must be the virtual AddRef, since it is overridden by some classes + this->AddRef(); + *ppvObject = (SecondaryInterface*)this; + return S_OK; + } + else + return E_NOINTERFACE; + } + + virtual HRESULT STDMETHODCALLTYPE QueryInterface( + REFIID riid, + void **ppvObject) + { + /* see the initial comment for an explaination of this magic trick */ + if(&riid == &IID_MAGIC_DELETE_THIS) + { + delete this; + return 0; + } + if(!this) + return E_INVALIDARG; + if(!ppvObject) + return E_POINTER; + return query_interface(riid, ppvObject); + } +}; + +template<typename T, typename Traits> +struct refcnt_ptr +{ + T* p; + + refcnt_ptr() + : p(0) + {} + + void add_ref() {Traits::add_ref(p);} + void release() {Traits::release(p);} + + template<typename U, typename UTraits> + refcnt_ptr(const refcnt_ptr<U, UTraits>& c) + { + *this = static_cast<U*>(c.ref()); + } + + ~refcnt_ptr() + { + release(); + } + + void reset(T* q) + { + release(); + p = q; + } + + template<typename U, typename UTraits> + refcnt_ptr& operator =(const refcnt_ptr<U, UTraits>& q) + { + return *this = q.p; + } + + template<typename U> + refcnt_ptr& operator =(U* q) + { + release(); + p = static_cast<T*>(q); + add_ref(); + return *this; + } + + T* ref() + { + add_ref(); + return p; + } + + T* steal() + { + T* ret = p; + p = 0; + return ret; + } + + T* operator ->() + { + return p; + } + + const T* operator ->() const + { + return p; + } + + T** operator &() + { + assert(!p); + return &p; + } + + bool operator !() const + { + return !p; + } + + typedef T* refcnt_ptr::*unspecified_bool_type; + + operator unspecified_bool_type() const + { + return p ? &refcnt_ptr::p : 0; + } +}; + +struct simple_ptr_traits +{ + static void add_ref(void* p) {} + static void release(void* p) {} +}; + +struct com_ptr_traits +{ + static void add_ref(void* p) + { + if(p) + ((IUnknown*)p)->AddRef(); + } + + static void release(void* p) + { + if(p) + ((IUnknown*)p)->Release(); + } +}; + +template<typename T> +struct ComPtr : public refcnt_ptr<T, com_ptr_traits> +{ + template<typename U, typename UTraits> + ComPtr& operator =(const refcnt_ptr<U, UTraits>& q) + { + return *this = q.p; + } + + template<typename U> + ComPtr& operator =(U* q) + { + this->release(); + this->p = static_cast<T*>(q); + this->add_ref(); + return *this; + } +}; + +template<typename T, typename TTraits, typename U, typename UTraits> +bool operator ==(const refcnt_ptr<T, TTraits>& a, const refcnt_ptr<U, UTraits>& b) +{ + return a.p == b.p; +} + +template<typename T, typename TTraits, typename U> +bool operator ==(const refcnt_ptr<T, TTraits>& a, U* b) +{ + return a.p == b; +} + +template<typename T, typename TTraits, typename U> +bool operator ==(U* b, const refcnt_ptr<T, TTraits>& a) +{ + return a.p == b; +} + +template<typename T, typename TTraits, typename U, typename UTraits> +bool operator !=(const refcnt_ptr<T, TTraits>& a, const refcnt_ptr<U, UTraits>& b) +{ + return a.p != b.p; +} + +template<typename T, typename TTraits, typename U> +bool operator !=(const refcnt_ptr<T, TTraits>& a, U* b) +{ + return a.p != b; +} + +template<typename T, typename TTraits, typename U> +bool operator !=(U* b, const refcnt_ptr<T, TTraits>& a) +{ + return a.p != b; +} + +template<bool threadsafe> +struct maybe_mutex_t; + +template<> +struct maybe_mutex_t<true> +{ + pipe_mutex mutex; + + maybe_mutex_t() + { + pipe_mutex_init(mutex); + } + + void lock() + { + pipe_mutex_lock(mutex); + } + + void unlock() + { + pipe_mutex_unlock(mutex); + } +}; + +template<> +struct maybe_mutex_t<false> +{ + void lock() + { + } + + void unlock() + { + } +}; + +typedef maybe_mutex_t<true> mutex_t; + +template<typename T> +struct lock_t +{ + T& mutex; + lock_t(T& mutex) + : mutex(mutex) + { + mutex.lock(); + } + + ~lock_t() + { + mutex.unlock(); + } +}; + +struct c_string +{ + const char* p; + c_string(const char* p) + : p(p) + {} + + operator const char*() const + { + return p; + } +}; + +static inline bool operator ==(const c_string& a, const c_string& b) +{ + return !strcmp(a.p, b.p); +} + +static inline bool operator !=(const c_string& a, const c_string& b) +{ + return strcmp(a.p, b.p); +} + +static inline size_t raw_hash(const char* p, size_t size) +{ + size_t res; + if(sizeof(size_t) >= 8) + res = (size_t)14695981039346656037ULL; + else + res = (size_t)2166136261UL; + const char* end = p + size; + for(; p != end; ++p) + { + res ^= (size_t)*p; + if(sizeof(size_t) >= 8) + res *= (size_t)1099511628211ULL; + else + res *= (size_t)16777619UL; + } + return res; +}; + +template<typename T> +static inline size_t raw_hash(const T& t) +{ + return raw_hash((const char*)&t, sizeof(t)); +} + +// TODO: only tested with the gcc libstdc++, might not work elsewhere +namespace std +{ +#ifndef _MSC_VER + namespace tr1 + { +#endif + template<> + struct hash<GUID> : public std::unary_function<GUID, size_t> + { + inline size_t operator()(GUID __val) const; + }; + + inline size_t hash<GUID>::operator()(GUID __val) const + { + return raw_hash(__val); + } + + template<> + struct hash<c_string> : public std::unary_function<c_string, size_t> + { + inline size_t operator()(c_string __val) const; + }; + + inline size_t hash<c_string>::operator()(c_string __val) const + { + return raw_hash(__val.p, strlen(__val.p)); + } + + template<typename T, typename U> + struct hash<std::pair<T, U> > : public std::unary_function<std::pair<T, U>, size_t> + { + inline size_t operator()(std::pair<T, U> __val) const; + }; + + template<typename T, typename U> + inline size_t hash<std::pair<T, U> >::operator()(std::pair<T, U> __val) const + { + std::pair<size_t, size_t> p; + p.first = hash<T>()(__val.first); + p.second = hash<U>()(__val.second); + return raw_hash(p); + } +#ifndef _MSC_VER + } +#endif +} + +template<typename Base, typename RefCnt = refcnt_t> +struct GalliumPrivateDataComObject : public GalliumComObject<Base, RefCnt> +{ + typedef std::unordered_map<GUID, std::pair<void*, unsigned> > private_data_map_t; + private_data_map_t private_data_map; + mutex_t private_data_mutex; + + ~GalliumPrivateDataComObject() + { + for(private_data_map_t::iterator i = private_data_map.begin(), e = private_data_map.end(); i != e; ++i) + { + if(i->second.second == ~0u) + ((IUnknown*)i->second.first)->Release(); + else + free(i->second.first); + } + } + + HRESULT get_private_data( + REFGUID guid, + UINT *pDataSize, + void *pData) + { + lock_t<mutex_t> lock(private_data_mutex); + private_data_map_t::iterator i = private_data_map.find(guid); + *pDataSize = 0; + if(i == private_data_map.end()) + return DXGI_ERROR_NOT_FOUND; + if(i->second.second == ~0u) + { + /* TODO: is GetPrivateData on interface data supposed to do this? */ + if(*pDataSize < sizeof(void*)) + return E_INVALIDARG; + if(pData) + { + memcpy(pData, &i->second.first, sizeof(void*)); + ((IUnknown*)i->second.first)->AddRef(); + } + *pDataSize = sizeof(void*); + } + else + { + unsigned size = std::min(*pDataSize, i->second.second); + if(pData) + memcpy(pData, i->second.first, size); + *pDataSize = size; + } + return S_OK; + } + + HRESULT set_private_data( + REFGUID guid, + UINT DataSize, + const void *pData) + { + void* p = 0; + + if(DataSize && pData) + { + p = malloc(DataSize); + if(!p) + return E_OUTOFMEMORY; + } + + lock_t<mutex_t> lock(private_data_mutex); + std::pair<void*, unsigned>& v = private_data_map[guid]; + if(v.first) + { + if(v.second == ~0u) + ((IUnknown*)v.first)->Release(); + else + free(v.first); + } + if(DataSize && pData) + { + memcpy(p, pData, DataSize); + v.first = p; + v.second = DataSize; + } + else + private_data_map.erase(guid); + return S_OK; + } + + HRESULT set_private_data_interface( + REFGUID guid, + const IUnknown *pData) + { + lock_t<mutex_t> lock(private_data_mutex); + std::pair<void*, unsigned>& v = private_data_map[guid]; + if(v.first) + { + if(v.second == ~0u) + ((IUnknown*)v.first)->Release(); + else + free(v.first); + } + if(pData) + { + ((IUnknown*)pData)->AddRef(); + v.first = (void*)pData; + v.second = ~0; + } + else + private_data_map.erase(guid); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetPrivateData( + REFGUID guid, + UINT *pDataSize, + void *pData) + { + return get_private_data(guid, pDataSize, pData); + } + + virtual HRESULT STDMETHODCALLTYPE SetPrivateData( + REFGUID guid, + UINT DataSize, + const void *pData) + { + return set_private_data(guid, DataSize, pData); + } + + virtual HRESULT STDMETHODCALLTYPE SetPrivateDataInterface( + REFGUID guid, + const IUnknown *pData) + { + return set_private_data_interface(guid, pData); + } +}; + +template<typename BaseClass, typename SecondaryInterface> +struct GalliumMultiPrivateDataComObject : public GalliumMultiComObject<BaseClass, SecondaryInterface> +{ + // we could avoid this duplication, but the increased complexity to do so isn't worth it + virtual HRESULT STDMETHODCALLTYPE GetPrivateData( + REFGUID guid, + UINT *pDataSize, + void *pData) + { + return BaseClass::get_private_data(guid, pDataSize, pData); + } + + virtual HRESULT STDMETHODCALLTYPE SetPrivateData( + REFGUID guid, + UINT DataSize, + const void *pData) + { + return BaseClass::set_private_data(guid, DataSize, pData); + } + + virtual HRESULT STDMETHODCALLTYPE SetPrivateDataInterface( + REFGUID guid, + const IUnknown *pData) + { + return BaseClass::set_private_data_interface(guid, pData); + } +}; + +#define DXGI_FORMAT_COUNT 100 +extern pipe_format dxgi_to_pipe_format[DXGI_FORMAT_COUNT]; +extern DXGI_FORMAT pipe_to_dxgi_format[PIPE_FORMAT_COUNT]; + +void init_pipe_to_dxgi_format(); + +COM_INTERFACE(IGalliumDevice, IUnknown); +COM_INTERFACE(IGalliumAdapter, IUnknown); +COM_INTERFACE(IGalliumResource, IUnknown); + +// used to make QueryInterface know the IIDs of the interface and its ancestors +COM_INTERFACE(IDXGIObject, IUnknown) +COM_INTERFACE(IDXGIDeviceSubObject, IDXGIObject) +COM_INTERFACE(IDXGISurface, IDXGIDeviceSubObject) +COM_INTERFACE(IDXGIOutput, IDXGIObject) +COM_INTERFACE(IDXGIAdapter, IDXGIObject) +COM_INTERFACE(IDXGISwapChain, IDXGIDeviceSubObject) +COM_INTERFACE(IDXGIFactory, IDXGIObject) +COM_INTERFACE(IDXGIDevice, IDXGIObject) +COM_INTERFACE(IDXGIResource, IDXGIDeviceSubObject) +COM_INTERFACE(IDXGISurface1, IDXGISurface) +COM_INTERFACE(IDXGIDevice1, IDXGIDevice) +COM_INTERFACE(IDXGIAdapter1, IDXGIAdapter) +COM_INTERFACE(IDXGIFactory1, IDXGIFactory) + +template<typename Base> +struct GalliumDXGIDevice : public GalliumMultiPrivateDataComObject<Base, IDXGIDevice1> +{ + ComPtr<IDXGIAdapter> adapter; + int priority; + unsigned max_latency; + + GalliumDXGIDevice(IDXGIAdapter* p_adapter) + { + adapter = p_adapter; + } + + virtual HRESULT STDMETHODCALLTYPE GetParent( + REFIID riid, + void **ppParent) + { + return adapter.p->QueryInterface(riid, ppParent); + } + + virtual HRESULT STDMETHODCALLTYPE GetAdapter( + IDXGIAdapter **pAdapter) + { + *pAdapter = adapter.ref(); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE QueryResourceResidency( + IUnknown *const *ppResources, + DXGI_RESIDENCY *pResidencyStatus, + UINT NumResources) + { + for(unsigned i = 0; i < NumResources; ++i) + pResidencyStatus[i] = DXGI_RESIDENCY_FULLY_RESIDENT; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE SetGPUThreadPriority( + INT Priority) + { + priority = Priority; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetGPUThreadPriority( + INT *pPriority) + { + *pPriority = priority; + return S_OK; + } + + HRESULT STDMETHODCALLTYPE GetMaximumFrameLatency( + UINT *pMaxLatency + ) + { + *pMaxLatency = max_latency; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE SetMaximumFrameLatency( + UINT MaxLatency) + { + max_latency = MaxLatency; + return S_OK; + } +}; + +COM_INTERFACE(ID3D10Blob, IUnknown); + +/* NOTE: ID3DBlob implementations may come from a Microsoft native DLL + * (e.g. d3dcompiler), or perhaps even from the application itself. + * + * Hence, never try to access the data/size members directly, which is why they are private. + * In internal code, use std::pair<void*, size_t> instead of this class. + */ +class GalliumD3DBlob : public GalliumComObject<ID3DBlob> +{ + void* data; + size_t size; + +public: + GalliumD3DBlob(void* data, size_t size) + : data(data), size(size) + {} + + ~GalliumD3DBlob() + { + free(data); + } + + virtual LPVOID STDMETHODCALLTYPE GetBufferPointer() + { + return data; + } + + virtual SIZE_T STDMETHODCALLTYPE GetBufferSize() + { + return size; + } +}; + +#endif /* D3D1XSTUTIL_H_ */ diff --git a/src/gallium/state_trackers/d3d1x/d3d1xstutil/src/d3d_sm4_enums.cpp b/src/gallium/state_trackers/d3d1x/d3d1xstutil/src/d3d_sm4_enums.cpp new file mode 100644 index 0000000000..410a56c248 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xstutil/src/d3d_sm4_enums.cpp @@ -0,0 +1,42 @@ +#include "d3d1xstutil.h" + +unsigned d3d_to_pipe_prim[D3D_PRIMITIVE_TOPOLOGY_COUNT] = +{ + 0, + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINE_STRIP, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLE_STRIP, + PIPE_PRIM_LINES_ADJACENCY, + PIPE_PRIM_LINE_STRIP_ADJACENCY, + PIPE_PRIM_TRIANGLES_ADJACENCY, + PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY, + /* gap */ + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + /* patches */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; + +unsigned d3d_to_pipe_prim_type[D3D_PRIMITIVE_COUNT] = +{ + 0, + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + 0, + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES_ADJACENCY, + PIPE_PRIM_TRIANGLES_ADJACENCY, + /* patches */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; diff --git a/src/gallium/state_trackers/d3d1x/d3d1xstutil/src/dxgi_enums.cpp b/src/gallium/state_trackers/d3d1x/d3d1xstutil/src/dxgi_enums.cpp new file mode 100644 index 0000000000..da28e64384 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xstutil/src/dxgi_enums.cpp @@ -0,0 +1,147 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <d3d1xstutil.h> + +/* D3D has to keep binary compatibility, so these tables will always work + * However, Gallium -> D3D conversion must use .[PIPE_xxx] = D3D11_xxx syntax */ + +pipe_format dxgi_to_pipe_format[DXGI_FORMAT_COUNT] = +{ + PIPE_FORMAT_NONE, + PIPE_FORMAT_R32G32B32A32_FLOAT, /* TYPELESS */ + PIPE_FORMAT_R32G32B32A32_FLOAT, + PIPE_FORMAT_R32G32B32A32_USCALED, + PIPE_FORMAT_R32G32B32A32_SSCALED, + PIPE_FORMAT_R32G32B32_FLOAT, /* TYPELESS */ + PIPE_FORMAT_R32G32B32_FLOAT, + PIPE_FORMAT_R32G32B32_USCALED, + PIPE_FORMAT_R32G32B32_SSCALED, + PIPE_FORMAT_R16G16B16A16_FLOAT, /* TYPELESS */ + PIPE_FORMAT_R16G16B16A16_FLOAT, + PIPE_FORMAT_R16G16B16A16_UNORM, + PIPE_FORMAT_R16G16B16A16_USCALED, + PIPE_FORMAT_R16G16B16A16_SNORM, + PIPE_FORMAT_R16G16B16A16_SSCALED, + PIPE_FORMAT_R32G32_FLOAT, /* TYPELESS */ + PIPE_FORMAT_R32G32_FLOAT, + PIPE_FORMAT_R32G32_USCALED, + PIPE_FORMAT_R32G32_SSCALED, + PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, /* PIPE_FORMAT_R32G8X24_FLOAT_TYPELESS */ + PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, + PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, /* PIPE_FORMAT_R32_FLOAT_X8X24_TYPELESS */ + PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, /* PIPE_FORMAT_X32_TYPELESS_G8X24_USCALED */ + PIPE_FORMAT_R10G10B10A2_UNORM, /* TYPELESS */ + PIPE_FORMAT_R10G10B10A2_UNORM, + PIPE_FORMAT_R10G10B10A2_USCALED, + PIPE_FORMAT_R11G11B10_FLOAT, + PIPE_FORMAT_R8G8B8A8_UNORM, /* TYPELESS */ + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_R8G8B8A8_SRGB, + PIPE_FORMAT_R8G8B8A8_USCALED, + PIPE_FORMAT_R8G8B8A8_SNORM, + PIPE_FORMAT_R8G8B8A8_SSCALED, + PIPE_FORMAT_R16G16_FLOAT, /* TYPELESS */ + PIPE_FORMAT_R16G16_FLOAT, + PIPE_FORMAT_R16G16_UNORM, + PIPE_FORMAT_R16G16_USCALED, + PIPE_FORMAT_R16G16_SNORM, + PIPE_FORMAT_R16G16_SSCALED, + PIPE_FORMAT_R32_FLOAT, /* TYPELESS */ + PIPE_FORMAT_Z32_FLOAT, + PIPE_FORMAT_R32_FLOAT, + PIPE_FORMAT_R32_USCALED, + PIPE_FORMAT_R32_SSCALED, + PIPE_FORMAT_Z24_UNORM_S8_USCALED, /* PIPE_FORMAT_R24G8_TYPELESS */ + PIPE_FORMAT_Z24_UNORM_S8_USCALED, + PIPE_FORMAT_Z24X8_UNORM, /* PIPE_FORMAT_R24_UNORM_X8_TYPELESS */ + PIPE_FORMAT_Z24_UNORM_S8_USCALED, /* PIPE_FORMAT_X24_TYPELESS_G8_USCALED */ + PIPE_FORMAT_R8G8_UNORM, /* TYPELESS */ + PIPE_FORMAT_R8G8_UNORM, + PIPE_FORMAT_R8G8_USCALED, + PIPE_FORMAT_R8G8_SNORM, + PIPE_FORMAT_R8G8_SSCALED, + PIPE_FORMAT_R16_FLOAT, /* TYPELESS */ + PIPE_FORMAT_R16_FLOAT, + PIPE_FORMAT_Z16_UNORM, + PIPE_FORMAT_R16_UNORM, + PIPE_FORMAT_R16_USCALED, + PIPE_FORMAT_R16_SNORM, + PIPE_FORMAT_R16_SSCALED, + PIPE_FORMAT_R8_UNORM, /* TYPELESS */ + PIPE_FORMAT_R8_UNORM, + PIPE_FORMAT_R8_USCALED, + PIPE_FORMAT_R8_SNORM, + PIPE_FORMAT_R8_SSCALED, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_R1_UNORM, + PIPE_FORMAT_R9G9B9E5_FLOAT, + PIPE_FORMAT_R8G8_B8G8_UNORM, + PIPE_FORMAT_G8R8_G8B8_UNORM, + PIPE_FORMAT_DXT1_RGBA, /* TYPELESS */ + PIPE_FORMAT_DXT1_RGBA, + PIPE_FORMAT_DXT1_SRGBA, + PIPE_FORMAT_DXT3_RGBA, /* TYPELESS */ + PIPE_FORMAT_DXT3_RGBA, + PIPE_FORMAT_DXT3_SRGBA, + PIPE_FORMAT_DXT5_RGBA, /* TYPELESS */ + PIPE_FORMAT_DXT5_RGBA, + PIPE_FORMAT_DXT5_SRGBA, + PIPE_FORMAT_RGTC1_UNORM, /* TYPELESS */ + PIPE_FORMAT_RGTC1_UNORM, + PIPE_FORMAT_RGTC1_SNORM, + PIPE_FORMAT_RGTC2_UNORM, /* TYPELESS */ + PIPE_FORMAT_RGTC2_UNORM, + PIPE_FORMAT_RGTC2_SNORM, + PIPE_FORMAT_B5G6R5_UNORM, + PIPE_FORMAT_B5G5R5A1_UNORM, + PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_FORMAT_B8G8R8X8_UNORM, + PIPE_FORMAT_R10SG10SB10SA2U_NORM, + PIPE_FORMAT_B8G8R8A8_UNORM, /* TYPELESS */ + PIPE_FORMAT_B8G8R8A8_SRGB, + PIPE_FORMAT_B8G8R8X8_UNORM, /* TYPELESS */ + PIPE_FORMAT_B8G8R8X8_SRGB, + PIPE_FORMAT_NONE, /* PIPE_FORMAT_BC6H_TYPELESS */ + PIPE_FORMAT_NONE, /* PIPE_FORMAT_BC6H_UF16 */ + PIPE_FORMAT_NONE, /* PIPE_FORMAT_BC6H_SF16 */ + PIPE_FORMAT_NONE, /* PIPE_FORMAT_BC7_TYPELESS */ + PIPE_FORMAT_NONE, /* PIPE_FORMAT_BC7_UNORM */ + PIPE_FORMAT_NONE, /* PIPE_FORMAT_BC7_UNORM_SRGB */ +}; + +DXGI_FORMAT pipe_to_dxgi_format[PIPE_FORMAT_COUNT]; +static int pipe_to_dxgi_format_initialized; +void init_pipe_to_dxgi_format() +{ + if(!pipe_to_dxgi_format_initialized) + { + for(unsigned i = 0; i < DXGI_FORMAT_COUNT; ++i) + pipe_to_dxgi_format[dxgi_to_pipe_format[i]] = (DXGI_FORMAT)i; + pipe_to_dxgi_format[PIPE_FORMAT_NONE] = DXGI_FORMAT_UNKNOWN; + pipe_to_dxgi_format_initialized = 1; + } +} diff --git a/src/gallium/state_trackers/d3d1x/d3d1xstutil/src/guids.cpp b/src/gallium/state_trackers/d3d1x/d3d1xstutil/src/guids.cpp new file mode 100644 index 0000000000..ec45035b82 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3d1xstutil/src/guids.cpp @@ -0,0 +1,6 @@ +#define INITGUID +#include "d3d1xstutil.h" +#include <galliumd3d11.h> +#include <galliumd3d10_1.h> +#include <galliumdxgi.h> +#include <galliumcom.h> diff --git a/src/gallium/state_trackers/d3d1x/d3dapi/Makefile b/src/gallium/state_trackers/d3d1x/d3dapi/Makefile new file mode 100644 index 0000000000..8b16b1bcbc --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3dapi/Makefile @@ -0,0 +1,4 @@ +all: idl + +include ../Makefile.inc + diff --git a/src/gallium/state_trackers/d3d1x/d3dapi/d3d10.idl b/src/gallium/state_trackers/d3d1x/d3dapi/d3d10.idl new file mode 100644 index 0000000000..91b1abc24a --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3dapi/d3d10.idl @@ -0,0 +1,1554 @@ +/* + * Copyright 2007 Andras Kovacs + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +/* modified by Luca Barbieri on Sep 2010 to: + * - converted to using d3dcommon.idl + * - add missing D3D10_FORMAT_SUPPORT + * - add DXGI 1.1 D3D10_RESOURCE_MISC_SHARED_KEYEDMUTEX, D3D10_RESOURCE_MISC_GDI_COMPATIBLE +*/ +import "oaidl.idl"; +import "ocidl.idl"; +import "dxgi.idl"; +import "d3dcommon.idl"; + +cpp_quote("#ifndef _D3D10_CONSTANTS") +cpp_quote("#define _D3D10_CONSTANTS") +const float D3D10_DEFAULT_BLEND_FACTOR_ALPHA = 1.0; +const float D3D10_DEFAULT_BLEND_FACTOR_BLUE = 1.0; +const float D3D10_DEFAULT_BLEND_FACTOR_GREEN = 1.0; +const float D3D10_DEFAULT_BLEND_FACTOR_RED = 1.0; +const float D3D10_DEFAULT_BORDER_COLOR_COMPONENT = 0.0; +const float D3D10_DEFAULT_SLOPE_SCALED_DEPTH_BIAS = 0.0; +const float D3D10_DEFAULT_DEPTH_BIAS_CLAMP = 0.0; +const float D3D10_DEFAULT_MAX_ANISOTROPY = 16.0; +const float D3D10_DEFAULT_MIP_LOD_BIAS = 0.0; +const float D3D10_DEFAULT_VIEWPORT_MAX_DEPTH = 0.0; +const float D3D10_DEFAULT_VIEWPORT_MIN_DEPTH = 0.0; +const float D3D10_FLOAT16_FUSED_TOLERANCE_IN_ULP = 0.6; +const float D3D10_FLOAT32_MAX = 3.402823466e+38; +const float D3D10_FLOAT32_TO_INTEGER_TOLERANCE_IN_ULP = 0.6; +const float D3D10_FLOAT_TO_SRGB_EXPONENT_DENOMINATOR = 2.4; +const float D3D10_FLOAT_TO_SRGB_EXPONENT_NUMERATOR = 1.0; +const float D3D10_FLOAT_TO_SRGB_OFFSET = 0.055; +const float D3D10_FLOAT_TO_SRGB_SCALE_1 = 12.92; +const float D3D10_FLOAT_TO_SRGB_SCALE_2 = 1.055; +const float D3D10_FLOAT_TO_SRGB_THRESHOLD = 0.0031308; +const float D3D10_FTOI_INSTRUCTION_MAX_INPUT = 2147483647.999; +const float D3D10_FTOI_INSTRUCTION_MIN_INPUT = -2147483648.999; +const float D3D10_FTOU_INSTRUCTION_MAX_INPUT = 4294967295.999; +const float D3D10_FTOU_INSTRUCTION_MIN_INPUT = 0.0; +const float D3D10_LINEAR_GAMMA = 1.0; +const float D3D10_MAX_BORDER_COLOR_COMPONENT = 1.0; +const float D3D10_MAX_DEPTH = 1.0; +const float D3D10_MAX_POSITION_VALUE = 3.402823466e+34; +const float D3D10_MIN_BORDER_COLOR_COMPONENT = 0.0; +const float D3D10_MIN_DEPTH = 0.0; +const float D3D10_MIP_LOD_BIAS_MAX = 15.99; +const float D3D10_MIP_LOD_BIAS_MIN = -16.0; +const float D3D10_PS_PIXEL_CENTER_FRACTIONAL_COMPONENT = 0.5; +const float D3D10_MULTISAMPLE_ANTIALIAS_LINE_WIDTH = 1.4; +const float D3D10_SRGB_GAMMA = 2.2; +const float D3D10_SRGB_TO_FLOAT_DENOMINATOR_1 = 12.92; +const float D3D10_SRGB_TO_FLOAT_DENOMINATOR_2 = 1.055; +const float D3D10_SRGB_TO_FLOAT_EXPONENT = 2.4; +const float D3D10_SRGB_TO_FLOAT_OFFSET = 0.055; +const float D3D10_SRGB_TO_FLOAT_THRESHOLD = 0.04045; +const float D3D10_SRGB_TO_FLOAT_TOLERANCE_IN_ULP = 0.5; +const float D3D10_PS_LEGACY_PIXEL_CENTER_FRACTIONAL_COMPONENT = 0.0; +const float D3D_SPEC_VERSION = 1.050005; +const unsigned int D3D10_16BIT_INDEX_STRIP_CUT_VALUE = 0xffff; +const unsigned int D3D10_32BIT_INDEX_STRIP_CUT_VALUE = 0xffffffff; +const unsigned int D3D10_8BIT_INDEX_STRIP_CUT_VALUE = 0xff; +const unsigned int D3D10_ARRAY_AXIS_ADDRESS_RANGE_BIT_COUNT = 9; +const unsigned int D3D10_CLIP_OR_CULL_DISTANCE_COUNT = 8; +const unsigned int D3D10_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT = 2; +const unsigned int D3D10_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT = 14; +const unsigned int D3D10_COMMONSHADER_CONSTANT_BUFFER_COMPONENTS = 4; +const unsigned int D3D10_COMMONSHADER_CONSTANT_BUFFER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D10_COMMONSHADER_CONSTANT_BUFFER_HW_SLOT_COUNT = 15; +const unsigned int D3D10_COMMONSHADER_CONSTANT_BUFFER_REGISTER_COMPONENTS = 4; +const unsigned int D3D10_COMMONSHADER_CONSTANT_BUFFER_REGISTER_COUNT = 15; +const unsigned int D3D10_COMMONSHADER_CONSTANT_BUFFER_REGISTER_READS_PER_INST = 1; +const unsigned int D3D10_COMMONSHADER_CONSTANT_BUFFER_REGISTER_READ_PORTS = 1; +const unsigned int D3D10_COMMONSHADER_FLOWCONTROL_NESTING_LIMIT = 64; +const unsigned int D3D10_COMMONSHADER_IMMEDIATE_CONSTANT_BUFFER_REGISTER_COMPONENTS = 4; +const unsigned int D3D10_COMMONSHADER_IMMEDIATE_CONSTANT_BUFFER_REGISTER_COUNT = 1; +const unsigned int D3D10_COMMONSHADER_IMMEDIATE_CONSTANT_BUFFER_REGISTER_READS_PER_INST = 1; +const unsigned int D3D10_COMMONSHADER_IMMEDIATE_CONSTANT_BUFFER_REGISTER_READ_PORTS = 1; +const unsigned int D3D10_COMMONSHADER_IMMEDIATE_VALUE_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D10_COMMONSHADER_INPUT_RESOURCE_REGISTER_COMPONENTS = 1; +const unsigned int D3D10_COMMONSHADER_INPUT_RESOURCE_REGISTER_COUNT = 128; +const unsigned int D3D10_COMMONSHADER_INPUT_RESOURCE_REGISTER_READS_PER_INST = 1; +const unsigned int D3D10_COMMONSHADER_INPUT_RESOURCE_REGISTER_READ_PORTS = 1; +const unsigned int D3D10_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT = 128; +const unsigned int D3D10_COMMONSHADER_SAMPLER_REGISTER_COMPONENTS = 1; +const unsigned int D3D10_COMMONSHADER_SAMPLER_REGISTER_COUNT = 16; +const unsigned int D3D10_COMMONSHADER_SAMPLER_REGISTER_READS_PER_INST = 1; +const unsigned int D3D10_COMMONSHADER_SAMPLER_REGISTER_READ_PORTS = 1; +const unsigned int D3D10_COMMONSHADER_SAMPLER_SLOT_COUNT = 16; +const unsigned int D3D10_COMMONSHADER_SUBROUTINE_NESTING_LIMIT = 32; +const unsigned int D3D10_COMMONSHADER_TEMP_REGISTER_COMPONENTS = 4; +const unsigned int D3D10_COMMONSHADER_TEMP_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D10_COMMONSHADER_TEMP_REGISTER_COUNT = 4096; +const unsigned int D3D10_COMMONSHADER_TEMP_REGISTER_READS_PER_INST = 3; +const unsigned int D3D10_COMMONSHADER_TEMP_REGISTER_READ_PORTS = 3; +const unsigned int D3D10_COMMONSHADER_TEXCOORD_RANGE_REDUCTION_MAX = 10; +const int D3D10_COMMONSHADER_TEXCOORD_RANGE_REDUCTION_MIN = -10; +const int D3D10_COMMONSHADER_TEXEL_OFFSET_MAX_NEGATIVE = -8; +const unsigned int D3D10_COMMONSHADER_TEXEL_OFFSET_MAX_POSITIVE = 7; +const unsigned int D3D10_DEFAULT_DEPTH_BIAS = 0; +const unsigned int D3D10_DEFAULT_RENDER_TARGET_ARRAY_INDEX = 0; +const unsigned int D3D10_DEFAULT_SAMPLE_MASK = 0xffffffff; +const unsigned int D3D10_DEFAULT_SCISSOR_ENDX = 0; +const unsigned int D3D10_DEFAULT_SCISSOR_ENDY = 0; +const unsigned int D3D10_DEFAULT_SCISSOR_STARTX = 0; +const unsigned int D3D10_DEFAULT_SCISSOR_STARTY = 0; +const unsigned int D3D10_DEFAULT_STENCIL_READ_MASK = 0xff; +const unsigned int D3D10_DEFAULT_STENCIL_REFERENCE = 0; +const unsigned int D3D10_DEFAULT_STENCIL_WRITE_MASK = 0xff; +const unsigned int D3D10_DEFAULT_VIEWPORT_AND_SCISSORRECT_INDEX = 0; +const unsigned int D3D10_DEFAULT_VIEWPORT_HEIGHT = 0; +const unsigned int D3D10_DEFAULT_VIEWPORT_TOPLEFTX = 0; +const unsigned int D3D10_DEFAULT_VIEWPORT_TOPLEFTY = 0; +const unsigned int D3D10_DEFAULT_VIEWPORT_WIDTH = 0; +const unsigned int D3D10_GS_INPUT_PRIM_CONST_REGISTER_COMPONENTS = 1; +const unsigned int D3D10_GS_INPUT_PRIM_CONST_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D10_GS_INPUT_PRIM_CONST_REGISTER_COUNT = 1; +const unsigned int D3D10_GS_INPUT_PRIM_CONST_REGISTER_READS_PER_INST = 2; +const unsigned int D3D10_GS_INPUT_PRIM_CONST_REGISTER_READ_PORTS = 1; +const unsigned int D3D10_GS_INPUT_REGISTER_COMPONENTS = 4; +const unsigned int D3D10_GS_INPUT_REGISTER_COMPONENT_BIT_COUNT =32; +const unsigned int D3D10_GS_INPUT_REGISTER_COUNT = 16; +const unsigned int D3D10_GS_INPUT_REGISTER_READS_PER_INST = 2; +const unsigned int D3D10_GS_INPUT_REGISTER_READ_PORTS = 1; +const unsigned int D3D10_GS_INPUT_REGISTER_VERTICES = 6; +const unsigned int D3D10_GS_OUTPUT_ELEMENTS = 32; +const unsigned int D3D10_GS_OUTPUT_REGISTER_COMPONENTS = 4; +const unsigned int D3D10_GS_OUTPUT_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D10_GS_OUTPUT_REGISTER_COUNT = 32; +const unsigned int D3D10_IA_DEFAULT_INDEX_BUFFER_OFFSET_IN_BYTES = 0; +const unsigned int D3D10_IA_DEFAULT_PRIMITIVE_TOPOLOGY = 0; +const unsigned int D3D10_IA_DEFAULT_VERTEX_BUFFER_OFFSET_IN_BYTES = 0; +const unsigned int D3D10_IA_INDEX_INPUT_RESOURCE_SLOT_COUNT = 1; +const unsigned int D3D10_IA_INSTANCE_ID_BIT_COUNT = 32; +const unsigned int D3D10_IA_INTEGER_ARITHMETIC_BIT_COUNT = 32; +const unsigned int D3D10_IA_PRIMITIVE_ID_BIT_COUNT = 32; +const unsigned int D3D10_IA_VERTEX_ID_BIT_COUNT = 32; +const unsigned int D3D10_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT = 16; +const unsigned int D3D10_IA_VERTEX_INPUT_STRUCTURE_ELEMENTS_COMPONENTS = 64; +const unsigned int D3D10_IA_VERTEX_INPUT_STRUCTURE_ELEMENT_COUNT = 16; +const unsigned int D3D10_INTEGER_DIVIDE_BY_ZERO_QUOTIENT = 0xffffffff; +const unsigned int D3D10_INTEGER_DIVIDE_BY_ZERO_REMAINDER = 0xffffffff; +const unsigned int D3D10_MAX_MAXANISOTROPY = 16; +const unsigned int D3D10_MAX_MULTISAMPLE_SAMPLE_COUNT = 32; +const unsigned int D3D10_MAX_TEXTURE_DIMENSION_2_TO_EXP = 17; +const unsigned int D3D10_MIN_MAXANISOTROPY = 0; +const unsigned int D3D10_MIP_LOD_FRACTIONAL_BIT_COUNT = 6; +const unsigned int D3D10_MIP_LOD_RANGE_BIT_COUNT = 8; +const unsigned int D3D10_NONSAMPLE_FETCH_OUT_OF_RANGE_ACCESS_RESULT = 0; +const unsigned int D3D10_PIXEL_ADDRESS_RANGE_BIT_COUNT = 13; +const unsigned int D3D10_PRE_SCISSOR_PIXEL_ADDRESS_RANGE_BIT_COUNT = 15; +const unsigned int D3D10_PS_FRONTFACING_DEFAULT_VALUE = 0xffffffff; +const unsigned int D3D10_PS_FRONTFACING_FALSE_VALUE = 0; +const unsigned int D3D10_PS_FRONTFACING_TRUE_VALUE = 0xffffffff; +const unsigned int D3D10_PS_INPUT_REGISTER_COMPONENTS = 4; +const unsigned int D3D10_PS_INPUT_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D10_PS_INPUT_REGISTER_COUNT = 32; +const unsigned int D3D10_PS_INPUT_REGISTER_READS_PER_INST = 2; +const unsigned int D3D10_PS_INPUT_REGISTER_READ_PORTS = 1; +const unsigned int D3D10_PS_OUTPUT_DEPTH_REGISTER_COMPONENTS = 1; +const unsigned int D3D10_PS_OUTPUT_DEPTH_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D10_PS_OUTPUT_DEPTH_REGISTER_COUNT = 1; +const unsigned int D3D10_PS_OUTPUT_REGISTER_COMPONENTS = 4; +const unsigned int D3D10_PS_OUTPUT_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D10_PS_OUTPUT_REGISTER_COUNT = 8; +const unsigned int D3D10_REQ_BLEND_OBJECT_COUNT_PER_CONTEXT = 4096; +const unsigned int D3D10_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP = 27; +const unsigned int D3D10_REQ_CONSTANT_BUFFER_ELEMENT_COUNT = 4096; +const unsigned int D3D10_REQ_DEPTH_STENCIL_OBJECT_COUNT_PER_CONTEXT = 4096; +const unsigned int D3D10_REQ_DRAWINDEXED_INDEX_COUNT_2_TO_EXP = 32; +const unsigned int D3D10_REQ_DRAW_VERTEX_COUNT_2_TO_EXP = 32; +const unsigned int D3D10_REQ_FILTERING_HW_ADDRESSABLE_RESOURCE_DIMENSION = 8192; +const unsigned int D3D10_REQ_GS_INVOCATION_32BIT_OUTPUT_COMPONENT_LIMIT = 1024; +const unsigned int D3D10_REQ_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT = 4096; +const unsigned int D3D10_REQ_MAXANISOTROPY = 16; +const unsigned int D3D10_REQ_MIP_LEVELS = 14; +const unsigned int D3D10_REQ_MULTI_ELEMENT_STRUCTURE_SIZE_IN_BYTES = 2048; +const unsigned int D3D10_REQ_RASTERIZER_OBJECT_COUNT_PER_CONTEXT = 4096; +const unsigned int D3D10_REQ_RENDER_TO_BUFFER_WINDOW_WIDTH = 8192; +const unsigned int D3D10_REQ_RESOURCE_SIZE_IN_MEGABYTES = 128; +const unsigned int D3D10_REQ_RESOURCE_VIEW_COUNT_PER_CONTEXT_2_TO_EXP = 20; +const unsigned int D3D10_REQ_SAMPLER_OBJECT_COUNT_PER_CONTEXT = 4096; +const unsigned int D3D10_REQ_TEXTURE1D_ARRAY_AXIS_DIMENSION = 512; +const unsigned int D3D10_REQ_TEXTURE1D_U_DIMENSION = 8192; +const unsigned int D3D10_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION = 512; +const unsigned int D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION = 8192; +const unsigned int D3D10_REQ_TEXTURE3D_U_V_OR_W_DIMENSION = 2048; +const unsigned int D3D10_REQ_TEXTURECUBE_DIMENSION = 8192; +const unsigned int D3D10_RESINFO_INSTRUCTION_MISSING_COMPONENT_RETVAL = 0; +const unsigned int D3D10_SHADER_MAJOR_VERSION = 4; +const unsigned int D3D10_SHADER_MINOR_VERSION = 0; +const unsigned int D3D10_SHIFT_INSTRUCTION_PAD_VALUE = 0; +const unsigned int D3D10_SHIFT_INSTRUCTION_SHIFT_VALUE_BIT_COUNT = 5; +const unsigned int D3D10_SIMULTANEOUS_RENDER_TARGET_COUNT = 8; +const unsigned int D3D10_SO_BUFFER_MAX_STRIDE_IN_BYTES = 2048; +const unsigned int D3D10_SO_BUFFER_MAX_WRITE_WINDOW_IN_BYTES = 256; +const unsigned int D3D10_SO_BUFFER_SLOT_COUNT = 4; +const unsigned int D3D10_SO_DDI_REGISTER_INDEX_DENOTING_GAP = 0xffffffff; +const unsigned int D3D10_SO_MULTIPLE_BUFFER_ELEMENTS_PER_BUFFER = 1; +const unsigned int D3D10_SO_SINGLE_BUFFER_COMPONENT_LIMIT = 64; +const unsigned int D3D10_STANDARD_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D10_STANDARD_COMPONENT_BIT_COUNT_DOUBLED = 64; +const unsigned int D3D10_STANDARD_MAXIMUM_ELEMENT_ALIGNMENT_BYTE_MULTIPLE = 4; +const unsigned int D3D10_STANDARD_PIXEL_COMPONENT_COUNT = 128; +const unsigned int D3D10_STANDARD_PIXEL_ELEMENT_COUNT = 32; +const unsigned int D3D10_STANDARD_VECTOR_SIZE = 4; +const unsigned int D3D10_STANDARD_VERTEX_ELEMENT_COUNT = 16; +const unsigned int D3D10_STANDARD_VERTEX_TOTAL_COMPONENT_COUNT = 64; +const unsigned int D3D10_SUBPIXEL_FRACTIONAL_BIT_COUNT = 8; +const unsigned int D3D10_SUBTEXEL_FRACTIONAL_BIT_COUNT = 6; +const unsigned int D3D10_TEXEL_ADDRESS_RANGE_BIT_COUNT = 18; +const unsigned int D3D10_UNBOUND_MEMORY_ACCESS_RESULT = 0; +const unsigned int D3D10_VIEWPORT_AND_SCISSORRECT_MAX_INDEX = 15; +const unsigned int D3D10_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE = 16; +const unsigned int D3D10_VIEWPORT_BOUNDS_MAX = 16383; +const int D3D10_VIEWPORT_BOUNDS_MIN = -16384; +const unsigned int D3D10_VS_INPUT_REGISTER_COMPONENTS = 4; +const unsigned int D3D10_VS_INPUT_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D10_VS_INPUT_REGISTER_COUNT = 16; +const unsigned int D3D10_VS_INPUT_REGISTER_READS_PER_INST = 2; +const unsigned int D3D10_VS_INPUT_REGISTER_READ_PORTS = 1; +const unsigned int D3D10_VS_OUTPUT_REGISTER_COMPONENTS = 4; +const unsigned int D3D10_VS_OUTPUT_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D10_VS_OUTPUT_REGISTER_COUNT = 16; +const unsigned int D3D10_WHQL_CONTEXT_COUNT_FOR_RESOURCE_LIMIT = 10; +const unsigned int D3D10_WHQL_DRAWINDEXED_INDEX_COUNT_2_TO_EXP = 25; +const unsigned int D3D10_WHQL_DRAW_VERTEX_COUNT_2_TO_EXP = 25; +const unsigned int D3D_MAJOR_VERSION = 10; +const unsigned int D3D_MINOR_VERSION = 0; +const unsigned int D3D_SPEC_DATE_DAY = 8; +const unsigned int D3D_SPEC_DATE_MONTH = 8; +const unsigned int D3D_SPEC_DATE_YEAR = 2006; +cpp_quote("#endif") + +const unsigned int D3D10_APPEND_ALIGNED_ELEMENT = 0xffffffff; +const unsigned int _FACD3D10 = 0x87; +const unsigned int _FACD3D10DEBUG = _FACD3D10 + 1; +const unsigned int D3D10_FILTER_TYPE_MASK = 0x3; +const unsigned int D3D10_SDK_VERSION = 29; + +cpp_quote("#define MAKE_D3D10_HRESULT(code) MAKE_HRESULT( 1, _FACD3D10, code)") +cpp_quote("#define MAKE_D3D10_STATUS(code) MAKE_HRESULT( 0, _FACD3D10, code)") +cpp_quote("#define D3D10_ERROR_TOO_MANY_UNIQUE_STATE_OBJECTS MAKE_D3D10_HRESULT(1)") +cpp_quote("#define D3D10_ERROR_FILE_NOT_FOUND MAKE_D3D10_HRESULT(2)") + +typedef enum D3D10_FORMAT_SUPPORT +{ + D3D10_FORMAT_SUPPORT_BUFFER = 0x1, + D3D10_FORMAT_SUPPORT_IA_VERTEX_BUFFER = 0x2, + D3D10_FORMAT_SUPPORT_IA_INDEX_BUFFER = 0x4, + D3D10_FORMAT_SUPPORT_SO_BUFFER = 0x8, + D3D10_FORMAT_SUPPORT_TEXTURE1D = 0x10, + D3D10_FORMAT_SUPPORT_TEXTURE2D = 0x20, + D3D10_FORMAT_SUPPORT_TEXTURE3D = 0x40, + D3D10_FORMAT_SUPPORT_TEXTURECUBE = 0x80, + D3D10_FORMAT_SUPPORT_SHADER_LOAD = 0x100, + D3D10_FORMAT_SUPPORT_SHADER_SAMPLE = 0x200, + D3D10_FORMAT_SUPPORT_SHADER_SAMPLE_COMPARISON = 0x400, + D3D10_FORMAT_SUPPORT_SHADER_SAMPLE_MONO_TEXT = 0x800, + D3D10_FORMAT_SUPPORT_MIP = 0x1000, + D3D10_FORMAT_SUPPORT_MIP_AUTOGEN = 0x2000, + D3D10_FORMAT_SUPPORT_RENDER_TARGET = 0x4000, + D3D10_FORMAT_SUPPORT_BLENDABLE = 0x8000, + D3D10_FORMAT_SUPPORT_DEPTH_STENCIL = 0x10000, + D3D10_FORMAT_SUPPORT_CPU_LOCKABLE = 0x20000, + D3D10_FORMAT_SUPPORT_MULTISAMPLE_RESOLVE = 0x40000, + D3D10_FORMAT_SUPPORT_DISPLAY = 0x80000, + D3D10_FORMAT_SUPPORT_CAST_WITHIN_BIT_LAYOUT = 0x100000, + D3D10_FORMAT_SUPPORT_MULTISAMPLE_RENDERTARGET = 0x200000, + D3D10_FORMAT_SUPPORT_MULTISAMPLE_LOAD = 0x400000, + D3D10_FORMAT_SUPPORT_SHADER_GATHER = 0x800000, +} D3D10_FORMAT_SUPPORT; + + +typedef enum D3D10_BLEND { + D3D10_BLEND_ZERO = 1, + D3D10_BLEND_ONE = 2, + D3D10_BLEND_SRC_COLOR = 3, + D3D10_BLEND_INV_SRC_COLOR = 4, + D3D10_BLEND_SRC_ALPHA = 5, + D3D10_BLEND_INV_SRC_ALPHA = 6, + D3D10_BLEND_DEST_ALPHA = 7, + D3D10_BLEND_INV_DEST_ALPHA = 8, + D3D10_BLEND_DEST_COLOR = 9, + D3D10_BLEND_INV_DEST_COLOR = 10, + D3D10_BLEND_SRC_ALPHA_SAT = 11, + D3D10_BLEND_BLEND_FACTOR = 14, + D3D10_BLEND_INV_BLEND_FACTOR = 15, + D3D10_BLEND_SRC1_COLOR = 16, + D3D10_BLEND_INV_SRC1_COLOR = 17, + D3D10_BLEND_SRC1_ALPHA = 18, + D3D10_BLEND_INV_SRC1_ALPHA = 19 +} D3D10_BLEND; + +typedef enum D3D10_BLEND_OP { + D3D10_BLEND_OP_ADD = 1, + D3D10_BLEND_OP_SUBTRACT, + D3D10_BLEND_OP_REV_SUBTRACT, + D3D10_BLEND_OP_MIN, + D3D10_BLEND_OP_MAX, +} D3D10_BLEND_OP; + +typedef struct D3D10_BLEND_DESC { + BOOL AlphaToCoverageEnable; + BOOL BlendEnable[8]; + D3D10_BLEND SrcBlend; + D3D10_BLEND DestBlend; + D3D10_BLEND_OP BlendOp; + D3D10_BLEND SrcBlendAlpha; + D3D10_BLEND DestBlendAlpha; + D3D10_BLEND_OP BlendOpAlpha; + UINT8 RenderTargetWriteMask[8]; +} D3D10_BLEND_DESC; + +typedef enum D3D10_DEPTH_WRITE_MASK { + D3D10_DEPTH_WRITE_MASK_ZERO, + D3D10_DEPTH_WRITE_MASK_ALL, +} D3D10_DEPTH_WRITE_MASK; + +typedef enum D3D10_COMPARISON_FUNC { + D3D10_COMPARISON_NEVER = 1, + D3D10_COMPARISON_LESS, + D3D10_COMPARISON_EQUAL, + D3D10_COMPARISON_LESS_EQUAL, + D3D10_COMPARISON_GREATER, + D3D10_COMPARISON_NOT_EQUAL, + D3D10_COMPARISON_GREATER_EQUAL, + D3D10_COMPARISON_ALWAYS, +} D3D10_COMPARISON_FUNC; + +typedef enum D3D10_STENCIL_OP { + D3D10_STENCIL_OP_KEEP = 1, + D3D10_STENCIL_OP_ZERO, + D3D10_STENCIL_OP_REPLACE, + D3D10_STENCIL_OP_INCR_SAT, + D3D10_STENCIL_OP_DECR_SAT, + D3D10_STENCIL_OP_INVERT, + D3D10_STENCIL_OP_INCR, + D3D10_STENCIL_OP_DECR, +} D3D10_STENCIL_OP; + +typedef struct D3D10_DEPTH_STENCILOP_DESC { + D3D10_STENCIL_OP StencilFailOp; + D3D10_STENCIL_OP StencilDepthFailOp; + D3D10_STENCIL_OP StencilPassOp; + D3D10_COMPARISON_FUNC StencilFunc; +} D3D10_DEPTH_STENCILOP_DESC; + +typedef struct D3D10_DEPTH_STENCIL_DESC { + BOOL DepthEnable; + D3D10_DEPTH_WRITE_MASK DepthWriteMask; + D3D10_COMPARISON_FUNC DepthFunc; + BOOL StencilEnable; + UINT8 StencilReadMask; + UINT8 StencilWriteMask; + D3D10_DEPTH_STENCILOP_DESC FrontFace; + D3D10_DEPTH_STENCILOP_DESC BackFace; +} D3D10_DEPTH_STENCIL_DESC; + +typedef enum D3D10_FILL_MODE { + D3D10_FILL_WIREFRAME = 2, + D3D10_FILL_SOLID, +} D3D10_FILL_MODE; + +typedef enum D3D10_CULL_MODE { + D3D10_CULL_NONE = 1, + D3D10_CULL_FRONT, + D3D10_CULL_BACK, +} D3D10_CULL_MODE; + +typedef struct D3D10_RASTERIZER_DESC { + D3D10_FILL_MODE FillMode; + D3D10_CULL_MODE CullMode; + BOOL FrontCounterClockwise; + INT DepthBias; + FLOAT DepthBiasClamp; + FLOAT SlopeScaledDepthBias; + BOOL DepthClipEnable; + BOOL ScissorEnable; + BOOL MultisampleEnable; + BOOL AntialiasedLineEnable; +} D3D10_RASTERIZER_DESC; + +typedef enum D3D10_FILTER { + D3D10_FILTER_MIN_MAG_MIP_POINT = 0, + D3D10_FILTER_MIN_MAG_POINT_MIP_LINEAR = 0x1, + D3D10_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT = 0x4, + D3D10_FILTER_MIN_POINT_MAG_MIP_LINEAR = 0x5, + D3D10_FILTER_MIN_LINEAR_MAG_MIP_POINT = 0x10, + D3D10_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR = 0x11, + D3D10_FILTER_MIN_MAG_LINEAR_MIP_POINT = 0x14, + D3D10_FILTER_MIN_MAG_MIP_LINEAR = 0x15, + D3D10_FILTER_ANISOTROPIC = 0x55, + D3D10_FILTER_COMPARISON_MIN_MAG_MIP_POINT = 0x80, + D3D10_FILTER_COMPARISON_MIN_MAG_POINT_MIP_LINEAR = 0x81, + D3D10_FILTER_COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT = 0x84, + D3D10_FILTER_COMPARISON_MIN_POINT_MAG_MIP_LINEAR = 0x85, + D3D10_FILTER_COMPARISON_MIN_LINEAR_MAG_MIP_POINT = 0x90, + D3D10_FILTER_COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR = 0x91, + D3D10_FILTER_COMPARISON_MIN_MAG_LINEAR_MIP_POINT = 0x94, + D3D10_FILTER_COMPARISON_MIN_MAG_MIP_LINEAR = 0x95, + D3D10_FILTER_COMPARISON_ANISOTROPIC = 0xd5, + D3D10_FILTER_TEXT_1BIT = 0x80000000 +} D3D10_FILTER; + +typedef enum D3D10_TEXTURE_ADDRESS_MODE { + D3D10_TEXTURE_ADDRESS_WRAP = 1, + D3D10_TEXTURE_ADDRESS_MIRROR, + D3D10_TEXTURE_ADDRESS_CLAMP, + D3D10_TEXTURE_ADDRESS_BORDER, + D3D10_TEXTURE_ADDRESS_MIRROR_ONCE, +} D3D10_TEXTURE_ADDRESS_MODE; + +typedef struct D3D10_SAMPLER_DESC { + D3D10_FILTER Filter; + D3D10_TEXTURE_ADDRESS_MODE AddressU; + D3D10_TEXTURE_ADDRESS_MODE AddressV; + D3D10_TEXTURE_ADDRESS_MODE AddressW; + FLOAT MipLODBias; + UINT MaxAnisotropy; + D3D10_COMPARISON_FUNC ComparisonFunc; + FLOAT BorderColor[4]; + FLOAT MinLOD; + FLOAT MaxLOD; +} D3D10_SAMPLER_DESC; + +typedef enum D3D10_COUNTER { + D3D10_COUNTER_GPU_IDLE, + D3D10_COUNTER_VERTEX_PROCESSING, + D3D10_COUNTER_GEOMETRY_PROCESSING, + D3D10_COUNTER_PIXEL_PROCESSING, + D3D10_COUNTER_OTHER_GPU_PROCESSING, + D3D10_COUNTER_HOST_ADAPTER_BANDWIDTH_UTILIZATION, + D3D10_COUNTER_LOCAL_VIDMEM_BANDWIDTH_UTILIZATION, + D3D10_COUNTER_VERTEX_THROUGHPUT_UTILIZATION, + D3D10_COUNTER_TRIANGLE_SETUP_THROUGHPUT_UTILIZATION, + D3D10_COUNTER_FILLRATE_THROUGHPUT_UTILIZATION, + D3D10_COUNTER_VS_MEMORY_LIMITED, + D3D10_COUNTER_VS_COMPUTATION_LIMITED, + D3D10_COUNTER_GS_MEMORY_LIMITED, + D3D10_COUNTER_GS_COMPUTATION_LIMITED, + D3D10_COUNTER_PS_MEMORY_LIMITED, + D3D10_COUNTER_PS_COMPUTATION_LIMITED, + D3D10_COUNTER_POST_TRANSFORM_CACHE_HIT_RATE, + D3D10_COUNTER_TEXTURE_CACHE_HIT_RATE, + D3D10_COUNTER_DEVICE_DEPENDENT_0 = 0x40000000 +} D3D10_COUNTER; + +typedef struct D3D10_COUNTER_DESC { + D3D10_COUNTER Counter; + UINT MiscFlags; +} D3D10_COUNTER_DESC; + +typedef enum D3D10_COUNTER_TYPE { + D3D10_COUNTER_TYPE_FLOAT32, + D3D10_COUNTER_TYPE_UINT16, + D3D10_COUNTER_TYPE_UINT32, + D3D10_COUNTER_TYPE_UINT64, +} D3D10_COUNTER_TYPE; + +typedef struct D3D10_COUNTER_INFO { + D3D10_COUNTER LastDeviceDependentCounter; + UINT NumSimultaneousCounters; + UINT8 NumDetectableParallelUnits; +} D3D10_COUNTER_INFO; + +typedef enum D3D10_RESOURCE_DIMENSION { + D3D10_RESOURCE_DIMENSION_UNKNOWN, + D3D10_RESOURCE_DIMENSION_BUFFER, + D3D10_RESOURCE_DIMENSION_TEXTURE1D, + D3D10_RESOURCE_DIMENSION_TEXTURE2D, + D3D10_RESOURCE_DIMENSION_TEXTURE3D, +} D3D10_RESOURCE_DIMENSION; + +typedef enum D3D10_USAGE { + D3D10_USAGE_DEFAULT, + D3D10_USAGE_IMMUTABLE, + D3D10_USAGE_DYNAMIC, + D3D10_USAGE_STAGING, +} D3D10_USAGE; + +typedef struct D3D10_BUFFER_DESC { + UINT ByteWidth; + D3D10_USAGE Usage; + UINT BindFlags; + UINT CPUAccessFlags; + UINT MiscFlags; +} D3D10_BUFFER_DESC; + +typedef enum D3D10_MAP { + D3D10_MAP_READ = 1, + D3D10_MAP_WRITE, + D3D10_MAP_READ_WRITE, + D3D10_MAP_WRITE_DISCARD, + D3D10_MAP_WRITE_NO_OVERWRITE, +} D3D10_MAP; + +typedef struct D3D10_TEXTURE1D_DESC { + UINT Width; + UINT MipLevels; + UINT ArraySize; + DXGI_FORMAT Format; + D3D10_USAGE Usage; + UINT BindFlags; + UINT CPUAccessFlags; + UINT MiscFlags; +} D3D10_TEXTURE1D_DESC; + +typedef struct D3D10_TEXTURE2D_DESC { + UINT Width; + UINT Height; + UINT MipLevels; + UINT ArraySize; + DXGI_FORMAT Format; + DXGI_SAMPLE_DESC SampleDesc; + D3D10_USAGE Usage; + UINT BindFlags; + UINT CPUAccessFlags; + UINT MiscFlags; +} D3D10_TEXTURE2D_DESC; + +typedef struct D3D10_TEXTURE3D_DESC { + UINT Width; + UINT Height; + UINT Depth; + UINT MipLevels; + DXGI_FORMAT Format; + D3D10_USAGE Usage; + UINT BindFlags; + UINT CPUAccessFlags; + UINT MiscFlags; +} D3D10_TEXTURE3D_DESC; + +typedef enum D3D10_DSV_DIMENSION +{ + D3D10_DSV_DIMENSION_UNKNOWN, + D3D10_DSV_DIMENSION_TEXTURE1D, + D3D10_DSV_DIMENSION_TEXTURE1DARRAY, + D3D10_DSV_DIMENSION_TEXTURE2D, + D3D10_DSV_DIMENSION_TEXTURE2DARRAY, + D3D10_DSV_DIMENSION_TEXTURE2DMS, + D3D10_DSV_DIMENSION_TEXTURE2DMSARRAY, +} D3D10_DSV_DIMENSION; + +typedef struct D3D10_TEX1D_DSV { + UINT MipSlice; +} D3D10_TEX1D_DSV; + +typedef struct D3D10_TEX1D_ARRAY_DSV { + UINT MipSlice; + UINT FirstArraySlice; + UINT ArraySize; +} D3D10_TEX1D_ARRAY_DSV; + +typedef struct D3D10_TEX2D_DSV { + UINT MipSlice; +} D3D10_TEX2D_DSV; + +typedef struct D3D10_TEX2D_ARRAY_DSV { + UINT MipSlice; + UINT FirstArraySlice; + UINT ArraySize; +} D3D10_TEX2D_ARRAY_DSV; + +typedef struct D3D10_TEX2DMS_DSV { + UINT UnusedField_NothingToDefine; +} D3D10_TEX2DMS_DSV; + +typedef struct D3D10_TEX2DMS_ARRAY_DSV { + UINT FirstArraySlice; + UINT ArraySize; +} D3D10_TEX2DMS_ARRAY_DSV; + +typedef struct D3D10_DEPTH_STENCIL_VIEW_DESC { + DXGI_FORMAT Format; + D3D10_DSV_DIMENSION ViewDimension; + union { + D3D10_TEX1D_DSV Texture1D; + D3D10_TEX1D_ARRAY_DSV Texture1DArray; + D3D10_TEX2D_DSV Texture2D; + D3D10_TEX2D_ARRAY_DSV Texture2DArray; + D3D10_TEX2DMS_DSV Texture2DMS; + D3D10_TEX2DMS_ARRAY_DSV Texture2DMSArray; + } DUMMYUNIONNAME; +} D3D10_DEPTH_STENCIL_VIEW_DESC; + +typedef enum D3D10_RTV_DIMENSION { + D3D10_RTV_DIMENSION_UNKNOWN, + D3D10_RTV_DIMENSION_BUFFER, + D3D10_RTV_DIMENSION_TEXTURE1D, + D3D10_RTV_DIMENSION_TEXTURE1DARRAY, + D3D10_RTV_DIMENSION_TEXTURE2D, + D3D10_RTV_DIMENSION_TEXTURE2DARRAY, + D3D10_RTV_DIMENSION_TEXTURE2DMS, + D3D10_RTV_DIMENSION_TEXTURE2DMSARRAY, + D3D10_RTV_DIMENSION_TEXTURE3D, +} D3D10_RTV_DIMENSION; + +typedef struct D3D10_BUFFER_RTV { + UINT ElementOffset; + UINT ElementWidth; +} D3D10_BUFFER_RTV; + +typedef struct D3D10_TEX1D_RTV { + UINT MipSlice; +} D3D10_TEX1D_RTV; + +typedef struct D3D10_TEX1D_ARRAY_RTV { + UINT MipSlice; + UINT FirstArraySlice; + UINT ArraySize; +} D3D10_TEX1D_ARRAY_RTV; + +typedef struct D3D10_TEX2D_RTV { + UINT MipSlice; +} D3D10_TEX2D_RTV; + +typedef struct D3D10_TEX2D_ARRAY_RTV { + UINT MipSlice; + UINT FirstArraySlice; + UINT ArraySize; +} D3D10_TEX2D_ARRAY_RTV; + +typedef struct D3D10_TEX2DMS_RTV { + UINT UnusedField_NothingToDefine; +} D3D10_TEX2DMS_RTV; + +typedef struct D3D10_TEX2DMS_ARRAY_RTV { + UINT FirstArraySlice; + UINT ArraySize; +} D3D10_TEX2DMS_ARRAY_RTV; + +typedef struct D3D10_TEX3D_RTV { + UINT MipSlice; + UINT FirstWSlice; + UINT WSize; +} D3D10_TEX3D_RTV; + +typedef struct D3D10_RENDER_TARGET_VIEW_DESC { + DXGI_FORMAT Format; + D3D10_RTV_DIMENSION ViewDimension; + union { + D3D10_BUFFER_RTV Buffer; + D3D10_TEX1D_RTV Texture1D; + D3D10_TEX1D_ARRAY_RTV Texture1DArray; + D3D10_TEX2D_RTV Texture2D; + D3D10_TEX2D_ARRAY_RTV Texture2DArray; + D3D10_TEX2DMS_RTV Texture2DMS; + D3D10_TEX2DMS_ARRAY_RTV Texture2DMSArray; + D3D10_TEX3D_RTV Texture3D; + } DUMMYUNIONNAME; +} D3D10_RENDER_TARGET_VIEW_DESC; + +typedef D3D_SRV_DIMENSION D3D10_SRV_DIMENSION; + +typedef struct D3D10_BUFFER_SRV { + UINT ElementOffset; + UINT ElementWidth; +} D3D10_BUFFER_SRV; + +typedef struct D3D10_TEX1D_SRV { + UINT MostDetailedMip; + UINT MipLevels; +} D3D10_TEX1D_SRV; + +typedef struct D3D10_TEX1D_ARRAY_SRV { + UINT MostDetailedMip; + UINT MipLevels; + UINT FirstArraySlice; + UINT ArraySize; +} D3D10_TEX1D_ARRAY_SRV; + +typedef struct D3D10_TEX2D_SRV { + UINT MostDetailedMip; + UINT MipLevels; +} D3D10_TEX2D_SRV; + +typedef struct D3D10_TEX2D_ARRAY_SRV { + UINT MostDetailedMip; + UINT MipLevels; + UINT FirstArraySlice; + UINT ArraySize; +} D3D10_TEX2D_ARRAY_SRV; + +typedef struct D3D10_TEX2DMS_SRV { + UINT UnusedField_NothingToDefine; +} D3D10_TEX2DMS_SRV; + +typedef struct D3D10_TEX2DMS_ARRAY_SRV { + UINT FirstArraySlice; + UINT ArraySize; +} D3D10_TEX2DMS_ARRAY_SRV; + +typedef struct D3D10_TEX3D_SRV { + UINT MostDetailedMip; + UINT MipLevels; +} D3D10_TEX3D_SRV; + +typedef struct D3D10_TEXCUBE_SRV { + UINT MostDetailedMip; + UINT MipLevels; +} D3D10_TEXCUBE_SRV; + +typedef struct D3D10_SHADER_RESOURCE_VIEW_DESC { + DXGI_FORMAT Format; + D3D10_SRV_DIMENSION ViewDimension; + union { + D3D10_BUFFER_SRV Buffer; + D3D10_TEX1D_SRV Texture1D; + D3D10_TEX1D_ARRAY_SRV Texture1DArray; + D3D10_TEX2D_SRV Texture2D; + D3D10_TEX2D_ARRAY_SRV Texture2DArray; + D3D10_TEX2DMS_SRV Texture2DMS; + D3D10_TEX2DMS_ARRAY_SRV Texture2DMSArray; + D3D10_TEX3D_SRV Texture3D; + D3D10_TEXCUBE_SRV TextureCube; + } DUMMYUNIONNAME; +} D3D10_SHADER_RESOURCE_VIEW_DESC; + +typedef struct D3D10_BOX { + UINT left; + UINT top; + UINT front; + UINT right; + UINT bottom; + UINT back; +} D3D10_BOX; + +typedef struct D3D10_SUBRESOURCE_DATA { + const void *pSysMem; + UINT SysMemPitch; + UINT SysMemSlicePitch; +} D3D10_SUBRESOURCE_DATA; + +typedef struct D3D10_SO_DECLARATION_ENTRY { + LPCSTR SemanticName; + UINT SemanticIndex; + BYTE StartComponent; + BYTE ComponentCount; + BYTE OutputSlot; +} D3D10_SO_DECLARATION_ENTRY; + +typedef enum D3D10_INPUT_CLASSIFICATION { + D3D10_INPUT_PER_VERTEX_DATA, + D3D10_INPUT_PER_INSTANCE_DATA, +} D3D10_INPUT_CLASSIFICATION; + +typedef struct D3D10_INPUT_ELEMENT_DESC { + LPCSTR SemanticName; + UINT SemanticIndex; + DXGI_FORMAT Format; + UINT InputSlot; + UINT AlignedByteOffset; + D3D10_INPUT_CLASSIFICATION InputSlotClass; + UINT InstanceDataStepRate; +} D3D10_INPUT_ELEMENT_DESC; + +typedef enum D3D10_QUERY { + D3D10_QUERY_EVENT, + D3D10_QUERY_OCCLUSION, + D3D10_QUERY_TIMESTAMP, + D3D10_QUERY_TIMESTAMP_DISJOINT, + D3D10_QUERY_PIPELINE_STATISTICS, + D3D10_QUERY_OCCLUSION_PREDICATE, + D3D10_QUERY_SO_STATISTICS, + D3D10_QUERY_SO_OVERFLOW_PREDICATE, +} D3D10_QUERY; + +typedef struct D3D10_QUERY_DESC { + D3D10_QUERY Query; + UINT MiscFlags; +} D3D10_QUERY_DESC; + +typedef D3D_PRIMITIVE_TOPOLOGY D3D10_PRIMITIVE_TOPOLOGY; + +typedef RECT D3D10_RECT; + +typedef struct D3D10_VIEWPORT { + INT TopLeftX; + INT TopLeftY; + UINT Width; + UINT Height; + FLOAT MinDepth; + FLOAT MaxDepth; +} D3D10_VIEWPORT; + +typedef struct D3D10_MAPPED_TEXTURE2D { + void *pData; + UINT RowPitch; +} D3D10_MAPPED_TEXTURE2D; + +typedef struct D3D10_MAPPED_TEXTURE3D { + void *pData; + UINT RowPitch; + UINT DepthPitch; +} D3D10_MAPPED_TEXTURE3D; + +typedef enum D3D10_BIND_FLAG { + D3D10_BIND_VERTEX_BUFFER = 0x1, + D3D10_BIND_INDEX_BUFFER = 0x2, + D3D10_BIND_CONSTANT_BUFFER = 0x4, + D3D10_BIND_SHADER_RESOURCE = 0x8, + D3D10_BIND_STREAM_OUTPUT = 0x10, + D3D10_BIND_RENDER_TARGET = 0x20, + D3D10_BIND_DEPTH_STENCIL = 0x40 +} D3D10_BIND_FLAG; + +typedef enum D3D10_CPU_ACCESS_FLAG { + D3D10_CPU_ACCESS_WRITE = 0x10000, + D3D10_CPU_ACCESS_READ = 0x20000 +} D3D10_CPU_ACCESS_FLAG; + +typedef enum D3D10_RESOURCE_MISC_FLAG { + D3D10_RESOURCE_MISC_GENERATE_MIPS = 0x1, + D3D10_RESOURCE_MISC_SHARED = 0x2, + D3D10_RESOURCE_MISC_TEXTURECUBE = 0x4, + D3D10_RESOURCE_MISC_SHARED_KEYEDMUTEX = 0x10L, + D3D10_RESOURCE_MISC_GDI_COMPATIBLE = 0x20L +} D3D10_RESOURCE_MISC_FLAG; + +typedef enum D3D10_MAP_FLAG { + D3D10_MAP_FLAG_DO_NOT_WAIT = 0x100000, +} D3D10_MAP_FLAG; + +typedef enum D3D10_CLEAR_FLAG { + D3D10_CLEAR_DEPTH = 0x1, + D3D10_CLEAR_STENCIL = 0x2 +} D3D10_CLEAR_FLAG; + +typedef enum D3D10_COLOR_WRITE_ENABLE { + D3D10_COLOR_WRITE_ENABLE_RED = 0x1, + D3D10_COLOR_WRITE_ENABLE_GREEN = 0x2, + D3D10_COLOR_WRITE_ENABLE_BLUE = 0x4, + D3D10_COLOR_WRITE_ENABLE_ALPHA = 0x8, + D3D10_COLOR_WRITE_ENABLE_ALL = (D3D10_COLOR_WRITE_ENABLE_RED | D3D10_COLOR_WRITE_ENABLE_GREEN | + D3D10_COLOR_WRITE_ENABLE_BLUE | D3D10_COLOR_WRITE_ENABLE_ALPHA) +} D3D10_COLOR_WRITE_ENABLE; + +typedef enum D3D10_TEXTURECUBE_FACE { + D3D10_TEXTURECUBE_FACE_POSITIVE_X, + D3D10_TEXTURECUBE_FACE_NEGATIVE_X, + D3D10_TEXTURECUBE_FACE_POSITIVE_Y, + D3D10_TEXTURECUBE_FACE_NEGATIVE_Y, + D3D10_TEXTURECUBE_FACE_POSITIVE_Z, + D3D10_TEXTURECUBE_FACE_NEGATIVE_Z, +} D3D10_TEXTURECUBE_FACE; + +typedef enum D3D10_ASYNC_GETDATA_FLAG { + D3D10_ASYNC_GETDATA_DONOTFLUSH = 0x1, +} D3D10_ASYNC_GETDATA_FLAG; + +typedef enum D3D10_FILTER_TYPE { + D3D10_FILTER_TYPE_POINT, + D3D10_FILTER_TYPE_LINEAR +} D3D10_FILTER_TYPE; + +typedef enum D3D10_QUERY_MISC_FLAG { + D3D10_QUERY_MISC_PREDICATEHINT = 0x1 +} D3D10_QUERY_MISC_FLAG; + +typedef struct D3D10_QUERY_DATA_TIMESTAMP_DISJOINT { + UINT64 Frequency; + BOOL Disjoint; +} D3D10_QUERY_DATA_TIMESTAMP_DISJOINT; + +typedef struct D3D10_QUERY_DATA_PIPELINE_STATISTICS { + UINT64 IAVertices; + UINT64 IAPrimitives; + UINT64 VSInvocations; + UINT64 GSInvocations; + UINT64 GSPrimitives; + UINT64 CInvocations; + UINT64 CPrimitives; + UINT64 PSInvocations; +} D3D10_QUERY_DATA_PIPELINE_STATISTICS; + +typedef struct D3D10_QUERY_DATA_SO_STATISTICS { + UINT64 NumPrimitivesWritten; + UINT64 PrimitivesStorageNeeded; +} D3D10_QUERY_DATA_SO_STATISTICS; + +typedef enum D3D10_CREATE_DEVICE_FLAG { + D3D10_CREATE_DEVICE_SINGLETHREADED = 0x1, + D3D10_CREATE_DEVICE_DEBUG = 0x2, + D3D10_CREATE_DEVICE_SWITCH_TO_REF = 0x4, + D3D10_CREATE_DEVICE_PREVENT_INTERNAL_THREADING_OPTIMIZATIONS = 0x8 +} D3D10_CREATE_DEVICE_FLAG; + +/* Core */ + +interface ID3D10Device; + +[ + object, + local, + uuid(9b7e4c00-342c-4106-a19f-4f2704f689f0) +] +interface ID3D10DeviceChild : IUnknown +{ + void GetDevice( + [out] ID3D10Device **a); + HRESULT GetPrivateData( + [in] REFGUID a, + [in, out] UINT *b, + [out] void *c); + HRESULT SetPrivateData( + [in] REFGUID a, + [in] UINT b, + [in] const void *c); + HRESULT SetPrivateDataInterface( + [in] REFGUID a, + [in] const IUnknown *b); +} + +/* Resource */ + +[ + object, + local, + uuid(9b7e4c01-342c-4106-a19f-4f2704f689f0) +] +interface ID3D10Resource : ID3D10DeviceChild +{ + void GetType( + [out] D3D10_RESOURCE_DIMENSION *a); + void SetEvictionPriority( + [in] UINT a); + UINT GetEvictionPriority(); +} + +[ + object, + local, + uuid(9b7e4c02-342c-4106-a19f-4f2704f689f0) +] +interface ID3D10Buffer : ID3D10Resource +{ + HRESULT Map( + [in] D3D10_MAP a, + [in] UINT b, + [out] void **c); + void Unmap(); + void GetDesc( + [out] D3D10_BUFFER_DESC *a); +} + +[ + object, + local, + uuid(9b7e4c03-342c-4106-a19f-4f2704f689F0) +] +interface ID3D10Texture1D : ID3D10Resource +{ + HRESULT Map( + [in] UINT a, + [in] D3D10_MAP b, + [in] UINT c, + [out] void **d); + void Unmap( + [in] UINT a); + void GetDesc( + [out] D3D10_TEXTURE1D_DESC *a); +} + +[ + object, + local, + uuid(9b7e4c04-342c-4106-a19f-4f2704f689F0) +] +interface ID3D10Texture2D : ID3D10Resource +{ + HRESULT Map( + [in] UINT a, + [in] D3D10_MAP b, + [in] UINT c, + [out] D3D10_MAPPED_TEXTURE2D *d); + void Unmap( + [in] UINT a); + void GetDesc( + [out] D3D10_TEXTURE2D_DESC *a); +} + +[ + object, + local, + uuid(9b7e4c05-342c-4106-a19f-4f2704f689F0) +] +interface ID3D10Texture3D : ID3D10Resource +{ + HRESULT Map( + [in] UINT a, + [in] D3D10_MAP b, + [in] UINT c, + [out] D3D10_MAPPED_TEXTURE3D *d); + void Unmap( + [in] UINT a); + void GetDesc( + [out] D3D10_TEXTURE3D_DESC *a); +} + +[ + object, + local, + uuid(c902b03f-60a7-49ba-9936-2a3ab37a7e33) +] +interface ID3D10View : ID3D10DeviceChild +{ + void GetResource( + [out] ID3D10Resource **a); +} + +[ + object, + local, + uuid(9b7e4c09-342c-4106-a19f-4f2704f689f0) +] +interface ID3D10DepthStencilView : ID3D10View +{ + void GetDesc( + [out] D3D10_DEPTH_STENCIL_VIEW_DESC *a); +} + + +[ + object, + local, + uuid(9b7e4c08-342c-4106-a19f-4f2704f689f0) +] +interface ID3D10RenderTargetView : ID3D10View +{ + void GetDesc( + [out] D3D10_RENDER_TARGET_VIEW_DESC *a); +} + +[ + object, + local, + uuid(9b7e4c07-342c-4106-a19f-4f2704f689f0) +] +interface ID3D10ShaderResourceView : ID3D10View +{ + void GetDesc( + [out] D3D10_SHADER_RESOURCE_VIEW_DESC *a); +} + +/* Resource End */ + +[ + object, + local, + uuid(edad8d19-8a35-4d6d-8566-2ea276cde161) +] +interface ID3D10BlendState : ID3D10DeviceChild +{ + void GetDesc( + [out] D3D10_BLEND_DESC *a); +} + +[ + object, + local, + uuid(2b4b1cc8-a4ad-41f8-8322-ca86fc3ec675) +] +interface ID3D10DepthStencilState : ID3D10DeviceChild +{ + void GetDesc( + [out] D3D10_DEPTH_STENCIL_DESC *a); +} + +[ + object, + local, + uuid(6316be88-54cd-4040-ab44-20461bc81f68) +] +interface ID3D10GeometryShader : ID3D10DeviceChild +{ +} + +[ + object, + local, + uuid(9b7e4c0b-342c-4106-a19f-4f2704f689f0) +] +interface ID3D10InputLayout : ID3D10DeviceChild +{ +} + +[ + object, + local, + uuid(4968b601-9d00-4cde-8346-8e7f675819b6) +] +interface ID3D10PixelShader : ID3D10DeviceChild +{ +} + +[ + object, + local, + uuid(a2a07292-89af-4345-be2e-c53d9fbb6e9f) +] +interface ID3D10RasterizerState : ID3D10DeviceChild +{ + void GetDesc( + [out] D3D10_RASTERIZER_DESC *a); +} + +[ + object, + local, + uuid(9b7e4c0c-342c-4106-a19f-4f2704f689f0) +] +interface ID3D10SamplerState : ID3D10DeviceChild +{ + void GetDesc( + [out] D3D10_SAMPLER_DESC *a); +} + +[ + object, + local, + uuid(9b7e4c0a-342c-4106-a19f-4f2704f689f0) +] +interface ID3D10VertexShader : ID3D10DeviceChild +{ +} + +[ + object, + local, + uuid(9b7e4c0d-342c-4106-a19f-4f2704f689f0) +] +interface ID3D10Asynchronous : ID3D10DeviceChild +{ + void Begin(); + void End(); + HRESULT GetData( + [out] void *a, + [in] UINT b, + [in] UINT c); + UINT GetDataSize(); +} + +[ + object, + local, + uuid(9b7e4c11-342c-4106-a19f-4f2704f689f0) +] +interface ID3D10Counter : ID3D10Asynchronous +{ + void GetDesc( + [out] D3D10_COUNTER_DESC *a); +} + +[ + object, + local, + uuid(9b7e4C0e-342C-4106-a19f-4f2704f689f0) +] +interface ID3D10Query : ID3D10Asynchronous +{ + void GetDesc( + [out] D3D10_QUERY_DESC *a); +} + +[ + object, + local, + uuid(9b7e4c10-342c-4106-a19f-4f2704f689f0) +] +interface ID3D10Predicate : ID3D10Query +{ +} + +[ + object, + local, + uuid(9b7e4c0f-342c-4106-a19f-4f2704f689f0) +] +interface ID3D10Device : IUnknown +{ + void VSSetConstantBuffers( + [in] UINT a, + [in] UINT b, + [in] ID3D10Buffer *const *c); + void PSSetShaderResources( + [in] UINT a, + [in] UINT b, + [in] ID3D10ShaderResourceView *const *c); + void PSSetShader( + [in] ID3D10PixelShader *a); + void PSSetSamplers( + [in] UINT a, + [in] UINT b, + [in]ID3D10SamplerState *const *c); + void VSSetShader( + [in] ID3D10VertexShader *a); + void DrawIndexed( + [in] UINT a, + [in] UINT b, + [in] INT c); + void Draw( + [in] UINT a, + [in] UINT b); + void PSSetConstantBuffers( + [in] UINT a, + [in] UINT b, + [in] ID3D10Buffer *const *c); + void IASetInputLayout( + [in] ID3D10InputLayout *a); + void IASetVertexBuffers( + [in] UINT a, + [in] UINT b, + [in] ID3D10Buffer *const *c, + [in] const UINT *d, + [in] const UINT *e); + void IASetIndexBuffer( + [in] ID3D10Buffer *a, + [in] DXGI_FORMAT b, + [in] UINT c); + void DrawIndexedInstanced( + [in] UINT a, + [in] UINT b, + [in] UINT c, + [in] INT d, + [in] UINT e); + void DrawInstanced( + [in] UINT a, + [in] UINT b, + [in] UINT c, + [in] UINT d); + void GSSetConstantBuffers( + [in] UINT a, + [in] UINT b, + [in] ID3D10Buffer *const *c); + void GSSetShader( + [in] ID3D10GeometryShader *a); + void IASetPrimitiveTopology( + [in] D3D10_PRIMITIVE_TOPOLOGY a); + void VSSetShaderResources( + [in] UINT a, + [in] UINT b, + [in] ID3D10ShaderResourceView *const *c); + void VSSetSamplers( + [in] UINT a, + [in] UINT b, + [in] ID3D10SamplerState *const *c); + void SetPredication( + [in] ID3D10Predicate *a, + [in] BOOL b); + void GSSetShaderResources( + [in] UINT a, + [in] UINT b, + [in] ID3D10ShaderResourceView * const *c); + void GSSetSamplers( + [in] UINT a, + [in] UINT b, + [in] ID3D10SamplerState *const *c); + void OMSetRenderTargets( + [in] UINT a, + [in] ID3D10RenderTargetView *const *b, + [in] ID3D10DepthStencilView *c); + void OMSetBlendState( + [in] ID3D10BlendState *a, + [in] const FLOAT b[4], + [in] UINT c); + void OMSetDepthStencilState( + [in] ID3D10DepthStencilState *a, + [in] UINT b); + void SOSetTargets( + [in] UINT a, + [in] ID3D10Buffer *const *b, + [in] const UINT *c); + void DrawAuto(); + void RSSetState( + [in] ID3D10RasterizerState *a); + void RSSetViewports( + [in] UINT a, + [in] const D3D10_VIEWPORT *b); + void RSSetScissorRects( + [in] UINT a, + [in] const D3D10_RECT *b); + void CopySubresourceRegion( + [in] ID3D10Resource *a, + [in] UINT b, + [in] UINT c, + [in] UINT d, + [in] UINT e, + [in] ID3D10Resource *f, + [in] UINT g, + [in] const D3D10_BOX *h); + void CopyResource( + [in] ID3D10Resource *a, + [in] ID3D10Resource *b); + void UpdateSubresource( + [in] ID3D10Resource *a, + [in] UINT b, + [in] const D3D10_BOX *c, + [in] const void *d, + [in] UINT e, + [in] UINT f); + void ClearRenderTargetView( + [in] ID3D10RenderTargetView *a, + [in] const FLOAT b[4]); + void ClearDepthStencilView( + [in] ID3D10DepthStencilView *a, + [in] UINT b, + [in] FLOAT c, + [in] UINT8 d); + void GenerateMips( + [in] ID3D10ShaderResourceView *a); + void ResolveSubresource( + [in] ID3D10Resource *a, + [in] UINT b, + [in] ID3D10Resource *c, + [in] UINT d, + [in] DXGI_FORMAT e); + void VSGetConstantBuffers( + [in] UINT a, + [in] UINT b, + [out] ID3D10Buffer **c); + void PSGetShaderResources( + [in] UINT a, + [in] UINT b, + [out] ID3D10ShaderResourceView **c); + void PSGetShader( + [out] ID3D10PixelShader **a); + void PSGetSamplers( + [in] UINT a, + [in] UINT b, + [out] ID3D10SamplerState **c); + void VSGetShader( + [out] ID3D10VertexShader **a); + void PSGetConstantBuffers( + [in] UINT a, + [in] UINT b, + [out] ID3D10Buffer **c); + void IAGetInputLayout( + [out] ID3D10InputLayout **a); + void IAGetVertexBuffers( + [in] UINT a, + [in] UINT b, + [out] ID3D10Buffer **c, + [out] UINT *d, + [out] UINT *e); + void IAGetIndexBuffer( + [out] ID3D10Buffer **a, + [out] DXGI_FORMAT *b, + [out] UINT *c); + void GSGetConstantBuffers( + [in] UINT a, + [in] UINT b, + [out] ID3D10Buffer **c); + void GSGetShader( + [out] ID3D10GeometryShader **a); + void IAGetPrimitiveTopology( + [out] D3D10_PRIMITIVE_TOPOLOGY *a); + void VSGetShaderResources( + [in] UINT a, + [in] UINT b, + [out] ID3D10ShaderResourceView **c); + void VSGetSamplers( + [in] UINT a, + [in] UINT b, + [out] ID3D10SamplerState **c); + void GetPredication( + [out] ID3D10Predicate **a, + [out] BOOL *b); + void GSGetShaderResources( + [in] UINT a, + [in] UINT b, + [out] ID3D10ShaderResourceView **c); + void GSGetSamplers( + [in] UINT a, + [in] UINT b, + [out] ID3D10SamplerState **c); + void OMGetRenderTargets( + [in] UINT a, + [out] ID3D10RenderTargetView **b, + [out] ID3D10DepthStencilView **c); + void OMGetBlendState( + [out] ID3D10BlendState **a, + [out] FLOAT b[4], + [out] UINT *c); + void OMGetDepthStencilState( + [out] ID3D10DepthStencilState **a, + [out] UINT *b); + void SOGetTargets( + [in] UINT a, + [out] ID3D10Buffer **b, + [out] UINT *c); + void RSGetState( + [out] ID3D10RasterizerState **a); + void RSGetViewports( + [in, out] UINT *a, + [out] D3D10_VIEWPORT *b); + void RSGetScissorRects( + [in, out] UINT *a, + [out] D3D10_RECT *b); + HRESULT GetDeviceRemovedReason(); + HRESULT SetExceptionMode( + [in] UINT a); + UINT GetExceptionMode(); + HRESULT GetPrivateData( + [in] REFGUID a, + [in, out] UINT *b, + [out] void *c); + HRESULT SetPrivateData( + [in] REFGUID a, + [in] UINT b, + [in] const void *c); + HRESULT SetPrivateDataInterface( + [in] REFGUID a, + [in] const IUnknown *b); + void ClearState(); + void Flush(); + HRESULT CreateBuffer( + [in] const D3D10_BUFFER_DESC *a, + [in] const D3D10_SUBRESOURCE_DATA *b, + [out] ID3D10Buffer **c); + HRESULT CreateTexture1D( + [in] const D3D10_TEXTURE1D_DESC *a, + [in] const D3D10_SUBRESOURCE_DATA *b, + [out] ID3D10Texture1D **c); + HRESULT CreateTexture2D( + [in] const D3D10_TEXTURE2D_DESC *a, + [in] const D3D10_SUBRESOURCE_DATA *b, + [out] ID3D10Texture2D **c); + HRESULT CreateTexture3D( + [in] const D3D10_TEXTURE3D_DESC *a, + [in] const D3D10_SUBRESOURCE_DATA *b, + [out] ID3D10Texture3D **c); + HRESULT CreateShaderResourceView( + [in] ID3D10Resource *a, + [in] const D3D10_SHADER_RESOURCE_VIEW_DESC *b, + [out] ID3D10ShaderResourceView **c); + HRESULT CreateRenderTargetView( + [in] ID3D10Resource *a, + [in] const D3D10_RENDER_TARGET_VIEW_DESC *b, + [out] ID3D10RenderTargetView **c); + HRESULT CreateDepthStencilView( + [in] ID3D10Resource *a, + [in] const D3D10_DEPTH_STENCIL_VIEW_DESC *b, + [out] ID3D10DepthStencilView **c); + HRESULT CreateInputLayout( + [in] const D3D10_INPUT_ELEMENT_DESC *a, + [in] UINT b, + [in] const void *c, + [in] SIZE_T d, + [out] ID3D10InputLayout **e); + HRESULT CreateVertexShader( + [in] const void *a, + [in] SIZE_T b, + [out] ID3D10VertexShader **c); + HRESULT CreateGeometryShader( + [in] const void *a, + [in] SIZE_T b, + [out] ID3D10GeometryShader **c); + HRESULT CreateGeometryShaderWithStreamOutput( + [in] const void *a, + [in] SIZE_T b, + [in] const D3D10_SO_DECLARATION_ENTRY *c, + [in] UINT d, + [in] UINT e, + [out] ID3D10GeometryShader **f); + HRESULT CreatePixelShader( + [in] const void *a, + [in] SIZE_T b, + [out] ID3D10PixelShader **c); + HRESULT CreateBlendState( + [in] const D3D10_BLEND_DESC *a, + [out] ID3D10BlendState **b); + HRESULT CreateDepthStencilState( + [in] const D3D10_DEPTH_STENCIL_DESC *a, + [out] ID3D10DepthStencilState **b); + HRESULT CreateRasterizerState( + [in] const D3D10_RASTERIZER_DESC *a, + [out] ID3D10RasterizerState **b); + HRESULT CreateSamplerState( + [in] const D3D10_SAMPLER_DESC *a, + [out] ID3D10SamplerState **b); + HRESULT CreateQuery( + [in] const D3D10_QUERY_DESC *a, + [out] ID3D10Query **b); + HRESULT CreatePredicate( + [in] const D3D10_QUERY_DESC *a, + [out] ID3D10Predicate **b); + HRESULT CreateCounter( + [in] const D3D10_COUNTER_DESC *a, + [out] ID3D10Counter **b); + HRESULT CheckFormatSupport( + [in] DXGI_FORMAT a, + [out] UINT *b); + HRESULT CheckMultisampleQualityLevels( + [in] DXGI_FORMAT a, + [in] UINT b, + [out] UINT *c); + void CheckCounterInfo( + [out] D3D10_COUNTER_INFO *a); + HRESULT CheckCounter( + [in] const D3D10_COUNTER_DESC *a, + [out] D3D10_COUNTER_TYPE *b, + [out] UINT *c, + [out] LPSTR d, + [in, out] UINT *e, + [out] LPSTR f, + [in, out] UINT *g, + [out] LPSTR h, + [in, out] UINT *i); + UINT GetCreationFlags(); + HRESULT OpenSharedResource( + [in] HANDLE a, + [in] REFIID b, + [out] void **c); + void SetTextFilterSize( + [in] UINT a, + [in] UINT b); + void GetTextFilterSize( + [out] UINT *a, + [out] UINT *b); +} + +[ + object, + local, + uuid(9b7e4e00-342c-4106-a19f-4f2704f689f0) +] +interface ID3D10Multithread : IUnknown +{ + void Enter(); + void Leave(); + BOOL SetMultithreadProtected( + [in] BOOL a); + BOOL GetMultithreadProtected(); +} + +cpp_quote("#include \"d3d10misc.h\"") +cpp_quote("#include \"d3d10shader.h\"") +cpp_quote("#include \"d3d10effect.h\"") +/* TODO: Include "d310sdklayers.h" as soon as it exists */ diff --git a/src/gallium/state_trackers/d3d1x/d3dapi/d3d10_1.idl b/src/gallium/state_trackers/d3d1x/d3dapi/d3d10_1.idl new file mode 100644 index 0000000000..7edeff94f7 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3dapi/d3d10_1.idl @@ -0,0 +1,191 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +import "oaidl.idl"; +import "ocidl.idl"; +import "d3d10.idl"; +import "d3dcommon.idl"; + +const unsigned int D3D10_1_SDK_VERSION = 0x20; + +cpp_quote("#ifndef _D3D10_1_CONSTANTS") +cpp_quote("#define _D3D10_1_CONSTANTS") +const unsigned int D3D10_1_DEFAULT_SAMPLE_MASK = 0xffffffff; +const float D3D10_1_FLOAT16_FUSED_TOLERANCE_IN_ULP = 0.6; +const float D3D10_1_FLOAT32_TO_INTEGER_TOLERANCE_IN_ULP = 0.6; +const unsigned int D3D10_1_GS_INPUT_REGISTER_COUNT = 32; +const unsigned int D3D10_1_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT = 32; +const unsigned int D3D10_1_IA_VERTEX_INPUT_STRUCTURE_ELEMENTS_COMPONENTS = 128; +const unsigned int D3D10_1_IA_VERTEX_INPUT_STRUCTURE_ELEMENT_COUNT = 32; +const unsigned int D3D10_1_PS_OUTPUT_MASK_REGISTER_COMPONENTS = 1; +const unsigned int D3D10_1_PS_OUTPUT_MASK_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D10_1_PS_OUTPUT_MASK_REGISTER_COUNT = 1; +const unsigned int D3D10_1_SHADER_MAJOR_VERSION = 4; +const unsigned int D3D10_1_SHADER_MINOR_VERSION = 1; +const unsigned int D3D10_1_SO_BUFFER_MAX_STRIDE_IN_BYTES = 2048; +const unsigned int D3D10_1_SO_BUFFER_MAX_WRITE_WINDOW_IN_BYTES = 256; +const unsigned int D3D10_1_SO_BUFFER_SLOT_COUNT = 4; +const unsigned int D3D10_1_SO_MULTIPLE_BUFFER_ELEMENTS_PER_BUFFER = 1; +const unsigned int D3D10_1_SO_SINGLE_BUFFER_COMPONENT_LIMIT = 64; +const unsigned int D3D10_1_STANDARD_VERTEX_ELEMENT_COUNT = 32; +const unsigned int D3D10_1_SUBPIXEL_FRACTIONAL_BIT_COUNT = 8; +const unsigned int D3D10_1_VS_INPUT_REGISTER_COUNT = 32; +const unsigned int D3D10_1_VS_OUTPUT_REGISTER_COUNT = 32; +cpp_quote("#endif") + +typedef enum D3D10_FEATURE_LEVEL1 +{ + D3D10_FEATURE_LEVEL_10_0 = 0xa000, + D3D10_FEATURE_LEVEL_10_1 = 0xa100, + D3D10_FEATURE_LEVEL_9_1 = 0x9100, + D3D10_FEATURE_LEVEL_9_2 = 0x9200, + D3D10_FEATURE_LEVEL_9_3 = 0x9300 +} D3D10_FEATURE_LEVEL1; + +typedef struct D3D10_RENDER_TARGET_BLEND_DESC1 +{ + BOOL BlendEnable; + D3D10_BLEND SrcBlend; + D3D10_BLEND DestBlend; + D3D10_BLEND_OP BlendOp; + D3D10_BLEND SrcBlendAlpha; + D3D10_BLEND DestBlendAlpha; + D3D10_BLEND_OP BlendOpAlpha; + UINT8 RenderTargetWriteMask; +} D3D10_RENDER_TARGET_BLEND_DESC1; + +typedef struct D3D10_BLEND_DESC1 +{ + BOOL AlphaToCoverageEnable; + BOOL IndependentBlendEnable; + D3D10_RENDER_TARGET_BLEND_DESC1 RenderTarget[8]; +} D3D10_BLEND_DESC1; + +typedef struct D3D10_TEXCUBE_ARRAY_SRV1 +{ + UINT MostDetailedMip; + UINT MipLevels; + UINT First2DArrayFace; + UINT NumCubes; +} D3D10_TEXCUBE_ARRAY_SRV1; + +typedef D3D_SRV_DIMENSION D3D10_SRV_DIMENSION1; + +typedef struct D3D10_SHADER_RESOURCE_VIEW_DESC1 +{ + DXGI_FORMAT Format; + D3D10_SRV_DIMENSION1 ViewDimension; + union + { + D3D10_BUFFER_SRV Buffer; + D3D10_TEX1D_SRV Texture1D; + D3D10_TEX1D_ARRAY_SRV Texture1DArray; + D3D10_TEX2D_SRV Texture2D; + D3D10_TEX2D_ARRAY_SRV Texture2DArray; + D3D10_TEX2DMS_SRV Texture2DMS; + D3D10_TEX2DMS_ARRAY_SRV Texture2DMSArray; + D3D10_TEX3D_SRV Texture3D; + D3D10_TEXCUBE_SRV TextureCube; + D3D10_TEXCUBE_ARRAY_SRV1 TextureCubeArray; + } ; +} D3D10_SHADER_RESOURCE_VIEW_DESC1; + +typedef enum D3D10_STANDARD_MULTISAMPLE_QUALITY_LEVELS +{ + D3D10_STANDARD_MULTISAMPLE_PATTERN = 0xffffffff, + D3D10_CENTER_MULTISAMPLE_PATTERN = 0xfffffffe +} D3D10_STANDARD_MULTISAMPLE_QUALITY_LEVELS; + +[object, local, uuid("EDAD8D99-8A35-4d6d-8566-2EA276CDE161")] +interface ID3D10BlendState1 : ID3D10BlendState +{ + void GetDesc1( + [out] D3D10_BLEND_DESC1 *a + ); +}; + +[object, local, uuid("9B7E4C87-342C-4106-A19F-4F2704F689F0")] +interface ID3D10ShaderResourceView1 : ID3D10ShaderResourceView +{ + void GetDesc1( + [out] D3D10_SHADER_RESOURCE_VIEW_DESC1 *a + ); +}; + +[object, local, uuid("9B7E4C8F-342C-4106-A19F-4F2704F689F0")] +interface ID3D10Device1 : ID3D10Device +{ + HRESULT CreateShaderResourceView1( + [in] ID3D10Resource *a, + [in] const D3D10_SHADER_RESOURCE_VIEW_DESC1 *b, + [out,optional] ID3D10ShaderResourceView1 **c + ); + + HRESULT CreateBlendState1( + [in] const D3D10_BLEND_DESC1 *a, + [out, optional] ID3D10BlendState1 **b + ); + + D3D10_FEATURE_LEVEL1 GetFeatureLevel(); +}; + +//cpp_quote("#include \"d3d10_1shader.h\"") + +typedef enum D3D10_DRIVER_TYPE D3D10_DRIVER_TYPE; + +HRESULT D3D10CreateDevice1( + [in,optional] IDXGIAdapter* a, + [in] D3D10_DRIVER_TYPE b, + [in] HMODULE c, + [in] UINT d, + [in] D3D10_FEATURE_LEVEL1 e, + [in] UINT f, + [out,optional] ID3D10Device1** g +); + +typedef HRESULT (* PFN_D3D10_CREATE_DEVICE_AND_SWAP_CHAIN1)( + [in,optional] IDXGIAdapter*, + D3D10_DRIVER_TYPE, + HMODULE, + UINT, + D3D10_FEATURE_LEVEL1, + UINT, + [in, optional] DXGI_SWAP_CHAIN_DESC*, + [out,optional] IDXGISwapChain**, + [out,optional] ID3D10Device1** +); + +HRESULT D3D10CreateDeviceAndSwapChain1( + [in,optional] IDXGIAdapter* a, + [in] D3D10_DRIVER_TYPE b, + [in] HMODULE c, + [in] UINT d, + [in] D3D10_FEATURE_LEVEL1 e, + [in] UINT f, + [in,optional] DXGI_SWAP_CHAIN_DESC* g, + [out,optional] IDXGISwapChain** h, + [out,optional] ID3D10Device1** i +); diff --git a/src/gallium/state_trackers/d3d1x/d3dapi/d3d10misc.h b/src/gallium/state_trackers/d3d1x/d3dapi/d3d10misc.h new file mode 100644 index 0000000000..4f866ff415 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3dapi/d3d10misc.h @@ -0,0 +1,47 @@ +/* + * Copyright 2008 Henri Verbeet for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __D3D10MISC_H__ +#define __D3D10MISC_H__ + +#include "d3d10.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum D3D10_DRIVER_TYPE { + D3D10_DRIVER_TYPE_HARDWARE = 0, + D3D10_DRIVER_TYPE_REFERENCE = 1, + D3D10_DRIVER_TYPE_NULL = 2, + D3D10_DRIVER_TYPE_SOFTWARE = 3, + D3D10_DRIVER_TYPE_WARP = 5, // added by Luca Barbieri in Sep 2010 +} D3D10_DRIVER_TYPE; + +HRESULT WINAPI D3D10CreateDevice(IDXGIAdapter *adapter, D3D10_DRIVER_TYPE driver_type, + HMODULE swrast, UINT flags, UINT sdk_version, ID3D10Device **device); + +HRESULT WINAPI D3D10CreateDeviceAndSwapChain(IDXGIAdapter *adapter, D3D10_DRIVER_TYPE driver_type, + HMODULE swrast, UINT flags, UINT sdk_version, DXGI_SWAP_CHAIN_DESC *swapchain_desc, + IDXGISwapChain **swapchain, ID3D10Device **device); + +#ifdef __cplusplus +} +#endif + +#endif /* __D3D10MISC_H__ */ diff --git a/src/gallium/state_trackers/d3d1x/d3dapi/d3d10shader.idl b/src/gallium/state_trackers/d3d1x/d3dapi/d3d10shader.idl new file mode 100644 index 0000000000..6088a8894a --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3dapi/d3d10shader.idl @@ -0,0 +1,269 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +import "d3d10.idl"; + +cpp_quote("#define D3D10_TX_VERSION(a, b) (('T' << 24) | ('X' << 16) | ((a) << 8) | (b)))") + +const unsigned int D3D10_SHADER_DEBUG = (1 << 0); +const unsigned int D3D10_SHADER_SKIP_VALIDATION = (1 << 1); +const unsigned int D3D10_SHADER_SKIP_OPTIMIZATION = (1 << 2); +const unsigned int D3D10_SHADER_PACK_MATRIX_ROW_MAJOR = (1 << 3); +const unsigned int D3D10_SHADER_PACK_MATRIX_COLUMN_MAJOR = (1 << 4); +const unsigned int D3D10_SHADER_PARTIAL_PRECISION = (1 << 5); +const unsigned int D3D10_SHADER_FORCE_VS_SOFTWARE_NO_OPT = (1 << 6); +const unsigned int D3D10_SHADER_FORCE_PS_SOFTWARE_NO_OPT = (1 << 7); +const unsigned int D3D10_SHADER_NO_PRESHADER = (1 << 8); +const unsigned int D3D10_SHADER_AVOID_FLOW_CONTROL = (1 << 9); +const unsigned int D3D10_SHADER_PREFER_FLOW_CONTROL = (1 << 10); +const unsigned int D3D10_SHADER_ENABLE_STRICTNESS = (1 << 11); +const unsigned int D3D10_SHADER_ENABLE_BACKWARDS_COMPATIBILITY = (1 << 12); +const unsigned int D3D10_SHADER_IEEE_STRICTNESS = (1 << 13); +const unsigned int D3D10_SHADER_WARNINGS_ARE_ERRORS = (1 << 18); + + +const unsigned int D3D10_SHADER_OPTIMIZATION_LEVEL0 = (1 << 14); +const unsigned int D3D10_SHADER_OPTIMIZATION_LEVEL1 = 0; +const unsigned int D3D10_SHADER_OPTIMIZATION_LEVEL2 = ((1 << 14) | (1 << 15)); +const unsigned int D3D10_SHADER_OPTIMIZATION_LEVEL3 = (1 << 15); + +typedef D3D_SHADER_MACRO D3D10_SHADER_MACRO; +typedef D3D10_SHADER_MACRO* LPD3D10_SHADER_MACRO; + + +typedef D3D_SHADER_VARIABLE_CLASS D3D10_SHADER_VARIABLE_CLASS; +typedef D3D10_SHADER_VARIABLE_CLASS* LPD3D10_SHADER_VARIABLE_CLASS; +typedef D3D_SHADER_VARIABLE_FLAGS D3D10_SHADER_VARIABLE_FLAGS; +typedef D3D10_SHADER_VARIABLE_FLAGS* LPD3D10_SHADER_VARIABLE_FLAGS; +typedef D3D_SHADER_VARIABLE_TYPE D3D10_SHADER_VARIABLE_TYPE; +typedef D3D10_SHADER_VARIABLE_TYPE* LPD3D10_SHADER_VARIABLE_TYPE; +typedef D3D_SHADER_INPUT_FLAGS D3D10_SHADER_INPUT_FLAGS; +typedef D3D10_SHADER_INPUT_FLAGS* LPD3D10_SHADER_INPUT_FLAGS; +typedef D3D_SHADER_INPUT_TYPE D3D10_SHADER_INPUT_TYPE; +typedef D3D10_SHADER_INPUT_TYPE* LPD3D10_SHADER_INPUT_TYPE; +typedef D3D_SHADER_CBUFFER_FLAGS D3D10_SHADER_CBUFFER_FLAGS; +typedef D3D10_SHADER_CBUFFER_FLAGS* LPD3D10_SHADER_CBUFFER_FLAGS; +typedef D3D_CBUFFER_TYPE D3D10_CBUFFER_TYPE; +typedef D3D10_CBUFFER_TYPE* LPD3D10_CBUFFER_TYPE; +typedef D3D_NAME D3D10_NAME; +typedef D3D_RESOURCE_RETURN_TYPE D3D10_RESOURCE_RETURN_TYPE; +typedef D3D_REGISTER_COMPONENT_TYPE D3D10_REGISTER_COMPONENT_TYPE; +typedef D3D_INCLUDE_TYPE D3D10_INCLUDE_TYPE; +typedef ID3DInclude* LPD3D10INCLUDE; + +cpp_quote("#define D3D10_SHVER_GET_TYPE(v) (((v) >> 16) & 0xffff)") +cpp_quote("#define D3D10_SHVER_GET_MAJOR(v) (((v) >> 4) & 0xf)") +cpp_quote("#define D3D10_SHVER_GET_MINOR(v) (((v) >> 0) & 0xf)") + +typedef struct _D3D10_SIGNATURE_PARAMETER_DESC +{ + LPCSTR SemanticName; + UINT SemanticIndex; + UINT Register; + D3D_NAME SystemValueType; + D3D_REGISTER_COMPONENT_TYPE ComponentType; + BYTE Mask; + BYTE ReadWriteMask; +} D3D10_SIGNATURE_PARAMETER_DESC; + +typedef struct _D3D10_SHADER_BUFFER_DESC +{ + LPCSTR Name; + D3D_CBUFFER_TYPE Type; + UINT Variables; + UINT Size; + UINT uFlags; +} D3D10_SHADER_BUFFER_DESC; + +typedef struct _D3D10_SHADER_VARIABLE_DESC +{ + LPCSTR Name; + UINT StartOffset; + UINT Size; + UINT uFlags; + LPVOID DefaultValue; + UINT StartTexture; + UINT TextureSize; + UINT StartSampler; + UINT SamplerSize; +} D3D10_SHADER_VARIABLE_DESC; + +typedef struct _D3D10_SHADER_TYPE_DESC +{ + D3D_SHADER_VARIABLE_CLASS Class; + D3D_SHADER_VARIABLE_TYPE Type; + UINT Rows; + UINT Columns; + UINT Elements; + UINT Members; + UINT Offset; + LPCSTR Name; +} D3D10_SHADER_TYPE_DESC; + +typedef D3D_TESSELLATOR_DOMAIN D3D10_TESSELLATOR_DOMAIN; +typedef D3D_TESSELLATOR_PARTITIONING D3D10_TESSELLATOR_PARTITIONING; +typedef D3D_TESSELLATOR_OUTPUT_PRIMITIVE D3D10_TESSELLATOR_OUTPUT_PRIMITIVE; + +typedef struct _D3D10_SHADER_DESC +{ + UINT Version; + LPCSTR Creator; + UINT Flags; + + UINT ConstantBuffers; + UINT BoundResources; + UINT InputParameters; + UINT OutputParameters; + + UINT InstructionCount; + UINT TempRegisterCount; + UINT TempArrayCount; + UINT DefCount; + UINT DclCount; + UINT TextureNormalInstructions; + UINT TextureLoadInstructions; + UINT TextureCompInstructions; + UINT TextureBiasInstructions; + UINT TextureGradientInstructions; + UINT FloatInstructionCount; + UINT IntInstructionCount; + UINT UintInstructionCount; + UINT StaticFlowControlCount; + UINT DynamicFlowControlCount; + UINT MacroInstructionCount; + UINT ArrayInstructionCount; + UINT CutInstructionCount; + UINT EmitInstructionCount; + D3D_PRIMITIVE_TOPOLOGY GSOutputTopology; + UINT GSMaxOutputVertexCount; +} D3D10_SHADER_DESC; + +typedef struct _D3D10_SHADER_INPUT_BIND_DESC +{ + LPCSTR Name; + D3D_SHADER_INPUT_TYPE Type; + UINT BindPoint; + UINT BindCount; + + UINT uFlags; + D3D_RESOURCE_RETURN_TYPE ReturnType; + D3D_SRV_DIMENSION Dimension; + UINT NumSamples; +} D3D10_SHADER_INPUT_BIND_DESC; + +[local, object, uuid("C530AD7D-9B16-4395-A979-BA2ECFF83ADD")] +interface ID3D10ShaderReflectionType +{ + HRESULT GetDesc( + [out] D3D10_SHADER_TYPE_DESC *a + ); + + ID3D10ShaderReflectionType* GetMemberTypeByIndex( + [in] UINT a + ); + + ID3D10ShaderReflectionType* GetMemberTypeByName( + [in] LPCSTR a + ); + + LPCSTR GetMemberTypeName( + [in] UINT a + ); +}; + +interface ID3D10ShaderReflectionConstantBuffer; + +[object, local, uuid("1BF63C95-2650-405d-99C1-3636BD1DA0A1")] +interface ID3D10ShaderReflectionVariable +{ + HRESULT GetDesc( + [out] D3D10_SHADER_VARIABLE_DESC *a + ); + + ID3D10ShaderReflectionType* GetType(); +}; + +[object, local, uuid("66C66A94-DDDD-4b62-A66A-F0DA33C2B4D0")] +interface ID3D10ShaderReflectionConstantBuffer +{ + HRESULT GetDesc( + [out] D3D10_SHADER_BUFFER_DESC *a + ); + + ID3D10ShaderReflectionVariable* GetVariableByIndex( + [in] UINT a + ); + + ID3D10ShaderReflectionVariable* GetVariableByName( + [in] LPCSTR a + ); +}; + +[object,local,uuid("D40E20B6-F8F7-42ad-AB20-4BAF8F15DFAA")] +interface ID3D10ShaderReflection : IUnknown +{ + HRESULT GetDesc( + [out] D3D10_SHADER_DESC *a + ); + + ID3D10ShaderReflectionConstantBuffer* GetConstantBufferByIndex( + [in] UINT a + ); + + ID3D10ShaderReflectionConstantBuffer* GetConstantBufferByName( + [in] LPCSTR a + ); + + HRESULT GetResourceBindingDesc( + [in] UINT a, + [out] D3D10_SHADER_INPUT_BIND_DESC *b + ); + + HRESULT GetInputParameterDesc( + [in] UINT a, + [out] D3D10_SIGNATURE_PARAMETER_DESC *b + ); + + HRESULT GetOutputParameterDesc + ( + [in] UINT a, + [out] D3D10_SIGNATURE_PARAMETER_DESC *b + ); +}; + +HRESULT D3D10CompileShader(LPCSTR pSrcData, SIZE_T SrcDataLen, LPCSTR pFileName, const D3D10_SHADER_MACRO* pDefines, LPD3D10INCLUDE pInclude, + LPCSTR pFunctionName, LPCSTR pProfile, UINT Flags, ID3D10Blob** ppShader, ID3D10Blob** ppErrorMsgs); +HRESULT D3D10DisassembleShader(const void *pShader, SIZE_T BytecodeLength, BOOL EnableColorCode, LPCSTR pComments, ID3D10Blob** ppDisassembly); +LPCSTR D3D10GetPixelShaderProfile(ID3D10Device *pDevice); +LPCSTR D3D10GetVertexShaderProfile(ID3D10Device *pDevice); +LPCSTR D3D10GetGeometryShaderProfile(ID3D10Device *pDevice); +HRESULT D3D10ReflectShader(const void *pShaderBytecode, SIZE_T BytecodeLength, ID3D10ShaderReflection **ppReflector); +HRESULT D3D10PreprocessShader(LPCSTR pSrcData, SIZE_T SrcDataSize, LPCSTR pFileName, const D3D10_SHADER_MACRO* pDefines, + LPD3D10INCLUDE pInclude, ID3D10Blob** ppShaderText, ID3D10Blob** ppErrorMsgs); +HRESULT D3D10GetInputSignatureBlob(const void *pShaderBytecode, SIZE_T BytecodeLength, ID3D10Blob **ppSignatureBlob); +HRESULT D3D10GetOutputSignatureBlob(const void *pShaderBytecode, SIZE_T BytecodeLength, ID3D10Blob **ppSignatureBlob); +HRESULT D3D10GetInputAndOutputSignatureBlob(const void *pShaderBytecode, SIZE_T BytecodeLength, ID3D10Blob **ppSignatureBlob); +HRESULT D3D10GetShaderDebugInfo(const void *pShaderBytecode, SIZE_T BytecodeLength, ID3D10Blob** ppDebugInfo); diff --git a/src/gallium/state_trackers/d3d1x/d3dapi/d3d11.idl b/src/gallium/state_trackers/d3d1x/d3dapi/d3d11.idl new file mode 100644 index 0000000000..cac046fac7 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3dapi/d3d11.idl @@ -0,0 +1,2492 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +import "oaidl.idl"; +import "ocidl.idl"; +import "dxgi.idl"; +import "d3dcommon.idl"; + +const unsigned int D3D11_SDK_VERSION = 7; + +cpp_quote("#ifndef _D3D11_CONSTANTS") +cpp_quote("#define _D3D11_CONSTANTS") +const unsigned int D3D11_16BIT_INDEX_STRIP_CUT_VALUE = 0xffff; +const unsigned int D3D11_32BIT_INDEX_STRIP_CUT_VALUE = 0xffffffff; +const unsigned int D3D11_8BIT_INDEX_STRIP_CUT_VALUE = 0xff; +const unsigned int D3D11_ARRAY_AXIS_ADDRESS_RANGE_BIT_COUNT = 9; +const unsigned int D3D11_CLIP_OR_CULL_DISTANCE_COUNT = 8; +const unsigned int D3D11_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT = 2; +const unsigned int D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT = 14; +const unsigned int D3D11_COMMONSHADER_CONSTANT_BUFFER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_COMMONSHADER_CONSTANT_BUFFER_COMPONENTS = 4; +const unsigned int D3D11_COMMONSHADER_CONSTANT_BUFFER_HW_SLOT_COUNT = 15; +const unsigned int D3D11_COMMONSHADER_CONSTANT_BUFFER_REGISTER_COMPONENTS = 4; +const unsigned int D3D11_COMMONSHADER_CONSTANT_BUFFER_REGISTER_COUNT = 15; +const unsigned int D3D11_COMMONSHADER_CONSTANT_BUFFER_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_COMMONSHADER_CONSTANT_BUFFER_REGISTER_READS_PER_INST = 1; +const unsigned int D3D11_COMMONSHADER_FLOWCONTROL_NESTING_LIMIT = 64; +const unsigned int D3D11_COMMONSHADER_IMMEDIATE_CONSTANT_BUFFER_REGISTER_COMPONENTS = 4; +const unsigned int D3D11_COMMONSHADER_IMMEDIATE_CONSTANT_BUFFER_REGISTER_COUNT = 1; +const unsigned int D3D11_COMMONSHADER_IMMEDIATE_CONSTANT_BUFFER_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_COMMONSHADER_IMMEDIATE_CONSTANT_BUFFER_REGISTER_READS_PER_INST = 1; +const unsigned int D3D11_COMMONSHADER_IMMEDIATE_VALUE_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_COMMONSHADER_INPUT_RESOURCE_REGISTER_COMPONENTS = 1; +const unsigned int D3D11_COMMONSHADER_INPUT_RESOURCE_REGISTER_COUNT = 128; +const unsigned int D3D11_COMMONSHADER_INPUT_RESOURCE_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_COMMONSHADER_INPUT_RESOURCE_REGISTER_READS_PER_INST = 1; +const unsigned int D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT = 128; +const unsigned int D3D11_COMMONSHADER_SAMPLER_REGISTER_COMPONENTS = 1; +const unsigned int D3D11_COMMONSHADER_SAMPLER_REGISTER_COUNT = 16; +const unsigned int D3D11_COMMONSHADER_SAMPLER_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_COMMONSHADER_SAMPLER_REGISTER_READS_PER_INST = 1; +const unsigned int D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT = 16; +const unsigned int D3D11_COMMONSHADER_SUBROUTINE_NESTING_LIMIT = 32; +const unsigned int D3D11_COMMONSHADER_TEMP_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_COMMONSHADER_TEMP_REGISTER_COMPONENTS = 4; +const unsigned int D3D11_COMMONSHADER_TEMP_REGISTER_COUNT = 4096; +const unsigned int D3D11_COMMONSHADER_TEMP_REGISTER_READ_PORTS = 3; +const unsigned int D3D11_COMMONSHADER_TEMP_REGISTER_READS_PER_INST = 3; +const unsigned int D3D11_COMMONSHADER_TEXCOORD_RANGE_REDUCTION_MAX = 10; +const int D3D11_COMMONSHADER_TEXCOORD_RANGE_REDUCTION_MIN = -10; +const int D3D11_COMMONSHADER_TEXEL_OFFSET_MAX_NEGATIVE = -8; +const unsigned int D3D11_COMMONSHADER_TEXEL_OFFSET_MAX_POSITIVE = 7; +const unsigned int D3D11_CS_4_X_BUCKET00_MAX_BYTES_TGSM_WRITABLE_PER_THREAD = 256; +const unsigned int D3D11_CS_4_X_BUCKET00_MAX_NUM_THREADS_PER_GROUP = 64; +const unsigned int D3D11_CS_4_X_BUCKET01_MAX_BYTES_TGSM_WRITABLE_PER_THREAD = 240; +const unsigned int D3D11_CS_4_X_BUCKET01_MAX_NUM_THREADS_PER_GROUP = 68; +const unsigned int D3D11_CS_4_X_BUCKET02_MAX_BYTES_TGSM_WRITABLE_PER_THREAD = 224; +const unsigned int D3D11_CS_4_X_BUCKET02_MAX_NUM_THREADS_PER_GROUP = 72; +const unsigned int D3D11_CS_4_X_BUCKET03_MAX_BYTES_TGSM_WRITABLE_PER_THREAD = 208; +const unsigned int D3D11_CS_4_X_BUCKET03_MAX_NUM_THREADS_PER_GROUP = 76; +const unsigned int D3D11_CS_4_X_BUCKET04_MAX_BYTES_TGSM_WRITABLE_PER_THREAD = 192; +const unsigned int D3D11_CS_4_X_BUCKET04_MAX_NUM_THREADS_PER_GROUP = 84; +const unsigned int D3D11_CS_4_X_BUCKET05_MAX_BYTES_TGSM_WRITABLE_PER_THREAD = 176; +const unsigned int D3D11_CS_4_X_BUCKET05_MAX_NUM_THREADS_PER_GROUP = 92; +const unsigned int D3D11_CS_4_X_BUCKET06_MAX_BYTES_TGSM_WRITABLE_PER_THREAD = 160; +const unsigned int D3D11_CS_4_X_BUCKET06_MAX_NUM_THREADS_PER_GROUP = 100; +const unsigned int D3D11_CS_4_X_BUCKET07_MAX_BYTES_TGSM_WRITABLE_PER_THREAD = 144; +const unsigned int D3D11_CS_4_X_BUCKET07_MAX_NUM_THREADS_PER_GROUP = 112; +const unsigned int D3D11_CS_4_X_BUCKET08_MAX_BYTES_TGSM_WRITABLE_PER_THREAD = 128; +const unsigned int D3D11_CS_4_X_BUCKET08_MAX_NUM_THREADS_PER_GROUP = 128; +const unsigned int D3D11_CS_4_X_BUCKET09_MAX_BYTES_TGSM_WRITABLE_PER_THREAD = 112; +const unsigned int D3D11_CS_4_X_BUCKET09_MAX_NUM_THREADS_PER_GROUP = 144; +const unsigned int D3D11_CS_4_X_BUCKET10_MAX_BYTES_TGSM_WRITABLE_PER_THREAD = 96; +const unsigned int D3D11_CS_4_X_BUCKET10_MAX_NUM_THREADS_PER_GROUP = 168; +const unsigned int D3D11_CS_4_X_BUCKET11_MAX_BYTES_TGSM_WRITABLE_PER_THREAD = 80; +const unsigned int D3D11_CS_4_X_BUCKET11_MAX_NUM_THREADS_PER_GROUP = 204; +const unsigned int D3D11_CS_4_X_BUCKET12_MAX_BYTES_TGSM_WRITABLE_PER_THREAD = 64; +const unsigned int D3D11_CS_4_X_BUCKET12_MAX_NUM_THREADS_PER_GROUP = 256; +const unsigned int D3D11_CS_4_X_BUCKET13_MAX_BYTES_TGSM_WRITABLE_PER_THREAD = 48; +const unsigned int D3D11_CS_4_X_BUCKET13_MAX_NUM_THREADS_PER_GROUP = 340; +const unsigned int D3D11_CS_4_X_BUCKET14_MAX_BYTES_TGSM_WRITABLE_PER_THREAD = 32; +const unsigned int D3D11_CS_4_X_BUCKET14_MAX_NUM_THREADS_PER_GROUP = 512; +const unsigned int D3D11_CS_4_X_BUCKET15_MAX_BYTES_TGSM_WRITABLE_PER_THREAD = 16; +const unsigned int D3D11_CS_4_X_BUCKET15_MAX_NUM_THREADS_PER_GROUP = 768; +const unsigned int D3D11_CS_4_X_DISPATCH_MAX_THREAD_GROUPS_IN_Z_DIMENSION = 1; +const unsigned int D3D11_CS_4_X_RAW_UAV_BYTE_ALIGNMENT = 256; +const unsigned int D3D11_CS_4_X_THREAD_GROUP_MAX_THREADS_PER_GROUP = 768; +const unsigned int D3D11_CS_4_X_THREAD_GROUP_MAX_X = 768; +const unsigned int D3D11_CS_4_X_THREAD_GROUP_MAX_Y = 768; +const unsigned int D3D11_CS_4_X_UAV_REGISTER_COUNT = 1; +const unsigned int D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION = 65535; +const unsigned int D3D11_CS_TGSM_REGISTER_COUNT = 8192; +const unsigned int D3D11_CS_TGSM_REGISTER_READS_PER_INST = 1; +const unsigned int D3D11_CS_TGSM_RESOURCE_REGISTER_COMPONENTS = 1; +const unsigned int D3D11_CS_TGSM_RESOURCE_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP = 1024; +const unsigned int D3D11_CS_THREAD_GROUP_MAX_X = 1024; +const unsigned int D3D11_CS_THREAD_GROUP_MAX_Y = 1024; +const unsigned int D3D11_CS_THREAD_GROUP_MAX_Z = 64; +const unsigned int D3D11_CS_THREAD_GROUP_MIN_X = 1; +const unsigned int D3D11_CS_THREAD_GROUP_MIN_Y = 1; +const unsigned int D3D11_CS_THREAD_GROUP_MIN_Z = 1; +const unsigned int D3D11_CS_THREAD_LOCAL_TEMP_REGISTER_POOL = 16384; +const float D3D11_DEFAULT_BLEND_FACTOR_ALPHA = 1.0; +const float D3D11_DEFAULT_BLEND_FACTOR_BLUE = 1.0; +const float D3D11_DEFAULT_BLEND_FACTOR_GREEN = 1.0; +const float D3D11_DEFAULT_BLEND_FACTOR_RED = 1.0; +const float D3D11_DEFAULT_BORDER_COLOR_COMPONENT = 0.0; +const unsigned int D3D11_DEFAULT_DEPTH_BIAS = 0; +const float D3D11_DEFAULT_DEPTH_BIAS_CLAMP = 0.0; +const unsigned int D3D11_DEFAULT_MAX_ANISOTROPY = 16; +const float D3D11_DEFAULT_MIP_LOD_BIAS = 0.0; +const unsigned int D3D11_DEFAULT_RENDER_TARGET_ARRAY_INDEX = 0; +const unsigned int D3D11_DEFAULT_SAMPLE_MASK = 0xffffffff; +const unsigned int D3D11_DEFAULT_SCISSOR_ENDX = 0; +const unsigned int D3D11_DEFAULT_SCISSOR_ENDY = 0; +const unsigned int D3D11_DEFAULT_SCISSOR_STARTX = 0; +const unsigned int D3D11_DEFAULT_SCISSOR_STARTY = 0; +const float D3D11_DEFAULT_SLOPE_SCALED_DEPTH_BIAS = 0.0; +const unsigned int D3D11_DEFAULT_STENCIL_READ_MASK = 0xff; +const unsigned int D3D11_DEFAULT_STENCIL_REFERENCE = 0; +const unsigned int D3D11_DEFAULT_STENCIL_WRITE_MASK = 0xff; +const unsigned int D3D11_DEFAULT_VIEWPORT_AND_SCISSORRECT_INDEX = 0; +const unsigned int D3D11_DEFAULT_VIEWPORT_HEIGHT = 0; +const float D3D11_DEFAULT_VIEWPORT_MAX_DEPTH = 0.0; +const float D3D11_DEFAULT_VIEWPORT_MIN_DEPTH = 0.0; +const unsigned int D3D11_DEFAULT_VIEWPORT_TOPLEFTX = 0; +const unsigned int D3D11_DEFAULT_VIEWPORT_TOPLEFTY = 0; +const unsigned int D3D11_DEFAULT_VIEWPORT_WIDTH = 0; +const unsigned int D3D11_DS_INPUT_CONTROL_POINT_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_DS_INPUT_CONTROL_POINT_REGISTER_COMPONENTS = 4; +const unsigned int D3D11_DS_INPUT_CONTROL_POINT_REGISTER_COUNT = 32; +const unsigned int D3D11_DS_INPUT_CONTROL_POINT_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_DS_INPUT_CONTROL_POINT_REGISTER_READS_PER_INST = 2; +const unsigned int D3D11_DS_INPUT_CONTROL_POINTS_MAX_TOTAL_SCALARS = 3968; +const unsigned int D3D11_DS_INPUT_DOMAIN_POINT_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_DS_INPUT_DOMAIN_POINT_REGISTER_COMPONENTS = 3; +const unsigned int D3D11_DS_INPUT_DOMAIN_POINT_REGISTER_COUNT = 1; +const unsigned int D3D11_DS_INPUT_DOMAIN_POINT_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_DS_INPUT_DOMAIN_POINT_REGISTER_READS_PER_INST = 2; +const unsigned int D3D11_DS_INPUT_PATCH_CONSTANT_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_DS_INPUT_PATCH_CONSTANT_REGISTER_COMPONENTS = 4; +const unsigned int D3D11_DS_INPUT_PATCH_CONSTANT_REGISTER_COUNT = 32; +const unsigned int D3D11_DS_INPUT_PATCH_CONSTANT_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_DS_INPUT_PATCH_CONSTANT_REGISTER_READS_PER_INST = 2; +const unsigned int D3D11_DS_OUTPUT_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_DS_OUTPUT_REGISTER_COMPONENTS = 4; +const unsigned int D3D11_DS_OUTPUT_REGISTER_COUNT = 32; +const unsigned int D3D11_FLOAT16_FUSED_TOLERANCE_IN_ULP = 0.6; +const float D3D11_FLOAT32_MAX = 3.402823466e+38; +const float D3D11_FLOAT32_TO_INTEGER_TOLERANCE_IN_ULP = 0.6; +const float D3D11_FLOAT_TO_SRGB_EXPONENT_DENOMINATOR = 2.4; +const float D3D11_FLOAT_TO_SRGB_EXPONENT_NUMERATOR = 1.0; +const float D3D11_FLOAT_TO_SRGB_OFFSET = 0.055; +const float D3D11_FLOAT_TO_SRGB_SCALE_1 = 12.92; +const float D3D11_FLOAT_TO_SRGB_SCALE_2 = 1.055; +const float D3D11_FLOAT_TO_SRGB_THRESHOLD = 0.0031308; +const float D3D11_FTOI_INSTRUCTION_MAX_INPUT = 2147483647.999; +const float D3D11_FTOI_INSTRUCTION_MIN_INPUT = -2147483648.999; +const float D3D11_FTOU_INSTRUCTION_MAX_INPUT = 4294967295.999; +const float D3D11_FTOU_INSTRUCTION_MIN_INPUT = 0.0; +const unsigned int D3D11_GS_INPUT_INSTANCE_ID_READ_PORTS = 1; +const unsigned int D3D11_GS_INPUT_INSTANCE_ID_READS_PER_INST = 2; +const unsigned int D3D11_GS_INPUT_INSTANCE_ID_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_GS_INPUT_INSTANCE_ID_REGISTER_COMPONENTS = 1; +const unsigned int D3D11_GS_INPUT_INSTANCE_ID_REGISTER_COUNT = 1; +const unsigned int D3D11_GS_INPUT_PRIM_CONST_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_GS_INPUT_PRIM_CONST_REGISTER_COMPONENTS = 1; +const unsigned int D3D11_GS_INPUT_PRIM_CONST_REGISTER_COUNT = 1; +const unsigned int D3D11_GS_INPUT_PRIM_CONST_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_GS_INPUT_PRIM_CONST_REGISTER_READS_PER_INST = 2; +const unsigned int D3D11_GS_INPUT_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_GS_INPUT_REGISTER_COMPONENTS = 4; +const unsigned int D3D11_GS_INPUT_REGISTER_COUNT = 32; +const unsigned int D3D11_GS_INPUT_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_GS_INPUT_REGISTER_READS_PER_INST = 2; +const unsigned int D3D11_GS_INPUT_REGISTER_VERTICES = 32; +const unsigned int D3D11_GS_MAX_INSTANCE_COUNT = 32; +const unsigned int D3D11_GS_MAX_OUTPUT_VERTEX_COUNT_ACROSS_INSTANCES = 1024; +const unsigned int D3D11_GS_OUTPUT_ELEMENTS = 32; +const unsigned int D3D11_GS_OUTPUT_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_GS_OUTPUT_REGISTER_COMPONENTS = 4; +const unsigned int D3D11_GS_OUTPUT_REGISTER_COUNT = 32; +const unsigned int D3D11_HS_CONTROL_POINT_PHASE_INPUT_REGISTER_COUNT = 32; +const unsigned int D3D11_HS_CONTROL_POINT_PHASE_OUTPUT_REGISTER_COUNT = 32; +const unsigned int D3D11_HS_CONTROL_POINT_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_HS_CONTROL_POINT_REGISTER_COMPONENTS = 4; +const unsigned int D3D11_HS_CONTROL_POINT_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_HS_CONTROL_POINT_REGISTER_READS_PER_INST = 2; +const unsigned int D3D11_HS_FORK_PHASE_INSTANCE_COUNT_UPPER_BOUND = 0xffffffff; +const unsigned int D3D11_HS_INPUT_FORK_INSTANCE_ID_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_HS_INPUT_FORK_INSTANCE_ID_REGISTER_COMPONENTS = 1; +const unsigned int D3D11_HS_INPUT_FORK_INSTANCE_ID_REGISTER_COUNT = 1; +const unsigned int D3D11_HS_INPUT_FORK_INSTANCE_ID_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_HS_INPUT_FORK_INSTANCE_ID_REGISTER_READS_PER_INST = 2; +const unsigned int D3D11_HS_INPUT_JOIN_INSTANCE_ID_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_HS_INPUT_JOIN_INSTANCE_ID_REGISTER_COMPONENTS = 1; +const unsigned int D3D11_HS_INPUT_JOIN_INSTANCE_ID_REGISTER_COUNT = 1; +const unsigned int D3D11_HS_INPUT_JOIN_INSTANCE_ID_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_HS_INPUT_JOIN_INSTANCE_ID_REGISTER_READS_PER_INST = 2; +const unsigned int D3D11_HS_INPUT_PRIMITIVE_ID_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_HS_INPUT_PRIMITIVE_ID_REGISTER_COMPONENTS = 1; +const unsigned int D3D11_HS_INPUT_PRIMITIVE_ID_REGISTER_COUNT = 1; +const unsigned int D3D11_HS_INPUT_PRIMITIVE_ID_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_HS_INPUT_PRIMITIVE_ID_REGISTER_READS_PER_INST = 2; +const unsigned int D3D11_HS_JOIN_PHASE_INSTANCE_COUNT_UPPER_BOUND = 0xffffffff; +const float D3D11_HS_MAXTESSFACTOR_LOWER_BOUND = 1.0; +const float D3D11_HS_MAXTESSFACTOR_UPPER_BOUND = 64.0; +const unsigned int D3D11_HS_OUTPUT_CONTROL_POINT_ID_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_HS_OUTPUT_CONTROL_POINT_ID_REGISTER_COMPONENTS = 1; +const unsigned int D3D11_HS_OUTPUT_CONTROL_POINT_ID_REGISTER_COUNT = 1; +const unsigned int D3D11_HS_OUTPUT_CONTROL_POINT_ID_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_HS_OUTPUT_CONTROL_POINT_ID_REGISTER_READS_PER_INST = 2; +const unsigned int D3D11_HS_OUTPUT_CONTROL_POINTS_MAX_TOTAL_SCALARS = 3968; +const unsigned int D3D11_HS_OUTPUT_PATCH_CONSTANT_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_HS_OUTPUT_PATCH_CONSTANT_REGISTER_COMPONENTS = 4; +const unsigned int D3D11_HS_OUTPUT_PATCH_CONSTANT_REGISTER_COUNT = 32; +const unsigned int D3D11_HS_OUTPUT_PATCH_CONSTANT_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_HS_OUTPUT_PATCH_CONSTANT_REGISTER_READS_PER_INST = 2; +const unsigned int D3D11_IA_DEFAULT_INDEX_BUFFER_OFFSET_IN_BYTES = 0; +const unsigned int D3D11_IA_DEFAULT_PRIMITIVE_TOPOLOGY = 0; +const unsigned int D3D11_IA_DEFAULT_VERTEX_BUFFER_OFFSET_IN_BYTES = 0; +const unsigned int D3D11_IA_INDEX_INPUT_RESOURCE_SLOT_COUNT = 1; +const unsigned int D3D11_IA_INSTANCE_ID_BIT_COUNT = 32; +const unsigned int D3D11_IA_INTEGER_ARITHMETIC_BIT_COUNT = 32; +const unsigned int D3D11_IA_PATCH_MAX_CONTROL_POINT_COUNT = 32; +const unsigned int D3D11_IA_PRIMITIVE_ID_BIT_COUNT = 32; +const unsigned int D3D11_IA_VERTEX_ID_BIT_COUNT = 32; +const unsigned int D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT = 32; +const unsigned int D3D11_IA_VERTEX_INPUT_STRUCTURE_ELEMENT_COUNT = 32; +const unsigned int D3D11_IA_VERTEX_INPUT_STRUCTURE_ELEMENTS_COMPONENTS = 128; +const unsigned int D3D11_INTEGER_DIVIDE_BY_ZERO_QUOTIENT = 0xffffffff; +const unsigned int D3D11_INTEGER_DIVIDE_BY_ZERO_REMAINDER = 0xffffffff; +const unsigned int D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL = 0xffffffff; +const unsigned int D3D11_KEEP_UNORDERED_ACCESS_VIEWS = 0xffffffff; +const float D3D11_LINEAR_GAMMA = 1.0; +const unsigned int D3D11_MAJOR_VERSION = 11; +const float D3D11_MAX_BORDER_COLOR_COMPONENT = 1.0; +const float D3D11_MAX_DEPTH = 1.0; +const unsigned int D3D11_MAX_MAXANISOTROPY = 16; +const unsigned int D3D11_MAX_MULTISAMPLE_SAMPLE_COUNT = 32; +const float D3D11_MAX_POSITION_VALUE = 3.402823466e+34; +const unsigned int D3D11_MAX_TEXTURE_DIMENSION_2_TO_EXP = 17; +const float D3D11_MIN_BORDER_COLOR_COMPONENT = 0.0; +const float D3D11_MIN_DEPTH = 0.0; +const unsigned int D3D11_MIN_MAXANISOTROPY = 0; +const unsigned int D3D11_MINOR_VERSION = 0; +const float D3D11_MIP_LOD_BIAS_MAX = 15.99; +const float D3D11_MIP_LOD_BIAS_MIN = -16.0; +const unsigned int D3D11_MIP_LOD_FRACTIONAL_BIT_COUNT = 8; +const unsigned int D3D11_MIP_LOD_RANGE_BIT_COUNT = 8; +const float D3D11_MULTISAMPLE_ANTIALIAS_LINE_WIDTH = 1.4; +const unsigned int D3D11_NONSAMPLE_FETCH_OUT_OF_RANGE_ACCESS_RESULT = 0; +const unsigned int D3D11_PIXEL_ADDRESS_RANGE_BIT_COUNT = 15; +const unsigned int D3D11_PRE_SCISSOR_PIXEL_ADDRESS_RANGE_BIT_COUNT = 16; +const unsigned int D3D11_PS_CS_UAV_REGISTER_COMPONENTS = 1; +const unsigned int D3D11_PS_CS_UAV_REGISTER_COUNT = 8; +const unsigned int D3D11_PS_CS_UAV_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_PS_CS_UAV_REGISTER_READS_PER_INST = 1; +const unsigned int D3D11_PS_FRONTFACING_DEFAULT_VALUE = 0xffffffff; +const unsigned int D3D11_PS_FRONTFACING_FALSE_VALUE = 0; +const unsigned int D3D11_PS_FRONTFACING_TRUE_VALUE = 0xffffffff; +const unsigned int D3D11_PS_INPUT_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_PS_INPUT_REGISTER_COMPONENTS = 4; +const unsigned int D3D11_PS_INPUT_REGISTER_COUNT = 32; +const unsigned int D3D11_PS_INPUT_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_PS_INPUT_REGISTER_READS_PER_INST = 2; +const float D3D11_PS_LEGACY_PIXEL_CENTER_FRACTIONAL_COMPONENT = 0.0; +const unsigned int D3D11_PS_OUTPUT_DEPTH_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_PS_OUTPUT_DEPTH_REGISTER_COMPONENTS = 1; +const unsigned int D3D11_PS_OUTPUT_DEPTH_REGISTER_COUNT = 1; +const unsigned int D3D11_PS_OUTPUT_MASK_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_PS_OUTPUT_MASK_REGISTER_COMPONENTS = 1; +const unsigned int D3D11_PS_OUTPUT_MASK_REGISTER_COUNT = 1; +const unsigned int D3D11_PS_OUTPUT_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_PS_OUTPUT_REGISTER_COMPONENTS = 4; +const unsigned int D3D11_PS_OUTPUT_REGISTER_COUNT = 8; +const float D3D11_PS_PIXEL_CENTER_FRACTIONAL_COMPONENT = 0.5; +const unsigned int D3D11_RAW_UAV_SRV_BYTE_ALIGNMENT = 16; +const unsigned int D3D11_REQ_BLEND_OBJECT_COUNT_PER_DEVICE = 4096; +const unsigned int D3D11_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP = 27; +const unsigned int D3D11_REQ_CONSTANT_BUFFER_ELEMENT_COUNT = 4096; +const unsigned int D3D11_REQ_DEPTH_STENCIL_OBJECT_COUNT_PER_DEVICE = 4096; +const unsigned int D3D11_REQ_DRAWINDEXED_INDEX_COUNT_2_TO_EXP = 32; +const unsigned int D3D11_REQ_DRAW_VERTEX_COUNT_2_TO_EXP = 32; +const unsigned int D3D11_REQ_FILTERING_HW_ADDRESSABLE_RESOURCE_DIMENSION = 16384; +const unsigned int D3D11_REQ_GS_INVOCATION_32BIT_OUTPUT_COMPONENT_LIMIT = 1024; +const unsigned int D3D11_REQ_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT = 4096; +const unsigned int D3D11_REQ_MAXANISOTROPY = 16; +const unsigned int D3D11_REQ_MIP_LEVELS = 15; +const unsigned int D3D11_REQ_MULTI_ELEMENT_STRUCTURE_SIZE_IN_BYTES = 2048; +const unsigned int D3D11_REQ_RASTERIZER_OBJECT_COUNT_PER_DEVICE = 4096; +const unsigned int D3D11_REQ_RENDER_TO_BUFFER_WINDOW_WIDTH = 16384; +const unsigned int D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_A_TERM = 128; +const float D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_B_TERM = 0.25; +const unsigned int D3D11_REQ_RESOURCE_VIEW_COUNT_PER_DEVICE_2_TO_EXP = 20; +const unsigned int D3D11_REQ_SAMPLER_OBJECT_COUNT_PER_DEVICE = 4096; +const unsigned int D3D11_REQ_TEXTURE1D_ARRAY_AXIS_DIMENSION = 2048; +const unsigned int D3D11_REQ_TEXTURE1D_U_DIMENSION = 16384; +const unsigned int D3D11_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION = 2048; +const unsigned int D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION = 16384; +const unsigned int D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION = 2048; +const unsigned int D3D11_REQ_TEXTURECUBE_DIMENSION = 16384; +const unsigned int D3D11_RESINFO_INSTRUCTION_MISSING_COMPONENT_RETVAL = 0; +const unsigned int D3D11_SHADER_MAJOR_VERSION = 5; +const unsigned int D3D11_SHADER_MAX_INSTANCES = 65535; +const unsigned int D3D11_SHADER_MAX_INTERFACE_CALL_SITES = 4096; +const unsigned int D3D11_SHADER_MAX_INTERFACES = 253; +const unsigned int D3D11_SHADER_MAX_TYPES = 65535; +const unsigned int D3D11_SHADER_MINOR_VERSION = 0; +const unsigned int D3D11_SHIFT_INSTRUCTION_PAD_VALUE = 0; +const unsigned int D3D11_SHIFT_INSTRUCTION_SHIFT_VALUE_BIT_COUNT = 5; +const unsigned int D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT = 8; +const unsigned int D3D11_SO_BUFFER_MAX_STRIDE_IN_BYTES = 2048; +const unsigned int D3D11_SO_BUFFER_MAX_WRITE_WINDOW_IN_BYTES = 512; +const unsigned int D3D11_SO_BUFFER_SLOT_COUNT = 4; +const unsigned int D3D11_SO_DDI_REGISTER_INDEX_DENOTING_GAP = 0xffffffff; +const unsigned int D3D11_SO_NO_RASTERIZED_STREAM = 0xffffffff; +const unsigned int D3D11_SO_OUTPUT_COMPONENT_COUNT = 128; +const unsigned int D3D11_SO_STREAM_COUNT = 4; +const unsigned int D3D11_SPEC_DATE_DAY = 04; +const unsigned int D3D11_SPEC_DATE_MONTH = 06; +const unsigned int D3D11_SPEC_DATE_YEAR = 2009; +const unsigned int D3D11_SPEC_VERSION = 1.0; +const float D3D11_SRGB_GAMMA = 2.2; +const float D3D11_SRGB_TO_FLOAT_DENOMINATOR_1 = 12.92; +const float D3D11_SRGB_TO_FLOAT_DENOMINATOR_2 = 1.055; +const float D3D11_SRGB_TO_FLOAT_EXPONENT = 2.4; +const float D3D11_SRGB_TO_FLOAT_OFFSET = 0.055; +const float D3D11_SRGB_TO_FLOAT_THRESHOLD = 0.04045; +const float D3D11_SRGB_TO_FLOAT_TOLERANCE_IN_ULP = 0.5; +const unsigned int D3D11_STANDARD_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_STANDARD_COMPONENT_BIT_COUNT_DOUBLED = 64; +const unsigned int D3D11_STANDARD_MAXIMUM_ELEMENT_ALIGNMENT_BYTE_MULTIPLE = 4; +const unsigned int D3D11_STANDARD_PIXEL_COMPONENT_COUNT = 128; +const unsigned int D3D11_STANDARD_PIXEL_ELEMENT_COUNT = 32; +const unsigned int D3D11_STANDARD_VECTOR_SIZE = 4; +const unsigned int D3D11_STANDARD_VERTEX_ELEMENT_COUNT = 32; +const unsigned int D3D11_STANDARD_VERTEX_TOTAL_COMPONENT_COUNT = 64; +const unsigned int D3D11_SUBPIXEL_FRACTIONAL_BIT_COUNT = 8; +const unsigned int D3D11_SUBTEXEL_FRACTIONAL_BIT_COUNT = 8; +const unsigned int D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR = 64; +const unsigned int D3D11_TESSELLATOR_MAX_ISOLINE_DENSITY_TESSELLATION_FACTOR = 64; +const unsigned int D3D11_TESSELLATOR_MAX_ODD_TESSELLATION_FACTOR = 63; +const unsigned int D3D11_TESSELLATOR_MAX_TESSELLATION_FACTOR = 64; +const unsigned int D3D11_TESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR = 2; +const unsigned int D3D11_TESSELLATOR_MIN_ISOLINE_DENSITY_TESSELLATION_FACTOR = 1; +const unsigned int D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR = 1; +const unsigned int D3D11_TEXEL_ADDRESS_RANGE_BIT_COUNT = 16; +const unsigned int D3D11_UNBOUND_MEMORY_ACCESS_RESULT = 0; +const unsigned int D3D11_VIEWPORT_AND_SCISSORRECT_MAX_INDEX = 15; +const unsigned int D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE = 16; +const unsigned int D3D11_VIEWPORT_BOUNDS_MAX = 32767; +const int D3D11_VIEWPORT_BOUNDS_MIN = -32768; +const unsigned int D3D11_VS_INPUT_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_VS_INPUT_REGISTER_COMPONENTS = 4; +const unsigned int D3D11_VS_INPUT_REGISTER_COUNT = 32; +const unsigned int D3D11_VS_INPUT_REGISTER_READ_PORTS = 1; +const unsigned int D3D11_VS_INPUT_REGISTER_READS_PER_INST = 2; +const unsigned int D3D11_VS_OUTPUT_REGISTER_COMPONENT_BIT_COUNT = 32; +const unsigned int D3D11_VS_OUTPUT_REGISTER_COMPONENTS = 4; +const unsigned int D3D11_VS_OUTPUT_REGISTER_COUNT = 32; +const unsigned int D3D11_WHQL_CONTEXT_COUNT_FOR_RESOURCE_LIMIT = 10; +const unsigned int D3D11_WHQL_DRAWINDEXED_INDEX_COUNT_2_TO_EXP = 25; +const unsigned int D3D11_WHQL_DRAW_VERTEX_COUNT_2_TO_EXP = 25; +cpp_quote("#endif") + +const unsigned int _FACD3D11 = 0x87C; +const unsigned int _FACD3D11DEBUG = _FACD3D11 + 1; + +cpp_quote("#define MAKE_D3D11_HRESULT(c) MAKE_HRESULT(1, _FACD3D11, (c))") +cpp_quote("#define MAKE_D3D11_STATUS(c) MAKE_HRESULT(0, _FACD3D11, (c))") +cpp_quote("#define D3D11_ERROR_TOO_MANY_UNIQUE_STATE_OBJECTS MAKE_D3D11_HRESULT(1)") +cpp_quote("#define D3D11_ERROR_FILE_NOT_FOUND MAKE_D3D11_HRESULT(2)") +cpp_quote("#define D3D11_ERROR_TOO_MANY_UNIQUE_VIEW_OBJECTS MAKE_D3D11_HRESULT(3)") +cpp_quote("#define D3D11_ERROR_DEFERRED_CONTEXT_MAP_WITHOUT_INITIAL_DISCARD MAKE_D3D11_HRESULT(4)") + +typedef enum D3D11_INPUT_CLASSIFICATION +{ + D3D11_INPUT_PER_VERTEX_DATA, + D3D11_INPUT_PER_INSTANCE_DATA +} D3D11_INPUT_CLASSIFICATION; + +const unsigned int D3D11_APPEND_ALIGNED_ELEMENT = 0xffffffff; + +typedef struct D3D11_INPUT_ELEMENT_DESC +{ + LPCSTR SemanticName; + UINT SemanticIndex; + DXGI_FORMAT Format; + UINT InputSlot; + UINT AlignedByteOffset; + D3D11_INPUT_CLASSIFICATION InputSlotClass; + UINT InstanceDataStepRate; +} D3D11_INPUT_ELEMENT_DESC; + +typedef enum D3D11_FILL_MODE +{ + D3D11_FILL_WIREFRAME = 2, + D3D11_FILL_SOLID = 3 +} D3D11_FILL_MODE; + +typedef D3D_PRIMITIVE_TOPOLOGY D3D11_PRIMITIVE_TOPOLOGY; + +typedef D3D_PRIMITIVE D3D11_PRIMITIVE; + +typedef enum D3D11_CULL_MODE +{ + D3D11_CULL_NONE = 1, + D3D11_CULL_FRONT = 2, + D3D11_CULL_BACK = 3 +} D3D11_CULL_MODE; + +typedef struct D3D11_SO_DECLARATION_ENTRY +{ + UINT Stream; + LPCSTR SemanticName; + UINT SemanticIndex; + BYTE StartComponent; + BYTE ComponentCount; + BYTE OutputSlot; +} D3D11_SO_DECLARATION_ENTRY; + +typedef struct D3D11_VIEWPORT +{ + FLOAT TopLeftX; + FLOAT TopLeftY; + FLOAT Width; + FLOAT Height; + FLOAT MinDepth; + FLOAT MaxDepth; +} D3D11_VIEWPORT; + +typedef enum D3D11_RESOURCE_DIMENSION +{ + D3D11_RESOURCE_DIMENSION_UNKNOWN, + D3D11_RESOURCE_DIMENSION_BUFFER, + D3D11_RESOURCE_DIMENSION_TEXTURE1D, + D3D11_RESOURCE_DIMENSION_TEXTURE2D, + D3D11_RESOURCE_DIMENSION_TEXTURE3D, +} D3D11_RESOURCE_DIMENSION; + +typedef D3D_SRV_DIMENSION D3D11_SRV_DIMENSION; + +typedef enum D3D11_DSV_DIMENSION +{ + D3D11_DSV_DIMENSION_UNKNOWN, + D3D11_DSV_DIMENSION_TEXTURE1D, + D3D11_DSV_DIMENSION_TEXTURE1DARRAY, + D3D11_DSV_DIMENSION_TEXTURE2D, + D3D11_DSV_DIMENSION_TEXTURE2DARRAY, + D3D11_DSV_DIMENSION_TEXTURE2DMS, + D3D11_DSV_DIMENSION_TEXTURE2DMSARRAY, +} D3D11_DSV_DIMENSION; + + +typedef enum D3D11_RTV_DIMENSION +{ + D3D11_RTV_DIMENSION_UNKNOWN, + D3D11_RTV_DIMENSION_BUFFER, + D3D11_RTV_DIMENSION_TEXTURE1D, + D3D11_RTV_DIMENSION_TEXTURE1DARRAY, + D3D11_RTV_DIMENSION_TEXTURE2D, + D3D11_RTV_DIMENSION_TEXTURE2DARRAY, + D3D11_RTV_DIMENSION_TEXTURE2DMS, + D3D11_RTV_DIMENSION_TEXTURE2DMSARRAY, + D3D11_RTV_DIMENSION_TEXTURE3D +} D3D11_RTV_DIMENSION; + +typedef enum D3D11_UAV_DIMENSION +{ + D3D11_UAV_DIMENSION_UNKNOWN, + D3D11_UAV_DIMENSION_BUFFER, + D3D11_UAV_DIMENSION_TEXTURE1D, + D3D11_UAV_DIMENSION_TEXTURE1DARRAY, + D3D11_UAV_DIMENSION_TEXTURE2D, + D3D11_UAV_DIMENSION_TEXTURE2DARRAY, + D3D11_UAV_DIMENSION_TEXTURE3D = 8 +} D3D11_UAV_DIMENSION; + +typedef enum D3D11_USAGE +{ + D3D11_USAGE_DEFAULT, + D3D11_USAGE_IMMUTABLE, + D3D11_USAGE_DYNAMIC, + D3D11_USAGE_STAGING +} D3D11_USAGE; + +typedef enum D3D11_BIND_FLAG +{ + D3D11_BIND_VERTEX_BUFFER = 1, + D3D11_BIND_INDEX_BUFFER = 2, + D3D11_BIND_CONSTANT_BUFFER = 4, + D3D11_BIND_SHADER_RESOURCE = 8, + D3D11_BIND_STREAM_OUTPUT = 0x10, + D3D11_BIND_RENDER_TARGET = 0x20, + D3D11_BIND_DEPTH_STENCIL = 0x40, + D3D11_BIND_UNORDERED_ACCESS = 0x80 +} D3D11_BIND_FLAG; + +typedef enum D3D11_CPU_ACCESS_FLAG +{ + D3D11_CPU_ACCESS_WRITE = 0x10000, + D3D11_CPU_ACCESS_READ = 0x20000 +} D3D11_CPU_ACCESS_FLAG; + +typedef enum D3D11_RESOURCE_MISC_FLAG +{ + D3D11_RESOURCE_MISC_GENERATE_MIPS = 1, + D3D11_RESOURCE_MISC_SHARED = 2, + D3D11_RESOURCE_MISC_TEXTURECUBE = 4, + D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS = 0x10, + D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS = 0x20, + D3D11_RESOURCE_MISC_BUFFER_STRUCTURED = 0x40, + D3D11_RESOURCE_MISC_RESOURCE_CLAMP = 0x80, + D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX = 0x100, + D3D11_RESOURCE_MISC_GDI_COMPATIBLE = 0x200 +} D3D11_RESOURCE_MISC_FLAG; + +typedef enum D3D11_MAP +{ + D3D11_MAP_READ = 1, + D3D11_MAP_WRITE, + D3D11_MAP_READ_WRITE, + D3D11_MAP_WRITE_DISCARD, + D3D11_MAP_WRITE_NO_OVERWRITE +} D3D11_MAP; + +typedef enum D3D11_MAP_FLAG +{ + D3D11_MAP_FLAG_DO_NOT_WAIT = 0x100000 +} D3D11_MAP_FLAG; + +typedef enum D3D11_RAISE_FLAG +{ + D3D11_RAISE_FLAG_DRIVER_INTERNAL_ERROR = 1 +} D3D11_RAISE_FLAG; + +typedef +enum D3D11_CLEAR_FLAG +{ + D3D11_CLEAR_DEPTH = 1, + D3D11_CLEAR_STENCIL = 2 +} D3D11_CLEAR_FLAG; + +typedef RECT D3D11_RECT; + +typedef struct D3D11_BOX +{ + UINT left; + UINT top; + UINT front; + UINT right; + UINT bottom; + UINT back; +} D3D11_BOX; + +typedef enum D3D11_COMPARISON_FUNC +{ + D3D11_COMPARISON_NEVER = 1, + D3D11_COMPARISON_LESS, + D3D11_COMPARISON_EQUAL, + D3D11_COMPARISON_LESS_EQUAL, + D3D11_COMPARISON_GREATER, + D3D11_COMPARISON_NOT_EQUAL, + D3D11_COMPARISON_GREATER_EQUAL, + D3D11_COMPARISON_ALWAYS +} D3D11_COMPARISON_FUNC; + +typedef enum D3D11_DEPTH_WRITE_MASK +{ + D3D11_DEPTH_WRITE_MASK_ZERO, + D3D11_DEPTH_WRITE_MASK_ALL +} D3D11_DEPTH_WRITE_MASK; + +typedef enum D3D11_STENCIL_OP +{ + D3D11_STENCIL_OP_KEEP = 1, + D3D11_STENCIL_OP_ZERO, + D3D11_STENCIL_OP_REPLACE, + D3D11_STENCIL_OP_INCR_SAT, + D3D11_STENCIL_OP_DECR_SAT, + D3D11_STENCIL_OP_INVERT, + D3D11_STENCIL_OP_INCR, + D3D11_STENCIL_OP_DECR +} D3D11_STENCIL_OP; + +typedef struct D3D11_DEPTH_STENCILOP_DESC +{ + D3D11_STENCIL_OP StencilFailOp; + D3D11_STENCIL_OP StencilDepthFailOp; + D3D11_STENCIL_OP StencilPassOp; + D3D11_COMPARISON_FUNC StencilFunc; +} D3D11_DEPTH_STENCILOP_DESC; + +typedef struct D3D11_DEPTH_STENCIL_DESC +{ + BOOL DepthEnable; + D3D11_DEPTH_WRITE_MASK DepthWriteMask; + D3D11_COMPARISON_FUNC DepthFunc; + BOOL StencilEnable; + UINT8 StencilReadMask; + UINT8 StencilWriteMask; + D3D11_DEPTH_STENCILOP_DESC FrontFace; + D3D11_DEPTH_STENCILOP_DESC BackFace; +} D3D11_DEPTH_STENCIL_DESC; + + +typedef enum D3D11_BLEND +{ + D3D11_BLEND_ZERO = 1, + D3D11_BLEND_ONE, + D3D11_BLEND_SRC_COLOR, + D3D11_BLEND_INV_SRC_COLOR, + D3D11_BLEND_SRC_ALPHA, + D3D11_BLEND_INV_SRC_ALPHA, + D3D11_BLEND_DEST_ALPHA, + D3D11_BLEND_INV_DEST_ALPHA, + D3D11_BLEND_DEST_COLOR, + D3D11_BLEND_INV_DEST_COLOR , + D3D11_BLEND_SRC_ALPHA_SAT, + + D3D11_BLEND_BLEND_FACTOR = 14, + D3D11_BLEND_INV_BLEND_FACTOR, + D3D11_BLEND_SRC1_COLOR, + D3D11_BLEND_INV_SRC1_COLOR , + D3D11_BLEND_SRC1_ALPHA, + D3D11_BLEND_INV_SRC1_ALPHA +} D3D11_BLEND; + +typedef enum D3D11_BLEND_OP +{ + D3D11_BLEND_OP_ADD = 1, + D3D11_BLEND_OP_SUBTRACT, + D3D11_BLEND_OP_REV_SUBTRACT, + D3D11_BLEND_OP_MIN, + D3D11_BLEND_OP_MAX +} D3D11_BLEND_OP; + +typedef enum D3D11_COLOR_WRITE_ENABLE +{ + D3D11_COLOR_WRITE_ENABLE_RED = 1, + D3D11_COLOR_WRITE_ENABLE_GREEN = 2, + D3D11_COLOR_WRITE_ENABLE_BLUE = 4, + D3D11_COLOR_WRITE_ENABLE_ALPHA = 8, + D3D11_COLOR_WRITE_ENABLE_ALL = 0xf +} D3D11_COLOR_WRITE_ENABLE; + +typedef struct D3D11_RENDER_TARGET_BLEND_DESC +{ + BOOL BlendEnable; + D3D11_BLEND SrcBlend; + D3D11_BLEND DestBlend; + D3D11_BLEND_OP BlendOp; + D3D11_BLEND SrcBlendAlpha; + D3D11_BLEND DestBlendAlpha; + D3D11_BLEND_OP BlendOpAlpha; + UINT8 RenderTargetWriteMask; +} D3D11_RENDER_TARGET_BLEND_DESC; + +typedef struct D3D11_BLEND_DESC +{ + BOOL AlphaToCoverageEnable; + BOOL IndependentBlendEnable; + D3D11_RENDER_TARGET_BLEND_DESC RenderTarget[8]; +} D3D11_BLEND_DESC; + +typedef struct D3D11_RASTERIZER_DESC +{ + D3D11_FILL_MODE FillMode; + D3D11_CULL_MODE CullMode; + BOOL FrontCounterClockwise; + INT DepthBias; + FLOAT DepthBiasClamp; + FLOAT SlopeScaledDepthBias; + BOOL DepthClipEnable; + BOOL ScissorEnable; + BOOL MultisampleEnable; + BOOL AntialiasedLineEnable; +} D3D11_RASTERIZER_DESC; + +typedef struct D3D11_SUBRESOURCE_DATA +{ + const void *pSysMem; + UINT SysMemPitch; + UINT SysMemSlicePitch; +} D3D11_SUBRESOURCE_DATA; + +typedef struct D3D11_MAPPED_SUBRESOURCE +{ + void *pData; + UINT RowPitch; + UINT DepthPitch; +} D3D11_MAPPED_SUBRESOURCE; + +typedef struct D3D11_BUFFER_DESC +{ + UINT ByteWidth; + D3D11_USAGE Usage; + UINT BindFlags; + UINT CPUAccessFlags; + UINT MiscFlags; + UINT StructureByteStride; +} D3D11_BUFFER_DESC; + +typedef struct D3D11_TEXTURE1D_DESC +{ + UINT Width; + UINT MipLevels; + UINT ArraySize; + DXGI_FORMAT Format; + D3D11_USAGE Usage; + UINT BindFlags; + UINT CPUAccessFlags; + UINT MiscFlags; +} D3D11_TEXTURE1D_DESC; + +typedef struct D3D11_TEXTURE2D_DESC +{ + UINT Width; + UINT Height; + UINT MipLevels; + UINT ArraySize; + DXGI_FORMAT Format; + DXGI_SAMPLE_DESC SampleDesc; + D3D11_USAGE Usage; + UINT BindFlags; + UINT CPUAccessFlags; + UINT MiscFlags; +} D3D11_TEXTURE2D_DESC; + +typedef struct D3D11_TEXTURE3D_DESC +{ + UINT Width; + UINT Height; + UINT Depth; + UINT MipLevels; + DXGI_FORMAT Format; + D3D11_USAGE Usage; + UINT BindFlags; + UINT CPUAccessFlags; + UINT MiscFlags; +} D3D11_TEXTURE3D_DESC; + +typedef enum D3D11_TEXTURECUBE_FACE +{ + D3D11_TEXTURECUBE_FACE_POSITIVE_X, + D3D11_TEXTURECUBE_FACE_NEGATIVE_X, + D3D11_TEXTURECUBE_FACE_POSITIVE_Y, + D3D11_TEXTURECUBE_FACE_NEGATIVE_Y, + D3D11_TEXTURECUBE_FACE_POSITIVE_Z, + D3D11_TEXTURECUBE_FACE_NEGATIVE_Z +} D3D11_TEXTURECUBE_FACE; + +typedef struct D3D11_BUFFER_SRV +{ + union + { + UINT FirstElement; + UINT ElementOffset; + }; + union + { + UINT NumElements; + UINT ElementWidth; + }; +} D3D11_BUFFER_SRV; + +typedef enum D3D11_BUFFEREX_SRV_FLAG +{ + D3D11_BUFFEREX_SRV_FLAG_RAW = 1 +} D3D11_BUFFEREX_SRV_FLAG; + +typedef struct D3D11_BUFFEREX_SRV +{ + UINT FirstElement; + UINT NumElements; + UINT Flags; +} D3D11_BUFFEREX_SRV; + +typedef struct D3D11_TEX1D_SRV +{ + UINT MostDetailedMip; + UINT MipLevels; +} D3D11_TEX1D_SRV; + +typedef struct D3D11_TEX1D_ARRAY_SRV +{ + UINT MostDetailedMip; + UINT MipLevels; + UINT FirstArraySlice; + UINT ArraySize; +} D3D11_TEX1D_ARRAY_SRV; + +typedef struct D3D11_TEX2D_SRV +{ + UINT MostDetailedMip; + UINT MipLevels; +} D3D11_TEX2D_SRV; + +typedef struct D3D11_TEX2D_ARRAY_SRV +{ + UINT MostDetailedMip; + UINT MipLevels; + UINT FirstArraySlice; + UINT ArraySize; +} D3D11_TEX2D_ARRAY_SRV; + +typedef struct D3D11_TEX3D_SRV +{ + UINT MostDetailedMip; + UINT MipLevels; +} D3D11_TEX3D_SRV; + +typedef struct D3D11_TEXCUBE_SRV +{ + UINT MostDetailedMip; + UINT MipLevels; +} D3D11_TEXCUBE_SRV; + +typedef struct D3D11_TEXCUBE_ARRAY_SRV +{ + UINT MostDetailedMip; + UINT MipLevels; + UINT First2DArrayFace; + UINT NumCubes; +} D3D11_TEXCUBE_ARRAY_SRV; + +typedef struct D3D11_TEX2DMS_SRV +{ + UINT UnusedField_NothingToDefine; +} D3D11_TEX2DMS_SRV; + +typedef struct D3D11_TEX2DMS_ARRAY_SRV +{ + UINT FirstArraySlice; + UINT ArraySize; +} D3D11_TEX2DMS_ARRAY_SRV; + +typedef struct D3D11_SHADER_RESOURCE_VIEW_DESC +{ + DXGI_FORMAT Format; + D3D11_SRV_DIMENSION ViewDimension; + union + { + D3D11_BUFFER_SRV Buffer; + D3D11_TEX1D_SRV Texture1D; + D3D11_TEX1D_ARRAY_SRV Texture1DArray; + D3D11_TEX2D_SRV Texture2D; + D3D11_TEX2D_ARRAY_SRV Texture2DArray; + D3D11_TEX2DMS_SRV Texture2DMS; + D3D11_TEX2DMS_ARRAY_SRV Texture2DMSArray; + D3D11_TEX3D_SRV Texture3D; + D3D11_TEXCUBE_SRV TextureCube; + D3D11_TEXCUBE_ARRAY_SRV TextureCubeArray; + D3D11_BUFFEREX_SRV BufferEx; + }; +} D3D11_SHADER_RESOURCE_VIEW_DESC; + + +typedef struct D3D11_BUFFER_RTV +{ + union + { + UINT FirstElement; + UINT ElementOffset; + }; + union + { + UINT NumElements; + UINT ElementWidth; + }; +} D3D11_BUFFER_RTV; + +typedef struct D3D11_TEX1D_RTV +{ + UINT MipSlice; +} D3D11_TEX1D_RTV; + +typedef struct D3D11_TEX1D_ARRAY_RTV +{ + UINT MipSlice; + UINT FirstArraySlice; + UINT ArraySize; +} D3D11_TEX1D_ARRAY_RTV; + +typedef struct D3D11_TEX2D_RTV +{ + UINT MipSlice; +} D3D11_TEX2D_RTV; + +typedef struct D3D11_TEX2DMS_RTV +{ + UINT UnusedField_NothingToDefine; +} D3D11_TEX2DMS_RTV; + +typedef struct D3D11_TEX2D_ARRAY_RTV +{ + UINT MipSlice; + UINT FirstArraySlice; + UINT ArraySize; +} D3D11_TEX2D_ARRAY_RTV; + +typedef struct D3D11_TEX2DMS_ARRAY_RTV +{ + UINT FirstArraySlice; + UINT ArraySize; +} D3D11_TEX2DMS_ARRAY_RTV; + +typedef struct D3D11_TEX3D_RTV +{ + UINT MipSlice; + UINT FirstWSlice; + UINT WSize; +} D3D11_TEX3D_RTV; + +typedef struct D3D11_RENDER_TARGET_VIEW_DESC +{ + DXGI_FORMAT Format; + D3D11_RTV_DIMENSION ViewDimension; + union + { + D3D11_BUFFER_RTV Buffer; + D3D11_TEX1D_RTV Texture1D; + D3D11_TEX1D_ARRAY_RTV Texture1DArray; + D3D11_TEX2D_RTV Texture2D; + D3D11_TEX2D_ARRAY_RTV Texture2DArray; + D3D11_TEX2DMS_RTV Texture2DMS; + D3D11_TEX2DMS_ARRAY_RTV Texture2DMSArray; + D3D11_TEX3D_RTV Texture3D; + }; +} D3D11_RENDER_TARGET_VIEW_DESC; + +typedef struct D3D11_TEX1D_DSV +{ + UINT MipSlice; +} D3D11_TEX1D_DSV; + +typedef struct D3D11_TEX1D_ARRAY_DSV +{ + UINT MipSlice; + UINT FirstArraySlice; + UINT ArraySize; +} D3D11_TEX1D_ARRAY_DSV; + +typedef struct D3D11_TEX2D_DSV +{ + UINT MipSlice; +} D3D11_TEX2D_DSV; + +typedef struct D3D11_TEX2D_ARRAY_DSV +{ + UINT MipSlice; + UINT FirstArraySlice; + UINT ArraySize; +} D3D11_TEX2D_ARRAY_DSV; + +typedef struct D3D11_TEX2DMS_DSV +{ + UINT UnusedField_NothingToDefine; +} D3D11_TEX2DMS_DSV; + +typedef struct D3D11_TEX2DMS_ARRAY_DSV +{ + UINT FirstArraySlice; + UINT ArraySize; +} D3D11_TEX2DMS_ARRAY_DSV; + +typedef enum D3D11_DSV_FLAG +{ + D3D11_DSV_READ_ONLY_DEPTH = 1L, + D3D11_DSV_READ_ONLY_STENCIL = 2L +} D3D11_DSV_FLAG; + +typedef struct D3D11_DEPTH_STENCIL_VIEW_DESC +{ + DXGI_FORMAT Format; + D3D11_DSV_DIMENSION ViewDimension; + UINT Flags; + union + { + D3D11_TEX1D_DSV Texture1D; + D3D11_TEX1D_ARRAY_DSV Texture1DArray; + D3D11_TEX2D_DSV Texture2D; + D3D11_TEX2D_ARRAY_DSV Texture2DArray; + D3D11_TEX2DMS_DSV Texture2DMS; + D3D11_TEX2DMS_ARRAY_DSV Texture2DMSArray; + }; +} D3D11_DEPTH_STENCIL_VIEW_DESC; + +typedef enum D3D11_BUFFER_UAV_FLAG +{ + D3D11_BUFFER_UAV_FLAG_RAW = 1, + D3D11_BUFFER_UAV_FLAG_APPEND = 2, + D3D11_BUFFER_UAV_FLAG_COUNTER = 4 +} D3D11_BUFFER_UAV_FLAG; + +typedef struct D3D11_BUFFER_UAV +{ + UINT FirstElement; + UINT NumElements; + UINT Flags; +} D3D11_BUFFER_UAV; + +typedef struct D3D11_TEX1D_UAV +{ + UINT MipSlice; +} D3D11_TEX1D_UAV; + +typedef struct D3D11_TEX1D_ARRAY_UAV +{ + UINT MipSlice; + UINT FirstArraySlice; + UINT ArraySize; +} D3D11_TEX1D_ARRAY_UAV; + +typedef struct D3D11_TEX2D_UAV +{ + UINT MipSlice; +} D3D11_TEX2D_UAV; + +typedef struct D3D11_TEX2D_ARRAY_UAV +{ + UINT MipSlice; + UINT FirstArraySlice; + UINT ArraySize; +} D3D11_TEX2D_ARRAY_UAV; + +typedef struct D3D11_TEX3D_UAV +{ + UINT MipSlice; + UINT FirstWSlice; + UINT WSize; +} D3D11_TEX3D_UAV; + +typedef struct D3D11_UNORDERED_ACCESS_VIEW_DESC +{ + DXGI_FORMAT Format; + D3D11_UAV_DIMENSION ViewDimension; + union + { + D3D11_BUFFER_UAV Buffer; + D3D11_TEX1D_UAV Texture1D; + D3D11_TEX1D_ARRAY_UAV Texture1DArray; + D3D11_TEX2D_UAV Texture2D; + D3D11_TEX2D_ARRAY_UAV Texture2DArray; + D3D11_TEX3D_UAV Texture3D; + }; +} D3D11_UNORDERED_ACCESS_VIEW_DESC; + +typedef enum D3D11_FILTER +{ + D3D11_FILTER_MIN_MAG_MIP_POINT = 0, + D3D11_FILTER_MIN_MAG_POINT_MIP_LINEAR = 1, + D3D11_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT = 4, + D3D11_FILTER_MIN_POINT_MAG_MIP_LINEAR = 5, + D3D11_FILTER_MIN_LINEAR_MAG_MIP_POINT = 0x10, + D3D11_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR = 0x11, + D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT = 0x14, + D3D11_FILTER_MIN_MAG_MIP_LINEAR = 0x15, + D3D11_FILTER_ANISOTROPIC = 0x55, + D3D11_FILTER_COMPARISON_MIN_MAG_MIP_POINT = 0x80, + D3D11_FILTER_COMPARISON_MIN_MAG_POINT_MIP_LINEAR = 0x81, + D3D11_FILTER_COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT = 0x84, + D3D11_FILTER_COMPARISON_MIN_POINT_MAG_MIP_LINEAR = 0x85, + D3D11_FILTER_COMPARISON_MIN_LINEAR_MAG_MIP_POINT = 0x90, + D3D11_FILTER_COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR = 0x91, + D3D11_FILTER_COMPARISON_MIN_MAG_LINEAR_MIP_POINT = 0x94, + D3D11_FILTER_COMPARISON_MIN_MAG_MIP_LINEAR = 0x95, + D3D11_FILTER_COMPARISON_ANISOTROPIC = 0xd5 +} D3D11_FILTER; + +typedef enum D3D11_FILTER_TYPE +{ + D3D11_FILTER_TYPE_POINT, + D3D11_FILTER_TYPE_LINEAR +} D3D11_FILTER_TYPE; + +const unsigned int D3D11_FILTER_TYPE_MASK = 3; +const unsigned int D3D11_MIN_FILTER_SHIFT = 4; +const unsigned int D3D11_MAG_FILTER_SHIFT = 2; +const unsigned int D3D11_MIP_FILTER_SHIFT = 0; +const unsigned int D3D11_COMPARISON_FILTERING_BIT = 0x80; +const unsigned int D3D11_ANISOTROPIC_FILTERING_BIT = 0x40; + +cpp_quote("#define D3D11_ENCODE_BASIC_FILTER(min, mag, mip, comp) ((D3D11_FILTER) (((comp) ? D3D11_COMPARISON_FILTERING_BIT : 0 ) | (((min) & D3D11_FILTER_TYPE_MASK ) << D3D11_MIN_FILTER_SHIFT ) | (((mag) & D3D11_FILTER_TYPE_MASK ) << D3D11_MAG_FILTER_SHIFT ) | (((mip) & D3D11_FILTER_TYPE_MASK) << D3D11_MIP_FILTER_SHIFT)))") +cpp_quote("#define D3D11_ENCODE_ANISOTROPIC_FILTER(comp) ((D3D11_FILTER)(D3D11_ANISOTROPIC_FILTERING_BIT | D3D11_ENCODE_BASIC_FILTER(D3D11_FILTER_TYPE_LINEAR, D3D11_FILTER_TYPE_LINEAR, D3D11_FILTER_TYPE_LINEAR, comp)))") +cpp_quote("#define D3D11_DECODE_MIN_FILTER(f) ((D3D11_FILTER_TYPE)(((f) >> D3D11_MIN_FILTER_SHIFT) & D3D11_FILTER_TYPE_MASK))") +cpp_quote("#define D3D11_DECODE_MAG_FILTER(f) ((D3D11_FILTER_TYPE)(((f) >> D3D11_MAG_FILTER_SHIFT) & D3D11_FILTER_TYPE_MASK))") +cpp_quote("#define D3D11_DECODE_MIP_FILTER(f) ((D3D11_FILTER_TYPE)(((f) >> D3D11_MIP_FILTER_SHIFT) & D3D11_FILTER_TYPE_MASK))") +cpp_quote("#define D3D11_DECODE_IS_COMPARISON_FILTER(f) ((f) & D3D11_COMPARISON_FILTERING_BIT)") +cpp_quote("#define D3D11_DECODE_IS_ANISOTROPIC_FILTER(f) (((f) & D3D11_ANISOTROPIC_FILTERING_BIT) && (D3D11_DECODE_MIN_FILTER(f) == D3D11_FILTER_TYPE_LINEAR) && (D3D11_DECODE_MAG_FILTER(f) == D3D11_FILTER_TYPE_LINEAR) && (D3D11_DECODE_MIP_FILTER( f ) == D3D11_FILTER_TYPE_LINEAR))") + +typedef enum D3D11_TEXTURE_ADDRESS_MODE +{ + D3D11_TEXTURE_ADDRESS_WRAP = 1, + D3D11_TEXTURE_ADDRESS_MIRROR = 2, + D3D11_TEXTURE_ADDRESS_CLAMP = 3, + D3D11_TEXTURE_ADDRESS_BORDER = 4, + D3D11_TEXTURE_ADDRESS_MIRROR_ONCE = 5 +} D3D11_TEXTURE_ADDRESS_MODE; + +typedef struct D3D11_SAMPLER_DESC +{ + D3D11_FILTER Filter; + D3D11_TEXTURE_ADDRESS_MODE AddressU; + D3D11_TEXTURE_ADDRESS_MODE AddressV; + D3D11_TEXTURE_ADDRESS_MODE AddressW; + FLOAT MipLODBias; + UINT MaxAnisotropy; + D3D11_COMPARISON_FUNC ComparisonFunc; + FLOAT BorderColor[ 4 ]; + FLOAT MinLOD; + FLOAT MaxLOD; +} D3D11_SAMPLER_DESC; + +typedef enum D3D11_FORMAT_SUPPORT +{ + D3D11_FORMAT_SUPPORT_BUFFER = 0x1, + D3D11_FORMAT_SUPPORT_IA_VERTEX_BUFFER = 0x2, + D3D11_FORMAT_SUPPORT_IA_INDEX_BUFFER = 0x4, + D3D11_FORMAT_SUPPORT_SO_BUFFER = 0x8, + D3D11_FORMAT_SUPPORT_TEXTURE1D = 0x10, + D3D11_FORMAT_SUPPORT_TEXTURE2D = 0x20, + D3D11_FORMAT_SUPPORT_TEXTURE3D = 0x40, + D3D11_FORMAT_SUPPORT_TEXTURECUBE = 0x80, + D3D11_FORMAT_SUPPORT_SHADER_LOAD = 0x100, + D3D11_FORMAT_SUPPORT_SHADER_SAMPLE = 0x200, + D3D11_FORMAT_SUPPORT_SHADER_SAMPLE_COMPARISON = 0x400, + D3D11_FORMAT_SUPPORT_SHADER_SAMPLE_MONO_TEXT = 0x800, + D3D11_FORMAT_SUPPORT_MIP = 0x1000, + D3D11_FORMAT_SUPPORT_MIP_AUTOGEN = 0x2000, + D3D11_FORMAT_SUPPORT_RENDER_TARGET = 0x4000, + D3D11_FORMAT_SUPPORT_BLENDABLE = 0x8000, + D3D11_FORMAT_SUPPORT_DEPTH_STENCIL = 0x10000, + D3D11_FORMAT_SUPPORT_CPU_LOCKABLE = 0x20000, + D3D11_FORMAT_SUPPORT_MULTISAMPLE_RESOLVE = 0x40000, + D3D11_FORMAT_SUPPORT_DISPLAY = 0x80000, + D3D11_FORMAT_SUPPORT_CAST_WITHIN_BIT_LAYOUT = 0x100000, + D3D11_FORMAT_SUPPORT_MULTISAMPLE_RENDERTARGET = 0x200000, + D3D11_FORMAT_SUPPORT_MULTISAMPLE_LOAD = 0x400000, + D3D11_FORMAT_SUPPORT_SHADER_GATHER = 0x800000, + D3D11_FORMAT_SUPPORT_BACK_BUFFER_CAST = 0x1000000, + D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW = 0x2000000, + D3D11_FORMAT_SUPPORT_SHADER_GATHER_COMPARISON = 0x4000000 +} D3D11_FORMAT_SUPPORT; + +typedef enum D3D11_FORMAT_SUPPORT2 +{ + D3D11_FORMAT_SUPPORT2_UAV_ATOMIC_ADD = 1, + D3D11_FORMAT_SUPPORT2_UAV_ATOMIC_BITWISE_OPS = 2, + D3D11_FORMAT_SUPPORT2_UAV_ATOMIC_COMPARE_STORE_OR_COMPARE_EXCHANGE = 4, + D3D11_FORMAT_SUPPORT2_UAV_ATOMIC_EXCHANGE = 8, + D3D11_FORMAT_SUPPORT2_UAV_ATOMIC_SIGNED_MIN_OR_MAX = 0x10, + D3D11_FORMAT_SUPPORT2_UAV_ATOMIC_UNSIGNED_MIN_OR_MAX = 0x20, + D3D11_FORMAT_SUPPORT2_UAV_TYPED_LOAD = 0x40, + D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE = 0x80 +} D3D11_FORMAT_SUPPORT2; + +typedef enum D3D11_ASYNC_GETDATA_FLAG +{ + D3D11_ASYNC_GETDATA_DONOTFLUSH = 1 +} D3D11_ASYNC_GETDATA_FLAG; + +typedef enum D3D11_QUERY +{ + D3D11_QUERY_EVENT, + D3D11_QUERY_OCCLUSION, + D3D11_QUERY_TIMESTAMP, + D3D11_QUERY_TIMESTAMP_DISJOINT, + D3D11_QUERY_PIPELINE_STATISTICS, + D3D11_QUERY_OCCLUSION_PREDICATE, + D3D11_QUERY_SO_STATISTICS, + D3D11_QUERY_SO_OVERFLOW_PREDICATE, + D3D11_QUERY_SO_STATISTICS_STREAM0, + D3D11_QUERY_SO_OVERFLOW_PREDICATE_STREAM0, + D3D11_QUERY_SO_STATISTICS_STREAM1, + D3D11_QUERY_SO_OVERFLOW_PREDICATE_STREAM1, + D3D11_QUERY_SO_STATISTICS_STREAM2, + D3D11_QUERY_SO_OVERFLOW_PREDICATE_STREAM2, + D3D11_QUERY_SO_STATISTICS_STREAM3, + D3D11_QUERY_SO_OVERFLOW_PREDICATE_STREAM3, +} D3D11_QUERY; + +typedef enum D3D11_QUERY_MISC_FLAG +{ + D3D11_QUERY_MISC_PREDICATEHINT = 1 +} D3D11_QUERY_MISC_FLAG; + +typedef struct D3D11_QUERY_DESC +{ + D3D11_QUERY Query; + UINT MiscFlags; +} D3D11_QUERY_DESC; + +typedef struct D3D11_QUERY_DATA_TIMESTAMP_DISJOINT +{ + UINT64 Frequency; + BOOL Disjoint; +} D3D11_QUERY_DATA_TIMESTAMP_DISJOINT; + +typedef struct D3D11_QUERY_DATA_PIPELINE_STATISTICS +{ + UINT64 IAVertices; + UINT64 IAPrimitives; + UINT64 VSInvocations; + UINT64 GSInvocations; + UINT64 GSPrimitives; + UINT64 CInvocations; + UINT64 CPrimitives; + UINT64 PSInvocations; + UINT64 HSInvocations; + UINT64 DSInvocations; + UINT64 CSInvocations; +} D3D11_QUERY_DATA_PIPELINE_STATISTICS; + +typedef struct D3D11_QUERY_DATA_SO_STATISTICS +{ + UINT64 NumPrimitivesWritten; + UINT64 PrimitivesStorageNeeded; +} D3D11_QUERY_DATA_SO_STATISTICS; + +typedef enum D3D11_COUNTER +{ + D3D11_COUNTER_DEVICE_DEPENDENT_0 = 0x40000000 +} D3D11_COUNTER; + +typedef enum D3D11_COUNTER_TYPE +{ + D3D11_COUNTER_TYPE_FLOAT32, + D3D11_COUNTER_TYPE_UINT16, + D3D11_COUNTER_TYPE_UINT32, + D3D11_COUNTER_TYPE_UINT64, +} D3D11_COUNTER_TYPE; + +typedef struct D3D11_COUNTER_DESC +{ + D3D11_COUNTER Counter; + UINT MiscFlags; +} D3D11_COUNTER_DESC; + +typedef struct D3D11_COUNTER_INFO +{ + D3D11_COUNTER LastDeviceDependentCounter; + UINT NumSimultaneousCounters; + UINT8 NumDetectableParallelUnits; +} D3D11_COUNTER_INFO; + +typedef enum D3D11_STANDARD_MULTISAMPLE_QUALITY_LEVELS +{ + D3D11_STANDARD_MULTISAMPLE_PATTERN = 0xffffffff, + D3D11_CENTER_MULTISAMPLE_PATTERN = 0xfffffffe +} D3D11_STANDARD_MULTISAMPLE_QUALITY_LEVELS; + +typedef enum D3D11_DEVICE_CONTEXT_TYPE +{ + D3D11_DEVICE_CONTEXT_IMMEDIATE, + D3D11_DEVICE_CONTEXT_DEFERRED, +} D3D11_DEVICE_CONTEXT_TYPE; + +typedef struct D3D11_CLASS_INSTANCE_DESC +{ + UINT InstanceId; + UINT InstanceIndex; + UINT TypeId; + UINT ConstantBuffer; + UINT BaseConstantBufferOffset; + UINT BaseTexture; + UINT BaseSampler; + BOOL Created; +} D3D11_CLASS_INSTANCE_DESC; + +typedef enum D3D11_FEATURE +{ + D3D11_FEATURE_THREADING, + D3D11_FEATURE_DOUBLES, + D3D11_FEATURE_FORMAT_SUPPORT, + D3D11_FEATURE_FORMAT_SUPPORT2, + D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, +} D3D11_FEATURE; + +typedef struct D3D11_FEATURE_DATA_THREADING +{ + BOOL DriverConcurrentCreates; + BOOL DriverCommandLists; +} D3D11_FEATURE_DATA_THREADING; + +typedef struct D3D11_FEATURE_DATA_DOUBLES +{ + BOOL DoublePrecisionFloatShaderOps; +} D3D11_FEATURE_DATA_DOUBLES; + +typedef struct D3D11_FEATURE_DATA_FORMAT_SUPPORT +{ + DXGI_FORMAT InFormat; + UINT OutFormatSupport; +} D3D11_FEATURE_DATA_FORMAT_SUPPORT; + +typedef struct D3D11_FEATURE_DATA_FORMAT_SUPPORT2 +{ + DXGI_FORMAT InFormat; + UINT OutFormatSupport2; +} D3D11_FEATURE_DATA_FORMAT_SUPPORT2; + +typedef struct D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS +{ + BOOL ComputeShaders_Plus_RawAndStructuredBuffers_Via_Shader_4_x; +} D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS; + +interface ID3D11Device; + +[object, local, uuid("1841e5c8-16b0-489b-bcc8-44cfb0d5deae")] +interface ID3D11DeviceChild : IUnknown { + void GetDevice( + [out] ID3D11Device **a + ); + + HRESULT GetPrivateData( + [in] REFGUID a, + [in, out] UINT *b, + [out] void *c + ); + + HRESULT SetPrivateData( + [in] REFGUID a, + [in] UINT b, + [in] const void *c + ); + + HRESULT SetPrivateDataInterface( + [in] REFGUID a, + [in] const IUnknown *b + ); +}; + +[object, local, uuid("e4819ddc-4cf0-4025-bd26-5de82a3e07b7")] +interface ID3D11InputLayout : ID3D11DeviceChild +{ +}; + +[object, local, uuid("03823efb-8d8f-4e1c-9aa2-f64bb2cbfdf1")] +interface ID3D11DepthStencilState : ID3D11DeviceChild { + void GetDesc( + [out] D3D11_DEPTH_STENCIL_DESC *a + ); +}; + +[object, local, uuid("75b68faa-347d-4159-8f45-a0640f01cd9a")] +interface ID3D11BlendState : ID3D11DeviceChild { + void GetDesc( + [out] D3D11_BLEND_DESC *a + ); +}; + +[object, local, uuid("9bb4ab81-ab1a-4d8f-b506-fc04200b6ee7")] +interface ID3D11RasterizerState : ID3D11DeviceChild { + void GetDesc( + [out] D3D11_RASTERIZER_DESC *a + ); +}; + +[object, local, uuid("da6fea51-564c-4487-9810-f0d0f9b4e3a5")] +interface ID3D11SamplerState : ID3D11DeviceChild +{ + void GetDesc( + [out] D3D11_SAMPLER_DESC *a + ); + +}; + +[object, local, uuid("dc8e63f3-d12b-4952-b47b-5e45026a862d")] +interface ID3D11Resource : ID3D11DeviceChild { + void GetType( + [out] D3D11_RESOURCE_DIMENSION *a + ); + + void SetEvictionPriority( + [in] UINT a + ); + + UINT GetEvictionPriority( + ); +}; + +[object, local, uuid("48570b85-d1ee-4fcd-a250-eb350722b037")] +interface ID3D11Buffer : ID3D11Resource { + void GetDesc( + [out] D3D11_BUFFER_DESC *a + ); + +}; + +[object, local, uuid("f8fb5c27-c6b3-4f75-a4c8-439af2ef564c")] +interface ID3D11Texture1D : ID3D11Resource { + void GetDesc( + [out] D3D11_TEXTURE1D_DESC *a + ); +}; + +[object, local, uuid("6f15aaf2-d208-4e89-9ab4-489535d34f9c")] +interface ID3D11Texture2D : ID3D11Resource { + void GetDesc( + [out] D3D11_TEXTURE2D_DESC *a + ); +}; + +[object, local, uuid("037e866e-f56d-4357-a8af-9dabbe6e250e")] +interface ID3D11Texture3D : ID3D11Resource { + void GetDesc( + [out] D3D11_TEXTURE3D_DESC *a + ); +}; + +[object, local, uuid("839d1216-bb2e-412b-b7f4-a9dbebe08ed1")] +interface ID3D11View : ID3D11DeviceChild { + void GetResource( + [out] ID3D11Resource **a + ); + +}; + +[object, local, uuid("b0e06fe0-8192-4e1a-b1ca-36d7414710b2")] +interface ID3D11ShaderResourceView : ID3D11View { + void GetDesc( + [out] D3D11_SHADER_RESOURCE_VIEW_DESC *a + ); +}; + +[object, local, uuid("dfdba067-0b8d-4865-875b-d7b4516cc164")] +interface ID3D11RenderTargetView : ID3D11View +{ + void GetDesc( + [out] D3D11_RENDER_TARGET_VIEW_DESC *a + ); +}; + +[object, local, uuid("9fdac92a-1876-48c3-afad-25b94f84a9b6")] +interface ID3D11DepthStencilView : ID3D11View +{ + void GetDesc( + [out] D3D11_DEPTH_STENCIL_VIEW_DESC *a + ); +}; + +[object, local, uuid("28acf509-7f5c-48f6-8611-f316010a6380")] +interface ID3D11UnorderedAccessView : ID3D11View +{ + void GetDesc( + [out] D3D11_UNORDERED_ACCESS_VIEW_DESC *a + ); +}; + +[object, local, uuid("3b301d64-d678-4289-8897-22f8928b72f3")] +interface ID3D11VertexShader : ID3D11DeviceChild +{ +}; + +[object, local, uuid("8e5c6061-628a-4c8e-8264-bbe45cb3d5dd")] +interface ID3D11HullShader : ID3D11DeviceChild +{ +}; + +[object, local, uuid("f582c508-0f36-490c-9977-31eece268cfa")] +interface ID3D11DomainShader : ID3D11DeviceChild +{ +}; + + +[object, local, uuid("38325b96-effb-4022-ba02-2e795b70275c")] +interface ID3D11GeometryShader : ID3D11DeviceChild +{ +}; + +[object, local, uuid("ea82e40d-51dc-4f33-93d4-db7c9125ae8c")] +interface ID3D11PixelShader : ID3D11DeviceChild +{ +}; + +[object, local, uuid("4f5b196e-c2bd-495e-bd01-1fded38e4969")] +interface ID3D11ComputeShader : ID3D11DeviceChild +{ +}; + +[object, local, uuid("4b35d0cd-1e15-4258-9c98-1b1333f6dd3b")] +interface ID3D11Asynchronous : ID3D11DeviceChild +{ + UINT GetDataSize(); +}; + +[object, local, uuid("d6c00747-87b7-425e-b84d-44d108560afd")] +interface ID3D11Query : ID3D11Asynchronous +{ + void GetDesc( + [out] D3D11_QUERY_DESC *a + ); +}; + +[object, local, uuid("9eb576dd-9f77-4d86-81aa-8bab5fe490e2")] +interface ID3D11Predicate : ID3D11Query +{ +}; + +[object, local, uuid("6e8c49fb-a371-4770-b440-29086022b741")] +interface ID3D11Counter : ID3D11Asynchronous +{ + void GetDesc( + [out] D3D11_COUNTER_DESC *a + ); +}; + +interface ID3D11ClassLinkage; + +[object, local, uuid("a6cd7faa-b0b7-4a2f-9436-8662a65797cb")] +interface ID3D11ClassInstance : ID3D11DeviceChild +{ + void GetClassLinkage( + [out] ID3D11ClassLinkage **a + ); + + void GetDesc( + [out] D3D11_CLASS_INSTANCE_DESC *a + ); + + void GetInstanceName( + [out] LPSTR a, + [in, out] SIZE_T *b + ); + + void GetTypeName( + [out] LPSTR a, + [in, out] SIZE_T *b + ); +}; + +[object, local, uuid("ddf57cba-9543-46e4-a12b-f207a0fe7fed")] +interface ID3D11ClassLinkage : ID3D11DeviceChild +{ + HRESULT GetClassInstance( + [in] LPCSTR a, + [in] UINT b, + [out] ID3D11ClassInstance **c + ); + + HRESULT CreateClassInstance( + [in] LPCSTR a, + [in] UINT b, + [in] UINT c, + [in] UINT d, + [in] UINT e, + [out] ID3D11ClassInstance **f + ); +}; + +[object, local, uuid("a24bc4d1-769e-43f7-8013-98ff566c18e2")] +interface ID3D11CommandList : ID3D11DeviceChild +{ + UINT GetContextFlags(); +}; + +interface ID3D11DeviceContext; + +[object, local, uuid("db6f6ddb-ac77-4e88-8253-819df9bbf140")] +interface ID3D11Device : IUnknown +{ + HRESULT CreateBuffer( + [in] const D3D11_BUFFER_DESC *a, + [in] const D3D11_SUBRESOURCE_DATA *b, + [out] ID3D11Buffer **c + ); + + HRESULT CreateTexture1D( + [in] const D3D11_TEXTURE1D_DESC *a, + [in] const D3D11_SUBRESOURCE_DATA *b, + [out] ID3D11Texture1D **c + ); + + HRESULT CreateTexture2D( + [in] const D3D11_TEXTURE2D_DESC *a, + [in] const D3D11_SUBRESOURCE_DATA *b, + [out] ID3D11Texture2D **c + ); + + HRESULT CreateTexture3D( + [in] const D3D11_TEXTURE3D_DESC *a, + [in] const D3D11_SUBRESOURCE_DATA *b, + [out] ID3D11Texture3D **c + ); + + HRESULT CreateShaderResourceView( + [in] ID3D11Resource *a, + [in] const D3D11_SHADER_RESOURCE_VIEW_DESC *b, + [out] ID3D11ShaderResourceView **c + ); + + HRESULT CreateUnorderedAccessView( + [in] ID3D11Resource *a, + [in] const D3D11_UNORDERED_ACCESS_VIEW_DESC *b, + [out] ID3D11UnorderedAccessView **c + ); + + HRESULT CreateRenderTargetView( + [in] ID3D11Resource *a, + [in] const D3D11_RENDER_TARGET_VIEW_DESC *b, + [out] ID3D11RenderTargetView **c + ); + + HRESULT CreateDepthStencilView( + [in] ID3D11Resource *a, + [in] const D3D11_DEPTH_STENCIL_VIEW_DESC *b, + [out] ID3D11DepthStencilView **c + ); + + HRESULT CreateInputLayout( + [in] const D3D11_INPUT_ELEMENT_DESC *a, + [in] UINT b, + [in] const void *c, + [in] SIZE_T d, + [out] ID3D11InputLayout **e + ); + + HRESULT CreateVertexShader( + [in] const void *a, + [in] SIZE_T b, + [in] ID3D11ClassLinkage *c, + [out] ID3D11VertexShader **d + ); + + HRESULT CreateGeometryShader( + [in] const void *a, + [in] SIZE_T b, + [in] ID3D11ClassLinkage *c, + [out] ID3D11GeometryShader **d + ); + + HRESULT CreateGeometryShaderWithStreamOutput( + [in] const void *a, + [in] SIZE_T b, + [in] const D3D11_SO_DECLARATION_ENTRY *c, + [in] UINT d, + [in] const UINT *e, + [in] UINT f, + [in] UINT g, + [in] ID3D11ClassLinkage *h, + [out] ID3D11GeometryShader **i + ); + + HRESULT CreatePixelShader( + [in] const void *a, + [in] SIZE_T b, + [in] ID3D11ClassLinkage *c, + [out] ID3D11PixelShader **d + ); + + HRESULT CreateHullShader( + [in] const void *a, + [in] SIZE_T b, + [in] ID3D11ClassLinkage *c, + [out] ID3D11HullShader **d + ); + + HRESULT CreateDomainShader( + [in] const void *a, + [in] SIZE_T b, + [in] ID3D11ClassLinkage *c, + [out] ID3D11DomainShader **d + ); + + HRESULT CreateComputeShader( + [in] const void *a, + [in] SIZE_T b, + [in] ID3D11ClassLinkage *c, + [out] ID3D11ComputeShader **d + ); + + HRESULT CreateClassLinkage( + [out] ID3D11ClassLinkage **a + ); + + HRESULT CreateBlendState( + [in] const D3D11_BLEND_DESC *a, + [out] ID3D11BlendState **b + ); + + HRESULT CreateDepthStencilState( + [in] const D3D11_DEPTH_STENCIL_DESC *a, + [out] ID3D11DepthStencilState **b + ); + + HRESULT CreateRasterizerState( + [in] const D3D11_RASTERIZER_DESC *a, + [out] ID3D11RasterizerState **b + ); + + HRESULT CreateSamplerState( + [in] const D3D11_SAMPLER_DESC *a, + [out] ID3D11SamplerState **b + ); + + HRESULT CreateQuery( + [in] const D3D11_QUERY_DESC *a, + [out] ID3D11Query **b + ); + + HRESULT CreatePredicate( + [in] const D3D11_QUERY_DESC *a, + [out] ID3D11Predicate **b + ); + + HRESULT CreateCounter( + [in] const D3D11_COUNTER_DESC *a, + [out] ID3D11Counter **b + ); + + HRESULT CreateDeferredContext( + [in] UINT a, + [out] ID3D11DeviceContext **b + ); + + HRESULT OpenSharedResource( + [in] HANDLE a, + [in] REFIID b, + [out] void **c + ); + + HRESULT CheckFormatSupport( + [in] DXGI_FORMAT a, + [out] UINT *b + ); + + HRESULT CheckMultisampleQualityLevels( + [in] DXGI_FORMAT a, + [in] UINT b, + [out] UINT *c + ); + + void CheckCounterInfo( + [out] D3D11_COUNTER_INFO *a + ); + + HRESULT CheckCounter( + [in] const D3D11_COUNTER_DESC *a, + [out] D3D11_COUNTER_TYPE *b, + [out] UINT *c, + [out] LPSTR d, + [in, out, optional] UINT *e, + [out] LPSTR f, + [in, out, optional] UINT *g, + [out] LPSTR h, + [in, out, optional] UINT *i + ); + + HRESULT CheckFeatureSupport( + [in] D3D11_FEATURE a, + [out] void *b, + [in] UINT c + ); + + HRESULT GetPrivateData( + [in] REFGUID a, + [in, out] UINT *b, + [out] void *c + ); + + HRESULT SetPrivateData( + [in] REFGUID a, + [in] UINT b, + [in] const void *c + ); + + HRESULT SetPrivateDataInterface( + [in] REFGUID a, + [in] const IUnknown *b + ); + + D3D_FEATURE_LEVEL GetFeatureLevel(); + + UINT GetCreationFlags(); + + HRESULT GetDeviceRemovedReason(); + + void GetImmediateContext( + [out] ID3D11DeviceContext **a + ); + + HRESULT SetExceptionMode( + [in] UINT a + ); + + UINT GetExceptionMode(); +}; + +[object, local, uuid("c0bfa96c-e089-44fb-8eaf-26f8796190da")] +interface ID3D11DeviceContext : ID3D11DeviceChild +{ + void VSSetConstantBuffers( + [in] UINT a, + [in] UINT b, + [in] ID3D11Buffer *const *c + ); + + void PSSetShaderResources( + [in] UINT a, + [in] UINT b, + [in] ID3D11ShaderResourceView *const *c + ); + + void PSSetShader( + [in] ID3D11PixelShader *a, + [in] ID3D11ClassInstance *const *b, + [in] UINT c + ); + + void PSSetSamplers( + [in] UINT a, + [in] UINT b, + [in] ID3D11SamplerState *const *c + ); + + void VSSetShader( + [in] ID3D11VertexShader *a, + [in] ID3D11ClassInstance *const *b, + [in] UINT c + ); + + void DrawIndexed( + [in] UINT a, + [in] UINT b, + [in] INT c + ); + + void Draw( + [in] UINT a, + [in] UINT b + ); + + HRESULT Map( + [in] ID3D11Resource *a, + [in] UINT b, + [in] D3D11_MAP c, + [in] UINT d, + [out] D3D11_MAPPED_SUBRESOURCE *e + ); + + void Unmap( + [in] ID3D11Resource *a, + [in] UINT b + ); + + void PSSetConstantBuffers( + [in] UINT a, + [in] UINT b, + [in] ID3D11Buffer *const *c + ); + + void IASetInputLayout( + [in] ID3D11InputLayout *a + ); + + void IASetVertexBuffers( + [in] UINT a, + [in] UINT b, + [in] ID3D11Buffer *const *c, + [in] const UINT *d, + [in] const UINT *e + ); + + void IASetIndexBuffer( + [in] ID3D11Buffer *a, + [in] DXGI_FORMAT b, + [in] UINT c + ); + + void DrawIndexedInstanced( + [in] UINT a, + [in] UINT b, + [in] UINT c, + [in] INT d, + [in] UINT e + ); + + void DrawInstanced( + [in] UINT a, + [in] UINT b, + [in] UINT c, + [in] UINT d + ); + + void GSSetConstantBuffers( + [in] UINT a, + [in] UINT b, + [in] ID3D11Buffer *const *c + ); + + void GSSetShader( + [in] ID3D11GeometryShader *a, + [in] ID3D11ClassInstance *const *b, + [in] UINT c + ); + + void IASetPrimitiveTopology( + [in] D3D11_PRIMITIVE_TOPOLOGY a + ); + + void VSSetShaderResources( + [in] UINT a, + [in] UINT b, + [in] ID3D11ShaderResourceView *const *c + ); + + void VSSetSamplers( + [in] UINT a, + [in] UINT b, + [in] ID3D11SamplerState *const *c + ); + + void Begin( + [in] ID3D11Asynchronous *a + ); + + void End( + [in] ID3D11Asynchronous *a + ); + + HRESULT GetData( + [in] ID3D11Asynchronous *a, + [out] void *b, + [in] UINT c, + [in] UINT d + ); + + void SetPredication( + [in] ID3D11Predicate *a, + [in] BOOL b + ); + + void GSSetShaderResources( + [in] UINT a, + [in] UINT b, + [in] ID3D11ShaderResourceView *const *c + ); + + void GSSetSamplers( + [in] UINT a, + [in] UINT b, + [in] ID3D11SamplerState *const *c + ); + + void OMSetRenderTargets( + [in] UINT a, + [in] ID3D11RenderTargetView *const *b, + [in] ID3D11DepthStencilView *c + ); + + void OMSetRenderTargetsAndUnorderedAccessViews( + [in] UINT a, + [in] ID3D11RenderTargetView *const *b, + [in] ID3D11DepthStencilView *c, + [in] UINT d, + [in] UINT e, + [in] ID3D11UnorderedAccessView *const *f, + [in] const UINT *g + ); + + void OMSetBlendState( + [in] ID3D11BlendState *a, + [in] const FLOAT b[4], + [in] UINT c + ); + + void OMSetDepthStencilState( + [in] ID3D11DepthStencilState *a, + [in] UINT b + ); + + void SOSetTargets( + [in] UINT a, + [in] ID3D11Buffer *const *b, + [in] const UINT *c + ); + + void DrawAuto( + ); + + void DrawIndexedInstancedIndirect( + [in] ID3D11Buffer *a, + [in] UINT b + ); + + void DrawInstancedIndirect( + [in] ID3D11Buffer *a, + [in] UINT b + ); + + void Dispatch( + [in] UINT a, + [in] UINT b, + [in] UINT c + ); + + void DispatchIndirect( + [in] ID3D11Buffer *a, + [in] UINT b + ); + + void RSSetState( + [in] ID3D11RasterizerState *a + ); + + void RSSetViewports( + [in] UINT a, + [in] const D3D11_VIEWPORT *b + ); + + void RSSetScissorRects( + [in] UINT a, + [in] const D3D11_RECT *b + ); + + void CopySubresourceRegion( + [in] ID3D11Resource *a, + [in] UINT b, + [in] UINT c, + [in] UINT d, + [in] UINT e, + [in] ID3D11Resource *f, + [in] UINT g, + [in] const D3D11_BOX *h + ); + + void CopyResource( + [in] ID3D11Resource *a, + [in] ID3D11Resource *b + ); + + void UpdateSubresource( + [in] ID3D11Resource *a, + [in] UINT b, + [in] const D3D11_BOX *c, + [in] const void *d, + [in] UINT e, + [in] UINT f + ); + + void CopyStructureCount( + [in] ID3D11Buffer *a, + [in] UINT b, + [in] ID3D11UnorderedAccessView *c + ); + + void ClearRenderTargetView( + [in] ID3D11RenderTargetView *a, + [in] const FLOAT b[4] + ); + + void ClearUnorderedAccessViewUint( + [in] ID3D11UnorderedAccessView *a, + [in] const UINT b[4] + ); + + void ClearUnorderedAccessViewFloat( + [in] ID3D11UnorderedAccessView *a, + [in] const FLOAT b[4] + ); + + void ClearDepthStencilView( + [in] ID3D11DepthStencilView *a, + [in] UINT b, + [in] FLOAT c, + [in] UINT8 d + ); + + void GenerateMips( + [in] ID3D11ShaderResourceView *a + ); + + void SetResourceMinLOD( + [in] ID3D11Resource *a, + [in] FLOAT b + ); + + FLOAT GetResourceMinLOD( + [in] ID3D11Resource *a + ); + + void ResolveSubresource( + [in] ID3D11Resource *a, + [in] UINT b, + [in] ID3D11Resource *c, + [in] UINT d, + [in] DXGI_FORMAT e + ); + + void ExecuteCommandList( + [in] ID3D11CommandList *a, + [in] BOOL b + ); + + void HSSetShaderResources( + [in] UINT a, + [in] UINT b, + [in] ID3D11ShaderResourceView *const *c + ); + + void HSSetShader( + [in] ID3D11HullShader *a, + [in] ID3D11ClassInstance *const *b, + [in] UINT c + ); + + void HSSetSamplers( + [in] UINT a, + [in] UINT b, + [in] ID3D11SamplerState *const *c + ); + + void HSSetConstantBuffers( + [in] UINT a, + [in] UINT b, + [in] ID3D11Buffer *const *c + ); + + void DSSetShaderResources( + [in] UINT a, + [in] UINT b, + [in] ID3D11ShaderResourceView *const *c + ); + + void DSSetShader( + [in] ID3D11DomainShader *a, + [in] ID3D11ClassInstance *const *b, + [in] UINT c + ); + + void DSSetSamplers( + [in] UINT a, + [in] UINT b, + [in] ID3D11SamplerState *const *c + ); + + void DSSetConstantBuffers( + [in] UINT a, + [in] UINT b, + [in] ID3D11Buffer *const *c + ); + + void CSSetShaderResources( + [in] UINT a, + [in] UINT b, + [in] ID3D11ShaderResourceView *const *c + ); + + void CSSetUnorderedAccessViews( + [in] UINT a, + [in] UINT b, + [in] ID3D11UnorderedAccessView *const *c, + [in] const UINT *d + ); + + void CSSetShader( + [in] ID3D11ComputeShader *a, + [in] ID3D11ClassInstance *const *b, + [in] UINT c + ); + + void CSSetSamplers( + [in] UINT a, + [in] UINT b, + [in] ID3D11SamplerState *const *c + ); + + void CSSetConstantBuffers( + [in] UINT a, + [in] UINT b, + [in] ID3D11Buffer *const *c + ); + + void VSGetConstantBuffers( + [in] UINT a, + [in] UINT b, + [out] ID3D11Buffer **c + ); + + void PSGetShaderResources( + [in] UINT a, + [in] UINT b, + [out] ID3D11ShaderResourceView **c + ); + + void PSGetShader( + [out] ID3D11PixelShader **a, + [out] ID3D11ClassInstance **b, + [in, out, optional] UINT *c + ); + + void PSGetSamplers( + [in] UINT a, + [in] UINT b, + [out] ID3D11SamplerState **c + ); + + void VSGetShader( + [out] ID3D11VertexShader **a, + [out] ID3D11ClassInstance **b, + [in, out, optional] UINT *c + ); + + void PSGetConstantBuffers( + [in] UINT a, + [in] UINT b, + [out] ID3D11Buffer **c + ); + + void IAGetInputLayout( + [out] ID3D11InputLayout **a + ); + + void IAGetVertexBuffers( + [in] UINT a, + [in] UINT b, + [out] ID3D11Buffer **c, + [out] UINT *d, + [out] UINT *e + ); + + void IAGetIndexBuffer( + [out] ID3D11Buffer **a, + [out] DXGI_FORMAT *b, + [out] UINT *c + ); + + void GSGetConstantBuffers( + [in] UINT a, + [in] UINT b, + [out] ID3D11Buffer **c + ); + + void GSGetShader( + [out] ID3D11GeometryShader **a, + [out] ID3D11ClassInstance **b, + [in, out, optional] UINT *c + ); + + void IAGetPrimitiveTopology( + [out] D3D11_PRIMITIVE_TOPOLOGY *a + ); + + void VSGetShaderResources( + [in] UINT a, + [in] UINT b, + [out] ID3D11ShaderResourceView **c + ); + + void VSGetSamplers( + [in] UINT a, + [in] UINT b, + [out] ID3D11SamplerState **c + ); + + void GetPredication( + [out] ID3D11Predicate **a, + [out] BOOL *b + ); + + void GSGetShaderResources( + [in] UINT a, + [in] UINT b, + [out] ID3D11ShaderResourceView **c + ); + + void GSGetSamplers( + [in] UINT a, + [in] UINT b, + [out] ID3D11SamplerState **c + ); + + void OMGetRenderTargets( + [in] UINT a, + [out] ID3D11RenderTargetView **b, + [out] ID3D11DepthStencilView **c + ); + + void OMGetRenderTargetsAndUnorderedAccessViews( + [in] UINT a, + [out] ID3D11RenderTargetView **b, + [out] ID3D11DepthStencilView **c, + [in] UINT d, + [in] UINT e, + [out] ID3D11UnorderedAccessView **f + ); + + void OMGetBlendState( + [out] ID3D11BlendState **a, + [out] FLOAT b[4], + [out] UINT *c + ); + + void OMGetDepthStencilState( + [out] ID3D11DepthStencilState **a, + [out] UINT *b + ); + + void SOGetTargets( + [in] UINT a, + [out] ID3D11Buffer **b + ); + + void RSGetState( + [out] ID3D11RasterizerState **a + ); + + void RSGetViewports( + [in, out] UINT *a, + [out] D3D11_VIEWPORT *b + ); + + void RSGetScissorRects( + [in, out] UINT *a, + [out] D3D11_RECT *b + ); + + void HSGetShaderResources( + [in] UINT a, + [in] UINT b, + [out] ID3D11ShaderResourceView **c + ); + + void HSGetShader( + [out] ID3D11HullShader **a, + [out] ID3D11ClassInstance **b, + [in, out, optional] UINT *c + ); + + void HSGetSamplers( + [in] UINT a, + [in] UINT b, + [out] ID3D11SamplerState **c + ); + + void HSGetConstantBuffers( + [in] UINT a, + [in] UINT b, + [out] ID3D11Buffer **c + ); + + void DSGetShaderResources( + [in] UINT a, + [in] UINT b, + [out] ID3D11ShaderResourceView **c + ); + + void DSGetShader( + [out] ID3D11DomainShader **a, + [out] ID3D11ClassInstance **b, + [in, out, optional] UINT *c + ); + + void DSGetSamplers( + [in] UINT a, + [in] UINT b, + [out] ID3D11SamplerState **c + ); + + void DSGetConstantBuffers( + [in] UINT a, + [in] UINT b, + [out] ID3D11Buffer **c + ); + + void CSGetShaderResources( + [in] UINT a, + [in] UINT b, + [out] ID3D11ShaderResourceView **c + ); + + void CSGetUnorderedAccessViews( + [in] UINT a, + [in] UINT b, + [out] ID3D11UnorderedAccessView **c + ); + + void CSGetShader( + [out] ID3D11ComputeShader **a, + [out] ID3D11ClassInstance **b, + [in, out, optional] UINT *c + ); + + void CSGetSamplers( + [in] UINT a, + [in] UINT b, + [out] ID3D11SamplerState **c + ); + + void CSGetConstantBuffers( + [in] UINT a, + [in] UINT b, + [out] ID3D11Buffer **c + ); + + void ClearState(); + + void Flush(); + + D3D11_DEVICE_CONTEXT_TYPE GetType(); + + UINT GetContextFlags(); + + HRESULT FinishCommandList( + [in] BOOL a, + [out] ID3D11CommandList **b + ); +}; + +cpp_quote("#include \"d3d10_1.h\"") +cpp_quote("#include \"d3d10shader.h\"") +cpp_quote("#include \"d3d10effect.h\"") +/*cpp_quote("#include \"d3d10_1shader.h\"") */ + +typedef enum D3D11_CREATE_DEVICE_FLAG +{ + D3D11_CREATE_DEVICE_SINGLETHREADED = 1, + D3D11_CREATE_DEVICE_DEBUG = 2, + D3D11_CREATE_DEVICE_SWITCH_TO_REF = 4, + D3D11_CREATE_DEVICE_PREVENT_INTERNAL_THREADING_OPTIMIZATIONS = 8, + D3D11_CREATE_DEVICE_BGRA_SUPPORT = 0x20 +} D3D11_CREATE_DEVICE_FLAG; + + +HRESULT D3D11CreateDevice( + [in,optional] IDXGIAdapter* a, + [in] D3D_DRIVER_TYPE b, + [in] HMODULE c, + [in] UINT d, + [in,optional] const D3D_FEATURE_LEVEL* e, + [in] UINT f, + [in] UINT g, + [out,optional] ID3D11Device** h, + [out,optional] D3D_FEATURE_LEVEL* i, + [out,optional] ID3D11DeviceContext** j +); + +typedef HRESULT (* PFN_D3D11_CREATE_DEVICE_AND_SWAP_CHAIN)( + [in,optional] IDXGIAdapter*, + D3D_DRIVER_TYPE, + HMODULE, + UINT, + [in,optional] const D3D_FEATURE_LEVEL*, + UINT, + UINT, + [in, optional] const DXGI_SWAP_CHAIN_DESC*, + [out,optional] IDXGISwapChain**, + [out,optional] ID3D11Device**, + [out,optional] D3D_FEATURE_LEVEL*, + [out,optional] ID3D11DeviceContext** +); + +HRESULT D3D11CreateDeviceAndSwapChain( + [in,optional] IDXGIAdapter* a, + [in] D3D_DRIVER_TYPE b, + [in] HMODULE c, + [in] UINT d, + [in,optional] const D3D_FEATURE_LEVEL* e, + [in] UINT f, + [in] UINT g, + [in,optional] const DXGI_SWAP_CHAIN_DESC* h, + [out,optional] IDXGISwapChain** i, + [out,optional] ID3D11Device** j, + [out,optional] D3D_FEATURE_LEVEL* k, + [out,optional] ID3D11DeviceContext** l +); diff --git a/src/gallium/state_trackers/d3d1x/d3dapi/d3d11shader.idl b/src/gallium/state_trackers/d3d1x/d3dapi/d3d11shader.idl new file mode 100644 index 0000000000..bb20d91053 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3dapi/d3d11shader.idl @@ -0,0 +1,287 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +import "d3dcommon.idl"; + +typedef enum D3D11_SHADER_VERSION_TYPE +{ + D3D11_SHVER_PIXEL_SHADER, + D3D11_SHVER_VERTEX_SHADER, + D3D11_SHVER_GEOMETRY_SHADER, + + D3D11_SHVER_HULL_SHADER, + D3D11_SHVER_DOMAIN_SHADER, + D3D11_SHVER_COMPUTE_SHADER, +} D3D11_SHADER_VERSION_TYPE; + +cpp_quote("#define D3D11_SHVER_GET_TYPE(v) (((v) >> 16) & 0xffff)") +cpp_quote("#define D3D11_SHVER_GET_MAJOR(v) (((v) >> 4) & 0xf)") +cpp_quote("#define D3D11_SHVER_GET_MINOR(v) (((v) >> 0) & 0xf)") + +typedef D3D_RESOURCE_RETURN_TYPE D3D11_RESOURCE_RETURN_TYPE; +typedef D3D_CBUFFER_TYPE D3D11_CBUFFER_TYPE; + +typedef struct _D3D11_SIGNATURE_PARAMETER_DESC +{ + LPCSTR SemanticName; + UINT SemanticIndex; + UINT Register; + D3D_NAME SystemValueType; + D3D_REGISTER_COMPONENT_TYPE ComponentType; + BYTE Mask; + BYTE ReadWriteMask; + UINT Stream; +} D3D11_SIGNATURE_PARAMETER_DESC; + +typedef struct _D3D11_SHADER_BUFFER_DESC +{ + LPCSTR Name; + D3D_CBUFFER_TYPE Type; + UINT Variables; + UINT Size; + UINT uFlags; +} D3D11_SHADER_BUFFER_DESC; + +typedef struct _D3D11_SHADER_VARIABLE_DESC +{ + LPCSTR Name; + UINT StartOffset; + UINT Size; + UINT uFlags; + LPVOID DefaultValue; + UINT StartTexture; + UINT TextureSize; + UINT StartSampler; + UINT SamplerSize; +} D3D11_SHADER_VARIABLE_DESC; + +typedef struct _D3D11_SHADER_TYPE_DESC +{ + D3D_SHADER_VARIABLE_CLASS Class; + D3D_SHADER_VARIABLE_TYPE Type; + UINT Rows; + UINT Columns; + UINT Elements; + UINT Members; + UINT Offset; + LPCSTR Name; +} D3D11_SHADER_TYPE_DESC; + +typedef D3D_TESSELLATOR_DOMAIN D3D11_TESSELLATOR_DOMAIN; +typedef D3D_TESSELLATOR_PARTITIONING D3D11_TESSELLATOR_PARTITIONING; +typedef D3D_TESSELLATOR_OUTPUT_PRIMITIVE D3D11_TESSELLATOR_OUTPUT_PRIMITIVE; + +typedef struct _D3D11_SHADER_DESC +{ + UINT Version; + LPCSTR Creator; + UINT Flags; + + UINT ConstantBuffers; + UINT BoundResources; + UINT InputParameters; + UINT OutputParameters; + + UINT InstructionCount; + UINT TempRegisterCount; + UINT TempArrayCount; + UINT DefCount; + UINT DclCount; + UINT TextureNormalInstructions; + UINT TextureLoadInstructions; + UINT TextureCompInstructions; + UINT TextureBiasInstructions; + UINT TextureGradientInstructions; + UINT FloatInstructionCount; + UINT IntInstructionCount; + UINT UintInstructionCount; + UINT StaticFlowControlCount; + UINT DynamicFlowControlCount; + UINT MacroInstructionCount; + UINT ArrayInstructionCount; + UINT CutInstructionCount; + UINT EmitInstructionCount; + D3D_PRIMITIVE_TOPOLOGY GSOutputTopology; + UINT GSMaxOutputVertexCount; + D3D_PRIMITIVE InputPrimitive; + UINT PatchConstantParameters; + UINT cGSInstanceCount; + UINT cControlPoints; + D3D_TESSELLATOR_OUTPUT_PRIMITIVE HSOutputPrimitive; + D3D_TESSELLATOR_PARTITIONING HSPartitioning; + D3D_TESSELLATOR_DOMAIN TessellatorDomain; + + UINT cBarrierInstructions; + UINT cInterlockedInstructions; + UINT cTextureStoreInstructions; +} D3D11_SHADER_DESC; + +typedef struct _D3D11_SHADER_INPUT_BIND_DESC +{ + LPCSTR Name; + D3D_SHADER_INPUT_TYPE Type; + UINT BindPoint; + UINT BindCount; + + UINT uFlags; + D3D_RESOURCE_RETURN_TYPE ReturnType; + D3D_SRV_DIMENSION Dimension; + UINT NumSamples; +} D3D11_SHADER_INPUT_BIND_DESC; + +[local, object, uuid("6e6ffa6a-9bae-4613-a51e-91652d508c21")] +interface ID3D11ShaderReflectionType +{ + HRESULT GetDesc( + [out] D3D11_SHADER_TYPE_DESC *a + ); + + ID3D11ShaderReflectionType* GetMemberTypeByIndex( + [in] UINT a + ); + + ID3D11ShaderReflectionType* GetMemberTypeByName( + [in] LPCSTR a + ); + + LPCSTR GetMemberTypeName( + [in] UINT a + ); + + HRESULT IsEqual( + [in] ID3D11ShaderReflectionType* a + ); + ID3D11ShaderReflectionType* GetSubType(); + ID3D11ShaderReflectionType* GetBaseClass(); + UINT GetNumInterfaces(); + ID3D11ShaderReflectionType* GetInterfaceByIndex( + [in] UINT a + ); + HRESULT IsOfType( + [in] ID3D11ShaderReflectionType* a + ); + HRESULT ImplementsInterface( + [in] ID3D11ShaderReflectionType* a + ); +}; + +interface ID3D11ShaderReflectionConstantBuffer; + +[object, local, uuid("51f23923-f3e5-4bd1-91cb-606177d8db4c")] +interface ID3D11ShaderReflectionVariable +{ + HRESULT GetDesc( + [out] D3D11_SHADER_VARIABLE_DESC *a + ); + + ID3D11ShaderReflectionType* GetType(); + ID3D11ShaderReflectionConstantBuffer* GetBuffer(); + + UINT GetInterfaceSlot( + [in] UINT a + ); +}; + +[object, local, uuid("eb62d63d-93dd-4318-8ae8-c6f83ad371b8")] +interface ID3D11ShaderReflectionConstantBuffer +{ + HRESULT GetDesc( + [out] D3D11_SHADER_BUFFER_DESC *a + ); + + ID3D11ShaderReflectionVariable* GetVariableByIndex( + [in] UINT a + ); + + ID3D11ShaderReflectionVariable* GetVariableByName( + [in] LPCSTR a + ); +}; + +[object,local,uuid("0a233719-3960-4578-9d7c-203b8b1d9cc1")] +interface ID3D11ShaderReflection +{ + HRESULT GetDesc( + [out] D3D11_SHADER_DESC *a + ); + + ID3D11ShaderReflectionConstantBuffer* GetConstantBufferByIndex( + [in] UINT a + ); + + ID3D11ShaderReflectionConstantBuffer* GetConstantBufferByName( + [in] LPCSTR a + ); + + HRESULT GetResourceBindingDesc( + [in] UINT a, + [out] D3D11_SHADER_INPUT_BIND_DESC *b + ); + + HRESULT GetInputParameterDesc( + [in] UINT a, + [out] D3D11_SIGNATURE_PARAMETER_DESC *b + ); + + HRESULT GetOutputParameterDesc + ( + [in] UINT a, + [out] D3D11_SIGNATURE_PARAMETER_DESC *b + ); + + HRESULT GetPatchConstantParameterDesc( + [in] UINT a, + [out] D3D11_SIGNATURE_PARAMETER_DESC *b + ); + + ID3D11ShaderReflectionVariable* GetVariableByName( + [in] LPCSTR a + ); + + HRESULT GetResourceBindingDescByName( + [in] LPCSTR a, + [out] D3D11_SHADER_INPUT_BIND_DESC *b + ); + + UINT GetMovInstructionCount(); + UINT GetMovcInstructionCount(); + UINT GetConversionInstructionCount(); + UINT GetBitwiseInstructionCount(); + D3D_PRIMITIVE GetGSInputPrimitive(); + BOOL IsSampleFrequencyShader(); + UINT GetNumInterfaceSlots(); + + HRESULT GetMinFeatureLevel( + [out] D3D_FEATURE_LEVEL* a + ); + + UINT GetThreadGroupSize( + [out,optional] UINT* a, + [out,optional] UINT* b, + [out,optional] UINT* c + ); +}; + diff --git a/src/gallium/state_trackers/d3d1x/d3dapi/d3dcommon.idl b/src/gallium/state_trackers/d3d1x/d3dapi/d3dcommon.idl new file mode 100644 index 0000000000..dd90143168 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3dapi/d3dcommon.idl @@ -0,0 +1,704 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +import "oaidl.idl"; +import "ocidl.idl"; + +typedef const void* LPCVOID; + +typedef enum D3D_DRIVER_TYPE +{ + D3D_DRIVER_TYPE_UNKNOWN, + D3D_DRIVER_TYPE_HARDWARE, + D3D_DRIVER_TYPE_REFERENCE, + D3D_DRIVER_TYPE_NULL, + D3D_DRIVER_TYPE_SOFTWARE, + D3D_DRIVER_TYPE_WARP, +} D3D_DRIVER_TYPE; + +typedef enum D3D_FEATURE_LEVEL +{ + D3D_FEATURE_LEVEL_9_1 = 0x9100, + D3D_FEATURE_LEVEL_9_2 = 0x9200, + D3D_FEATURE_LEVEL_9_3 = 0x9300, + D3D_FEATURE_LEVEL_10_0 = 0xa000, + D3D_FEATURE_LEVEL_10_1 = 0xa100, + D3D_FEATURE_LEVEL_11_0 = 0xb000 +} D3D_FEATURE_LEVEL; + +typedef enum D3D_PRIMITIVE_TOPOLOGY +{ + D3D_PRIMITIVE_TOPOLOGY_UNDEFINED = 0, + D3D_PRIMITIVE_TOPOLOGY_POINTLIST, + D3D_PRIMITIVE_TOPOLOGY_LINELIST, + D3D_PRIMITIVE_TOPOLOGY_LINESTRIP, + D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST, + D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, + + D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10, + D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ, + D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, + D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ, + + D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST = 33, + D3D_PRIMITIVE_TOPOLOGY_2_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_5_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_6_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_7_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_8_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_9_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_10_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_11_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_12_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_13_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_14_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_15_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_16_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_17_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_18_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_19_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_20_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_21_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_22_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_23_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_24_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_25_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_26_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_27_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_28_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_29_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_30_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_31_CONTROL_POINT_PATCHLIST, + D3D_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST, + + D3D10_PRIMITIVE_TOPOLOGY_UNDEFINED = 0, + D3D10_PRIMITIVE_TOPOLOGY_POINTLIST, + D3D10_PRIMITIVE_TOPOLOGY_LINELIST, + D3D10_PRIMITIVE_TOPOLOGY_LINESTRIP, + D3D10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, + D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, + + D3D10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10, + D3D10_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ, + D3D10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, + D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ, + + D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED = 0, + D3D11_PRIMITIVE_TOPOLOGY_POINTLIST, + D3D11_PRIMITIVE_TOPOLOGY_LINELIST, + D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP, + D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST, + D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, + + D3D11_PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10, + D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ, + D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, + D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ, + + D3D11_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST = 33, + D3D11_PRIMITIVE_TOPOLOGY_2_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_5_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_6_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_7_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_8_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_9_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_10_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_11_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_12_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_13_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_14_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_15_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_16_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_17_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_18_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_19_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_20_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_21_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_22_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_23_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_24_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_25_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_26_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_27_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_28_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_29_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_30_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_31_CONTROL_POINT_PATCHLIST, + D3D11_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST, +} D3D_PRIMITIVE_TOPOLOGY; + +typedef enum D3D_PRIMITIVE +{ + D3D_PRIMITIVE_UNDEFINED = 0, + D3D_PRIMITIVE_POINT, + D3D_PRIMITIVE_LINE, + D3D_PRIMITIVE_TRIANGLE, + + D3D_PRIMITIVE_LINE_ADJ = 6, + D3D_PRIMITIVE_TRIANGLE_ADJ, + + D3D_PRIMITIVE_1_CONTROL_POINT_PATCH = 8, + D3D_PRIMITIVE_2_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_3_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_4_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_5_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_6_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_7_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_8_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_9_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_10_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_11_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_12_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_13_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_14_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_15_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_16_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_17_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_18_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_19_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_20_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_21_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_22_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_23_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_24_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_25_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_26_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_27_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_28_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_29_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_30_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_31_CONTROL_POINT_PATCH, + D3D_PRIMITIVE_32_CONTROL_POINT_PATCH, + + D3D10_PRIMITIVE_UNDEFINED = 0, + D3D10_PRIMITIVE_POINT, + D3D10_PRIMITIVE_LINE, + D3D10_PRIMITIVE_TRIANGLE, + + D3D10_PRIMITIVE_LINE_ADJ = 6, + D3D10_PRIMITIVE_TRIANGLE_ADJ, + + D3D11_PRIMITIVE_UNDEFINED = 0, + D3D11_PRIMITIVE_POINT, + D3D11_PRIMITIVE_LINE, + D3D11_PRIMITIVE_TRIANGLE, + + D3D11_PRIMITIVE_LINE_ADJ = 6, + D3D11_PRIMITIVE_TRIANGLE_ADJ, + + D3D11_PRIMITIVE_1_CONTROL_POINT_PATCH = 8, + D3D11_PRIMITIVE_2_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_3_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_4_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_5_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_6_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_7_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_8_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_9_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_10_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_11_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_12_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_13_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_14_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_15_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_16_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_17_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_18_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_19_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_20_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_21_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_22_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_23_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_24_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_25_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_26_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_27_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_28_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_29_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_30_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_31_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_32_CONTROL_POINT_PATCH, +} D3D_PRIMITIVE; + +typedef enum D3D_SRV_DIMENSION +{ + D3D_SRV_DIMENSION_UNKNOWN = 0, + D3D_SRV_DIMENSION_BUFFER, + D3D_SRV_DIMENSION_TEXTURE1D, + D3D_SRV_DIMENSION_TEXTURE1DARRAY, + D3D_SRV_DIMENSION_TEXTURE2D, + D3D_SRV_DIMENSION_TEXTURE2DARRAY, + D3D_SRV_DIMENSION_TEXTURE2DMS, + D3D_SRV_DIMENSION_TEXTURE2DMSARRAY, + D3D_SRV_DIMENSION_TEXTURE3D, + D3D_SRV_DIMENSION_TEXTURECUBE, + D3D_SRV_DIMENSION_TEXTURECUBEARRAY, + D3D_SRV_DIMENSION_BUFFEREX, + + D3D10_SRV_DIMENSION_UNKNOWN = 0, + D3D10_SRV_DIMENSION_BUFFER, + D3D10_SRV_DIMENSION_TEXTURE1D, + D3D10_SRV_DIMENSION_TEXTURE1DARRAY, + D3D10_SRV_DIMENSION_TEXTURE2D, + D3D10_SRV_DIMENSION_TEXTURE2DARRAY, + D3D10_SRV_DIMENSION_TEXTURE2DMS, + D3D10_SRV_DIMENSION_TEXTURE2DMSARRAY, + D3D10_SRV_DIMENSION_TEXTURE3D, + D3D10_SRV_DIMENSION_TEXTURECUBE, + + D3D10_1_SRV_DIMENSION_UNKNOWN = 0, + D3D10_1_SRV_DIMENSION_BUFFER, + D3D10_1_SRV_DIMENSION_TEXTURE1D, + D3D10_1_SRV_DIMENSION_TEXTURE1DARRAY, + D3D10_1_SRV_DIMENSION_TEXTURE2D, + D3D10_1_SRV_DIMENSION_TEXTURE2DARRAY, + D3D10_1_SRV_DIMENSION_TEXTURE2DMS, + D3D10_1_SRV_DIMENSION_TEXTURE2DMSARRAY, + D3D10_1_SRV_DIMENSION_TEXTURE3D, + D3D10_1_SRV_DIMENSION_TEXTURECUBE, + D3D10_1_SRV_DIMENSION_TEXTURECUBEARRAY, + + D3D11_SRV_DIMENSION_UNKNOWN = 0, + D3D11_SRV_DIMENSION_BUFFER, + D3D11_SRV_DIMENSION_TEXTURE1D, + D3D11_SRV_DIMENSION_TEXTURE1DARRAY, + D3D11_SRV_DIMENSION_TEXTURE2D, + D3D11_SRV_DIMENSION_TEXTURE2DARRAY, + D3D11_SRV_DIMENSION_TEXTURE2DMS, + D3D11_SRV_DIMENSION_TEXTURE2DMSARRAY, + D3D11_SRV_DIMENSION_TEXTURE3D, + D3D11_SRV_DIMENSION_TEXTURECUBE, + D3D11_SRV_DIMENSION_TEXTURECUBEARRAY, + D3D11_SRV_DIMENSION_BUFFEREX, +} D3D_SRV_DIMENSION; + +typedef struct _D3D_SHADER_MACRO +{ + LPCSTR Name; + LPCSTR Definition; +} D3D_SHADER_MACRO; + +typedef struct _D3D_SHADER_MACRO *LPD3D_SHADER_MACRO; + +[object, local, uuid(8ba5fb08-5195-40e2-ac58-0d989c3a0102)] +interface ID3D10Blob : IUnknown +{ + LPVOID GetBufferPointer(); + SIZE_T GetBufferSize(); +}; + +typedef ID3D10Blob* LPD3D10BLOB; +typedef ID3D10Blob ID3DBlob; +typedef ID3DBlob* LPD3DBLOB; + +typedef enum _D3D_INCLUDE_TYPE +{ + D3D_INCLUDE_LOCAL = 0, + D3D_INCLUDE_SYSTEM, + + D3D10_INCLUDE_LOCAL = 0, + D3D10_INCLUDE_SYSTEM, + + D3D_INCLUDE_FORCE_DWORD = 0x7fffffff, +} D3D_INCLUDE_TYPE; + +[object, local] +interface ID3DInclude : IUnknown +{ + HRESULT Open( + [in] D3D_INCLUDE_TYPE a, + [in] LPCSTR b, + [in] LPCVOID c, + [out] LPCVOID *d, + [in] UINT *e + ); + HRESULT Close( + [in] LPCVOID a + ); +}; + +typedef enum _D3D_SHADER_VARIABLE_CLASS +{ + D3D_SVC_SCALAR = 0, + D3D_SVC_VECTOR, + D3D_SVC_MATRIX_ROWS, + D3D_SVC_MATRIX_COLUMNS, + D3D_SVC_OBJECT, + D3D_SVC_STRUCT, + D3D_SVC_INTERFACE_CLASS, + D3D_SVC_INTERFACE_POINTER, + + D3D10_SVC_SCALAR = 0, + D3D10_SVC_VECTOR, + D3D10_SVC_MATRIX_ROWS, + D3D10_SVC_MATRIX_COLUMNS, + D3D10_SVC_OBJECT, + D3D10_SVC_STRUCT, + D3D11_SVC_INTERFACE_CLASS, + D3D11_SVC_INTERFACE_POINTER, + D3D_SVC_FORCE_DWORD = 0x7fffffff +} D3D_SHADER_VARIABLE_CLASS; + +typedef enum _D3D_SHADER_VARIABLE_FLAGS +{ + D3D_SVF_USERPACKED = 1, + D3D_SVF_USED = 2, + D3D_SVF_INTERFACE_POINTER = 4, + D3D_SVF_INTERFACE_PARAMETER = 8, + + D3D10_SVF_USERPACKED = 1, + D3D10_SVF_USED = 2, + D3D10_SVF_INTERFACE_POINTER = 4, + D3D10_SVF_INTERFACE_PARAMETER = 8, + + D3D_SVF_FORCE_DWORD = 0x7fffffff +} D3D_SHADER_VARIABLE_FLAGS; + +typedef enum _D3D_SHADER_VARIABLE_TYPE +{ + D3D_SVT_VOID = 0, + D3D_SVT_BOOL, + D3D_SVT_INT, + D3D_SVT_FLOAT, + D3D_SVT_STRING, + D3D_SVT_TEXTURE, + D3D_SVT_TEXTURE1D, + D3D_SVT_TEXTURE2D, + D3D_SVT_TEXTURE3D, + D3D_SVT_TEXTURECUBE, + D3D_SVT_SAMPLER, + D3D_SVT_SAMPLER1D, + D3D_SVT_SAMPLER2D, + D3D_SVT_SAMPLER3D, + D3D_SVT_SAMPLERCUBE, + D3D_SVT_PIXELSHADER, + D3D_SVT_VERTEXSHADER, + D3D_SVT_PIXELFRAGMENT, + D3D_SVT_VERTEXFRAGMENT, + D3D_SVT_UINT, + D3D_SVT_UINT8, + D3D_SVT_GEOMETRYSHADER, + D3D_SVT_RASTERIZER, + D3D_SVT_DEPTHSTENCIL, + D3D_SVT_BLEND, + D3D_SVT_BUFFER, + D3D_SVT_CBUFFER, + D3D_SVT_TBUFFER, + D3D_SVT_TEXTURE1DARRAY, + D3D_SVT_TEXTURE2DARRAY, + D3D_SVT_RENDERTARGETVIEW, + D3D_SVT_DEPTHSTENCILVIEW, + D3D_SVT_TEXTURE2DMS, + D3D_SVT_TEXTURE2DMSARRAY, + D3D_SVT_TEXTURECUBEARRAY, + D3D_SVT_HULLSHADER, + D3D_SVT_DOMAINSHADER, + D3D_SVT_INTERFACE_POINTER, + D3D_SVT_COMPUTESHADER, + D3D_SVT_DOUBLE, + D3D_SVT_RWTEXTURE1D, + D3D_SVT_RWTEXTURE1DARRAY, + D3D_SVT_RWTEXTURE2D, + D3D_SVT_RWTEXTURE2DARRAY, + D3D_SVT_RWTEXTURE3D, + D3D_SVT_RWBUFFER, + D3D_SVT_BYTEADDRESS_BUFFER, + D3D_SVT_RWBYTEADDRESS_BUFFER, + D3D_SVT_STRUCTURED_BUFFER, + D3D_SVT_RWSTRUCTURED_BUFFER, + D3D_SVT_APPEND_STRUCTURED_BUFFER, + D3D_SVT_CONSUME_STRUCTURED_BUFFER, + + D3D10_SVT_VOID = 0, + D3D10_SVT_BOOL, + D3D10_SVT_INT, + D3D10_SVT_FLOAT, + D3D10_SVT_STRING, + D3D10_SVT_TEXTURE, + D3D10_SVT_TEXTURE1D, + D3D10_SVT_TEXTURE2D, + D3D10_SVT_TEXTURE3D, + D3D10_SVT_TEXTURECUBE, + D3D10_SVT_SAMPLER, + D3D10_SVT_SAMPLER1D, + D3D10_SVT_SAMPLER2D, + D3D10_SVT_SAMPLER3D, + D3D10_SVT_SAMPLERCUBE, + D3D10_SVT_PIXELSHADER, + D3D10_SVT_VERTEXSHADER, + D3D10_SVT_PIXELFRAGMENT, + D3D10_SVT_VERTEXFRAGMENT, + D3D10_SVT_UINT, + D3D10_SVT_UINT8, + D3D10_SVT_GEOMETRYSHADER, + D3D10_SVT_RASTERIZER, + D3D10_SVT_DEPTHSTENCIL, + D3D10_SVT_BLEND, + D3D10_SVT_BUFFER, + D3D10_SVT_CBUFFER, + D3D10_SVT_TBUFFER, + D3D10_SVT_TEXTURE1DARRAY, + D3D10_SVT_TEXTURE2DARRAY, + D3D10_SVT_RENDERTARGETVIEW, + D3D10_SVT_DEPTHSTENCILVIEW, + D3D10_SVT_TEXTURE2DMS, + D3D10_SVT_TEXTURE2DMSARRAY, + D3D10_SVT_TEXTURECUBEARRAY, + + D3D11_SVT_HULLSHADER, + D3D11_SVT_DOMAINSHADER, + D3D11_SVT_INTERFACE_POINTER, + D3D11_SVT_COMPUTESHADER, + D3D11_SVT_DOUBLE, + D3D11_SVT_RWTEXTURE1D, + D3D11_SVT_RWTEXTURE1DARRAY, + D3D11_SVT_RWTEXTURE2D, + D3D11_SVT_RWTEXTURE2DARRAY, + D3D11_SVT_RWTEXTURE3D, + D3D11_SVT_RWBUFFER, + D3D11_SVT_BYTEADDRESS_BUFFER, + D3D11_SVT_RWBYTEADDRESS_BUFFER, + D3D11_SVT_STRUCTURED_BUFFER, + D3D11_SVT_RWSTRUCTURED_BUFFER, + D3D11_SVT_APPEND_STRUCTURED_BUFFER, + D3D11_SVT_CONSUME_STRUCTURED_BUFFER, + + D3D_SVT_FORCE_DWORD = 0x7fffffff +} D3D_SHADER_VARIABLE_TYPE; + +typedef enum _D3D_SHADER_INPUT_FLAGS +{ + D3D_SIF_USERPACKED = 1, + D3D_SIF_COMPARISON_SAMPLER = 2, + D3D_SIF_TEXTURE_COMPONENT_0 = 4, + D3D_SIF_TEXTURE_COMPONENT_1 = 8, + D3D_SIF_TEXTURE_COMPONENTS = 12, + + D3D10_SIF_USERPACKED = 1, + D3D10_SIF_COMPARISON_SAMPLER = 2, + D3D10_SIF_TEXTURE_COMPONENT_0 = 4, + D3D10_SIF_TEXTURE_COMPONENT_1 = 8, + D3D10_SIF_TEXTURE_COMPONENTS = 12, + + D3D_SIF_FORCE_DWORD = 0x7fffffff + } D3D_SHADER_INPUT_FLAGS; + +typedef enum _D3D_SHADER_INPUT_TYPE +{ + D3D_SIT_CBUFFER = 0, + D3D_SIT_TBUFFER, + D3D_SIT_TEXTURE, + D3D_SIT_SAMPLER, + D3D_SIT_UAV_RWTYPED, + D3D_SIT_STRUCTURED, + D3D_SIT_UAV_RWSTRUCTURED, + D3D_SIT_BYTEADDRESS, + D3D_SIT_UAV_RWBYTEADDRESS, + D3D_SIT_UAV_APPEND_STRUCTURED, + D3D_SIT_UAV_CONSUME_STRUCTURED, + D3D_SIT_UAV_RWSTRUCTURED_WITH_COUNTER, + + D3D10_SIT_CBUFFER = 0, + D3D10_SIT_TBUFFER, + D3D10_SIT_TEXTURE, + D3D10_SIT_SAMPLER, + + D3D11_SIT_UAV_RWTYPED, + D3D11_SIT_STRUCTURED, + D3D11_SIT_UAV_RWSTRUCTURED, + D3D11_SIT_BYTEADDRESS, + D3D11_SIT_UAV_RWBYTEADDRESS, + D3D11_SIT_UAV_APPEND_STRUCTURED, + D3D11_SIT_UAV_CONSUME_STRUCTURED, + D3D11_SIT_UAV_RWSTRUCTURED_WITH_COUNTER, + } D3D_SHADER_INPUT_TYPE; + +typedef enum _D3D_SHADER_CBUFFER_FLAGS +{ + D3D_CBF_USERPACKED = 1, + + D3D10_CBF_USERPACKED = 1, + + D3D_CBF_FORCE_DWORD = 0x7fffffff +} D3D_SHADER_CBUFFER_FLAGS; + +typedef enum _D3D_CBUFFER_TYPE +{ + D3D_CT_CBUFFER = 0, + D3D_CT_TBUFFER, + D3D_CT_INTERFACE_POINTERS, + D3D_CT_RESOURCE_BIND_INFO, + + D3D10_CT_CBUFFER = 0, + D3D10_CT_TBUFFER, + + D3D11_CT_CBUFFER = 0, + D3D11_CT_TBUFFER, + D3D11_CT_INTERFACE_POINTERS, + D3D11_CT_RESOURCE_BIND_INFO, +} D3D_CBUFFER_TYPE; + +typedef enum D3D_NAME +{ + D3D_NAME_UNDEFINED = 0, + D3D_NAME_POSITION, + D3D_NAME_CLIP_DISTANCE, + D3D_NAME_CULL_DISTANCE, + D3D_NAME_RENDER_TARGET_ARRAY_INDEX, + D3D_NAME_VIEWPORT_ARRAY_INDEX, + D3D_NAME_VERTEX_ID, + D3D_NAME_PRIMITIVE_ID, + D3D_NAME_INSTANCE_ID, + D3D_NAME_IS_FRONT_FACE, + D3D_NAME_SAMPLE_INDEX, + D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR, + D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR, + D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR, + D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR, + D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR, + D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR, + + D3D_NAME_TARGET = 64, + D3D_NAME_DEPTH, + D3D_NAME_COVERAGE, + D3D_NAME_DEPTH_GREATER_EQUAL, + D3D_NAME_DEPTH_LESS_EQUAL, + + D3D10_NAME_UNDEFINED = 0, + D3D10_NAME_POSITION, + D3D10_NAME_CLIP_DISTANCE, + D3D10_NAME_CULL_DISTANCE, + D3D10_NAME_RENDER_TARGET_ARRAY_INDEX, + D3D10_NAME_VIEWPORT_ARRAY_INDEX, + D3D10_NAME_VERTEX_ID, + D3D10_NAME_PRIMITIVE_ID, + D3D10_NAME_INSTANCE_ID, + D3D10_NAME_IS_FRONT_FACE, + D3D10_NAME_SAMPLE_INDEX, + + D3D11_NAME_FINAL_QUAD_EDGE_TESSFACTOR, + D3D11_NAME_FINAL_QUAD_INSIDE_TESSFACTOR, + D3D11_NAME_FINAL_TRI_EDGE_TESSFACTOR, + D3D11_NAME_FINAL_TRI_INSIDE_TESSFACTOR, + D3D11_NAME_FINAL_LINE_DETAIL_TESSFACTOR, + D3D11_NAME_FINAL_LINE_DENSITY_TESSFACTOR, + + D3D10_NAME_TARGET = 64, + D3D10_NAME_DEPTH , + D3D10_NAME_COVERAGE, + + D3D11_NAME_DEPTH_GREATER_EQUAL, + D3D11_NAME_DEPTH_LESS_EQUAL, +} D3D_NAME; + +typedef enum D3D_RESOURCE_RETURN_TYPE +{ + D3D_RETURN_TYPE_UNORM = 1, + D3D_RETURN_TYPE_SNORM, + D3D_RETURN_TYPE_SINT, + D3D_RETURN_TYPE_UINT, + D3D_RETURN_TYPE_FLOAT, + D3D_RETURN_TYPE_MIXED, + D3D_RETURN_TYPE_DOUBLE, + D3D_RETURN_TYPE_CONTINUED, + + D3D10_RETURN_TYPE_UNORM = 1, + D3D10_RETURN_TYPE_SNORM, + D3D10_RETURN_TYPE_SINT, + D3D10_RETURN_TYPE_UINT, + D3D10_RETURN_TYPE_FLOAT, + D3D10_RETURN_TYPE_MIXED, + + D3D11_RETURN_TYPE_UNORM = 1, + D3D11_RETURN_TYPE_SNORM, + D3D11_RETURN_TYPE_SINT, + D3D11_RETURN_TYPE_UINT, + D3D11_RETURN_TYPE_FLOAT, + D3D11_RETURN_TYPE_MIXED, + D3D11_RETURN_TYPE_DOUBLE, + D3D11_RETURN_TYPE_CONTINUED, +} D3D_RESOURCE_RETURN_TYPE; + +typedef enum D3D_REGISTER_COMPONENT_TYPE +{ + D3D_REGISTER_COMPONENT_UNKNOWN = 0, + D3D_REGISTER_COMPONENT_UINT32, + D3D_REGISTER_COMPONENT_SINT32, + D3D_REGISTER_COMPONENT_FLOAT32, + + D3D10_REGISTER_COMPONENT_UNKNOWN = 0, + D3D10_REGISTER_COMPONENT_UINT32, + D3D10_REGISTER_COMPONENT_SINT32, + D3D10_REGISTER_COMPONENT_FLOAT32, +} D3D_REGISTER_COMPONENT_TYPE; + +typedef enum D3D_TESSELLATOR_DOMAIN +{ + D3D_TESSELLATOR_DOMAIN_UNDEFINED = 0, + D3D_TESSELLATOR_DOMAIN_ISOLINE, + D3D_TESSELLATOR_DOMAIN_TRI, + D3D_TESSELLATOR_DOMAIN_QUAD, + + D3D11_TESSELLATOR_DOMAIN_UNDEFINED = 0, + D3D11_TESSELLATOR_DOMAIN_ISOLINE, + D3D11_TESSELLATOR_DOMAIN_TRI, + D3D11_TESSELLATOR_DOMAIN_QUAD, +} D3D_TESSELLATOR_DOMAIN; + +typedef enum D3D_TESSELLATOR_PARTITIONING +{ + D3D_TESSELLATOR_PARTITIONING_UNDEFINED = 0, + D3D_TESSELLATOR_PARTITIONING_INTEGER, + D3D_TESSELLATOR_PARTITIONING_POW2, + D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD, + D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN, + + D3D11_TESSELLATOR_PARTITIONING_UNDEFINED = 0, + D3D11_TESSELLATOR_PARTITIONING_INTEGER, + D3D11_TESSELLATOR_PARTITIONING_POW2, + D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD, + D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN, +} D3D_TESSELLATOR_PARTITIONING; + +typedef enum D3D_TESSELLATOR_OUTPUT_PRIMITIVE +{ + D3D_TESSELLATOR_OUTPUT_UNDEFINED = 0, + D3D_TESSELLATOR_OUTPUT_POINT, + D3D_TESSELLATOR_OUTPUT_LINE, + D3D_TESSELLATOR_OUTPUT_TRIANGLE_CW, + D3D_TESSELLATOR_OUTPUT_TRIANGLE_CCW, + + D3D11_TESSELLATOR_OUTPUT_UNDEFINED = 0, + D3D11_TESSELLATOR_OUTPUT_POINT, + D3D11_TESSELLATOR_OUTPUT_LINE, + D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CW, + D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CCW, +} D3D_TESSELLATOR_OUTPUT_PRIMITIVE; + diff --git a/src/gallium/state_trackers/d3d1x/d3dapi/dxgi.idl b/src/gallium/state_trackers/d3d1x/d3dapi/dxgi.idl new file mode 100644 index 0000000000..86ef261f67 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3dapi/dxgi.idl @@ -0,0 +1,470 @@ +/* + * Copyright 2007 Andras Kovacs + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +/*DXGI 1.1 IDL, and missing DXGI 1.0 parts added by Luca Barbieri on Sep 2010 */ + +import "dxgitype.idl"; + +const UINT _FACDXGI = 0x87a; + +cpp_quote("#define MAKE_DXGI_STATUS(x) MAKE_HRESULT(0, _FACDXGI, x)") +cpp_quote("#define DXGI_STATUS_OCCLUDED MAKE_DXGI_STATUS(1)") +cpp_quote("#define DXGI_STATUS_CLIPPED MAKE_DXGI_STATUS(2)") +cpp_quote("#define DXGI_STATUS_NO_REDIRECTION MAKE_DXGI_STATUS(4)") +cpp_quote("#define DXGI_STATUS_NO_DESKTOP_ACCESS MAKE_DXGI_STATUS(5)") +cpp_quote("#define DXGI_STATUS_GRAPHICS_VIDPN_SOURCE_IN_USE MAKE_DXGI_STATUS(6)") +cpp_quote("#define DXGI_STATUS_MODE_CHANGED MAKE_DXGI_STATUS(7)") +cpp_quote("#define DXGI_STATUS_MODE_CHANGE_IN_PROGRESS MAKE_DXGI_STATUS(8)") + +cpp_quote("#define MAKE_DXGI_HRESULT(x) MAKE_HRESULT(1, _FACDXGI, x)") +cpp_quote("#define DXGI_ERROR_INVALID_CALL MAKE_DXGI_HRESULT(1)") +cpp_quote("#define DXGI_ERROR_NOT_FOUND MAKE_DXGI_HRESULT(2)") +cpp_quote("#define DXGI_ERROR_MORE_DATA MAKE_DXGI_HRESULT(3)") +cpp_quote("#define DXGI_ERROR_UNSUPPORTED MAKE_DXGI_HRESULT(4)") +cpp_quote("#define DXGI_ERROR_DEVICE_REMOVED MAKE_DXGI_HRESULT(5)") +cpp_quote("#define DXGI_ERROR_DEVICE_HUNG MAKE_DXGI_HRESULT(6)") +cpp_quote("#define DXGI_ERROR_DEVICE_RESET MAKE_DXGI_HRESULT(7)") +cpp_quote("#define DXGI_ERROR_WAS_STILL_DRAWING MAKE_DXGI_HRESULT(10)") +cpp_quote("#define DXGI_ERROR_FRAME_STATISTICS_DISJOINT MAKE_DXGI_HRESULT(11)") +cpp_quote("#define DXGI_ERROR_GRAPHICS_VIDPN_SOURCE_IN_USE MAKE_DXGI_HRESULT(12)") +cpp_quote("#define DXGI_ERROR_DRIVER_INTERNAL_ERROR MAKE_DXGI_HRESULT(32)") +cpp_quote("#define DXGI_ERROR_NONEXCLUSIVE MAKE_DXGI_HRESULT(33)") +cpp_quote("#define DXGI_ERROR_NOT_CURRENTLY_AVAILABLE MAKE_DXGI_HRESULT(34)") + +cpp_quote("#if 0") +typedef HANDLE HMONITOR; +typedef struct _LUID { + DWORD LowPart; + LONG HighPart; +} LUID, *PLUID; +cpp_quote("#endif") + +typedef UINT DXGI_USAGE; +const DXGI_USAGE DXGI_USAGE_SHADER_INPUT = 0x10L; +const DXGI_USAGE DXGI_USAGE_RENDER_TARGET_OUTPUT = 0x20L; +const DXGI_USAGE DXGI_USAGE_BACK_BUFFER = 0x40L; +const DXGI_USAGE DXGI_USAGE_SHARED = 0x80L; +const DXGI_USAGE DXGI_USAGE_READ_ONLY = 0x100L; + +typedef enum DXGI_SWAP_EFFECT { + DXGI_SWAP_EFFECT_DISCARD = 0, + DXGI_SWAP_EFFECT_SEQUENTIAL = 1, +} DXGI_SWAP_EFFECT; + +typedef enum DXGI_RESIDENCY { + DXGI_RESIDENCY_FULLY_RESIDENT = 1, + DXGI_RESIDENCY_RESIDENT_IN_SHARED_MEMORY = 2, + DXGI_RESIDENCY_EVICTED_TO_DISK = 3, +} DXGI_RESIDENCY; + +typedef struct DXGI_SURFACE_DESC { + UINT Width; + UINT Height; + DXGI_FORMAT Format; + DXGI_SAMPLE_DESC SampleDesc; +} DXGI_SURFACE_DESC; + +typedef struct DXGI_MAPPED_RECT { + INT Pitch; + BYTE *pBits; +} DXGI_MAPPED_RECT; + +typedef struct DXGI_OUTPUT_DESC { + WCHAR DeviceName[32]; + RECT DesktopCoordinates; + BOOL AttachedToDesktop; + DXGI_MODE_ROTATION Rotation; + HMONITOR Monitor; +} DXGI_OUTPUT_DESC; + +typedef struct DXGI_FRAME_STATISTICS { + UINT PresentCount; + UINT PresentRefreshCount; + UINT SyncRefreshCount; + LARGE_INTEGER SyncQPCTime; + LARGE_INTEGER SyncGPUTime; +} DXGI_FRAME_STATISTICS; + +typedef struct DXGI_ADAPTER_DESC { + WCHAR Description[128]; + UINT VendorId; + UINT DeviceId; + UINT SubSysId; + UINT Revision; + SIZE_T DedicatedVideoMemory; + SIZE_T DedicatedSystemMemory; + SIZE_T SharedSystemMemory; + LUID AdapterLuid; +} DXGI_ADAPTER_DESC; + +typedef struct DXGI_SWAP_CHAIN_DESC { + DXGI_MODE_DESC BufferDesc; + DXGI_SAMPLE_DESC SampleDesc; + DXGI_USAGE BufferUsage; + UINT BufferCount; + HWND OutputWindow; + BOOL Windowed; + DXGI_SWAP_EFFECT SwapEffect; + UINT Flags; +} DXGI_SWAP_CHAIN_DESC; + +typedef struct DXGI_SHARED_RESOURCE { + HANDLE Handle; +} DXGI_SHARED_RESOURCE; + +[ + object, + local, + uuid(aec22fb8-76f3-4639-9be0-28eb43a67a2e) +] +interface IDXGIObject : IUnknown +{ + HRESULT SetPrivateData( + [in] REFGUID a, + [in] UINT b, + [in] const void *c + ); + HRESULT SetPrivateDataInterface( + [in] REFGUID a, + [in] const IUnknown *b + ); + HRESULT GetPrivateData( + [in] REFGUID a, + [in, out] UINT *b, + [out] void *c + ); + HRESULT GetParent( + [in] REFIID a, + [out] void **b + ); +} + +[ + object, + local, + uuid(3d3e0379-f9de-4d58-bb6c-18d62992f1a6) +] +interface IDXGIDeviceSubObject : IDXGIObject +{ + HRESULT GetDevice( + [in] REFIID a, + [out] void **b + ); +} + +[ + object, + local, + uuid(cafcb56c-6ac3-4889-bf47-9e23bbd260ec) +] +interface IDXGISurface : IDXGIDeviceSubObject +{ + HRESULT GetDesc( + [out] DXGI_SURFACE_DESC *a + ); + HRESULT Map( + [out] DXGI_MAPPED_RECT *a, + [in] UINT b + ); + HRESULT Unmap( + ); +} + +[ + object, + local, + uuid(ae02eedb-c735-4690-8d52-5a8dc20213aa) +] +interface IDXGIOutput : IDXGIObject +{ + HRESULT GetDesc( + [out] DXGI_OUTPUT_DESC *a + ); + HRESULT GetDisplayModeList( + [in] DXGI_FORMAT a, + [in] UINT b, + [in, out] UINT *c, + [out] DXGI_MODE_DESC *d + ); + HRESULT FindClosestMatchingMode( + [in] const DXGI_MODE_DESC *a, + [out] DXGI_MODE_DESC *b, + [in] IUnknown *c + ); + HRESULT WaitForVBlank( + ); + HRESULT TakeOwnership( + [in] IUnknown *a, + [in] BOOL b + ); + void ReleaseOwnership( + ); + HRESULT GetGammaControlCapabilities( + [out] DXGI_GAMMA_CONTROL_CAPABILITIES *a + ); + HRESULT SetGammaControl( + [in] const DXGI_GAMMA_CONTROL *a + ); + HRESULT GetGammaControl( + [out] DXGI_GAMMA_CONTROL *a + ); + HRESULT SetDisplaySurface( + [in] IDXGISurface *a + ); + HRESULT GetDisplaySurfaceData( + [in] IDXGISurface *a + ); + HRESULT GetFrameStatistics( + [out] DXGI_FRAME_STATISTICS *a + ); +} + +[ + object, + local, + uuid(2411e7e1-12ac-4ccf-bd14-9798e8534dc0) +] +interface IDXGIAdapter : IDXGIObject +{ + HRESULT EnumOutputs( + [in] UINT a, + [in, out] IDXGIOutput **b + ); + HRESULT GetDesc( + [out] DXGI_ADAPTER_DESC *a + ); + HRESULT CheckInterfaceSupport( + [in] REFGUID a, + [out] LARGE_INTEGER *b + ); +} + +[ + object, + local, + uuid(310d36a0-d2e7-4c0a-aa04-6a9d23b8886a) +] +interface IDXGISwapChain : IDXGIDeviceSubObject +{ + HRESULT Present( + [in] UINT a, + [in] UINT b + ); + HRESULT GetBuffer( + [in] UINT a, + [in] REFIID b, + [in, out] void **c + ); + HRESULT SetFullscreenState( + [in] BOOL a, + [in] IDXGIOutput *b + ); + HRESULT GetFullscreenState( + [out] BOOL *a, + [out] IDXGIOutput **b + ); + HRESULT GetDesc( + [out] DXGI_SWAP_CHAIN_DESC *a + ); + HRESULT ResizeBuffers( + [in] UINT a, + [in] UINT b, + [in] UINT c, + [in] DXGI_FORMAT d, + [in] UINT e + ); + HRESULT ResizeTarget( + [in] const DXGI_MODE_DESC *a + ); + HRESULT GetContainingOutput( + [out] IDXGIOutput **a + ); + HRESULT GetFrameStatistics( + [out] DXGI_FRAME_STATISTICS *a + ); + HRESULT GetLastPresentCount( + [out] UINT *a + ); +} + +[ + object, + local, + uuid(7b7166ec-21c7-44ae-b21a-c9ae321ae369) +] +interface IDXGIFactory : IDXGIObject +{ + HRESULT EnumAdapters( + [in] UINT a, + [out] IDXGIAdapter **b + ); + HRESULT MakeWindowAssociation( + [in] HWND a, + [in] UINT b + ); + HRESULT GetWindowAssociation( + [in] HWND *a + ); + HRESULT CreateSwapChain( + [in] IUnknown *a, + [in] DXGI_SWAP_CHAIN_DESC *b, + [out] IDXGISwapChain **c + ); + HRESULT CreateSoftwareAdapter( + [in] HMODULE a, + [out] IDXGIAdapter **b + ); +} + +[local] HRESULT CreateDXGIFactory(REFIID riid, void **factory); + +[ + object, + local, + uuid(54ec77fa-1377-44e6-8c32-88fd5f44c84c) +] +interface IDXGIDevice : IDXGIObject +{ + HRESULT GetAdapter( + [out] IDXGIAdapter **a + ); + HRESULT CreateSurface( + [in] const DXGI_SURFACE_DESC *a, + [in] UINT b, + [in] DXGI_USAGE c, + [in] const DXGI_SHARED_RESOURCE *d, + [out] IDXGISurface **e + ); + HRESULT QueryResourceResidency( + [in] IUnknown *const *a, + [out] DXGI_RESIDENCY *b, + [in] UINT c + ); + HRESULT SetGPUThreadPriority( + [in] INT a + ); + HRESULT GetGPUThreadPriority( + [out] INT *a + ); +} + + +// BEGIN parts added for Gallium + +const unsigned int DXGI_MAP_READ = 1; +const unsigned int DXGI_MAP_WRITE = 2; +const unsigned int DXGI_MAP_DISCARD = 4; + +const unsigned int DXGI_CPU_ACCESS_NONE = 0; +const unsigned int DXGI_CPU_ACCESS_DYNAMIC = 1; +const unsigned int DXGI_CPU_ACCESS_READ_WRITE = 2; +const unsigned int DXGI_CPU_ACCESS_SCRATCH = 3; +const unsigned int DXGI_CPU_ACCESS_FIELD = 15; +const unsigned int DXGI_USAGE_DISCARD_ON_PRESENT = 0x200; +const unsigned int DXGI_USAGE_UNORDERED_ACCESS = 0x400; + +const unsigned int DXGI_PRESENT_TEST = 1; +const unsigned int DXGI_PRESENT_DO_NOT_SEQUENCE = 2; +const unsigned int DXGI_PRESENT_RESTART = 4; + +typedef enum DXGI_SWAP_CHAIN_FLAG +{ + DXGI_SWAP_CHAIN_FLAG_NONPREROTATED = 1, + DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH = 2, + DXGI_SWAP_CHAIN_FLAG_GDI_COMPATIBLE = 4 +} DXGI_SWAP_CHAIN_FLAG; + +typedef struct DXGI_ADAPTER_DESC1 +{ + WCHAR Description[128]; + UINT VendorId; + UINT DeviceId; + UINT SubSysId; + UINT Revision; + SIZE_T DedicatedVideoMemory; + SIZE_T DedicatedSystemMemory; + SIZE_T SharedSystemMemory; + LUID AdapterLuid; + UINT Flags; +} DXGI_ADAPTER_DESC1; + +[object, local, uuid("035f3ab4-482e-4e50-b41f-8a7f8bd8960b")] +interface IDXGIResource : IDXGIDeviceSubObject +{ + HRESULT GetSharedHandle( + [out] HANDLE *a + ); + + HRESULT GetUsage( + [out] DXGI_USAGE *a + ); + + HRESULT SetEvictionPriority( + [in] UINT a + ); + + HRESULT GetEvictionPriority( + [out] UINT *a + ); +}; + +[object, local, uuid("4AE63092-6327-4c1b-80AE-BFE12EA32B86")] +interface IDXGISurface1 : IDXGISurface +{ + HRESULT GetDC( + [in] BOOL a, + [out] HDC *b + ); + + HRESULT ReleaseDC( + [in, optional] RECT *a + ); + }; + +[object, local, uuid("77db970f-6276-48ba-ba28-070143b4392c")] +interface IDXGIDevice1 : IDXGIDevice +{ + HRESULT SetMaximumFrameLatency( + [in] UINT a + ); + + HRESULT GetMaximumFrameLatency( + [out] UINT *a + ); +}; + +[object, local, uuid("29038f61-3839-4626-91fd-086879011a05")] +interface IDXGIAdapter1 : IDXGIAdapter +{ + HRESULT GetDesc1( + [out] DXGI_ADAPTER_DESC1 *a + ); +}; + +[object, local, uuid("770aae78-f26f-4dba-a829-253c83d1b387")] +interface IDXGIFactory1 : IDXGIFactory +{ + HRESULT EnumAdapters1( + [in] UINT a, + [out] IDXGIAdapter1 **b + ); + + BOOL IsCurrent(); +}; + +[local] HRESULT CreateDXGIFactory1(REFIID riid, void **factory); diff --git a/src/gallium/state_trackers/d3d1x/d3dapi/dxgiformat.idl b/src/gallium/state_trackers/d3d1x/d3dapi/dxgiformat.idl new file mode 100644 index 0000000000..28846e9601 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3dapi/dxgiformat.idl @@ -0,0 +1,129 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +typedef enum DXGI_FORMAT { + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_R32G32B32A32_TYPELESS, + DXGI_FORMAT_R32G32B32A32_FLOAT, + DXGI_FORMAT_R32G32B32A32_UINT, + DXGI_FORMAT_R32G32B32A32_SINT, + DXGI_FORMAT_R32G32B32_TYPELESS, + DXGI_FORMAT_R32G32B32_FLOAT, + DXGI_FORMAT_R32G32B32_UINT, + DXGI_FORMAT_R32G32B32_SINT, + DXGI_FORMAT_R16G16B16A16_TYPELESS, + DXGI_FORMAT_R16G16B16A16_FLOAT, + DXGI_FORMAT_R16G16B16A16_UNORM, + DXGI_FORMAT_R16G16B16A16_UINT, + DXGI_FORMAT_R16G16B16A16_SNORM, + DXGI_FORMAT_R16G16B16A16_SINT, + DXGI_FORMAT_R32G32_TYPELESS, + DXGI_FORMAT_R32G32_FLOAT, + DXGI_FORMAT_R32G32_UINT, + DXGI_FORMAT_R32G32_SINT, + DXGI_FORMAT_R32G8X24_TYPELESS, + DXGI_FORMAT_D32_FLOAT_S8X24_UINT, + DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, + DXGI_FORMAT_X32_TYPELESS_G8X24_UINT, + DXGI_FORMAT_R10G10B10A2_TYPELESS, + DXGI_FORMAT_R10G10B10A2_UNORM, + DXGI_FORMAT_R10G10B10A2_UINT, + DXGI_FORMAT_R11G11B10_FLOAT, + DXGI_FORMAT_R8G8B8A8_TYPELESS, + DXGI_FORMAT_R8G8B8A8_UNORM, + DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, + DXGI_FORMAT_R8G8B8A8_UINT, + DXGI_FORMAT_R8G8B8A8_SNORM, + DXGI_FORMAT_R8G8B8A8_SINT, + DXGI_FORMAT_R16G16_TYPELESS, + DXGI_FORMAT_R16G16_FLOAT, + DXGI_FORMAT_R16G16_UNORM, + DXGI_FORMAT_R16G16_UINT, + DXGI_FORMAT_R16G16_SNORM, + DXGI_FORMAT_R16G16_SINT, + DXGI_FORMAT_R32_TYPELESS, + DXGI_FORMAT_D32_FLOAT, + DXGI_FORMAT_R32_FLOAT, + DXGI_FORMAT_R32_UINT, + DXGI_FORMAT_R32_SINT, + DXGI_FORMAT_R24G8_TYPELESS, + DXGI_FORMAT_D24_UNORM_S8_UINT, + DXGI_FORMAT_R24_UNORM_X8_TYPELESS, + DXGI_FORMAT_X24_TYPELESS_G8_UINT, + DXGI_FORMAT_R8G8_TYPELESS, + DXGI_FORMAT_R8G8_UNORM, + DXGI_FORMAT_R8G8_UINT, + DXGI_FORMAT_R8G8_SNORM, + DXGI_FORMAT_R8G8_SINT, + DXGI_FORMAT_R16_TYPELESS, + DXGI_FORMAT_R16_FLOAT, + DXGI_FORMAT_D16_UNORM, + DXGI_FORMAT_R16_UNORM, + DXGI_FORMAT_R16_UINT, + DXGI_FORMAT_R16_SNORM, + DXGI_FORMAT_R16_SINT, + DXGI_FORMAT_R8_TYPELESS, + DXGI_FORMAT_R8_UNORM, + DXGI_FORMAT_R8_UINT, + DXGI_FORMAT_R8_SNORM, + DXGI_FORMAT_R8_SINT, + DXGI_FORMAT_A8_UNORM, + DXGI_FORMAT_R1_UNORM, + DXGI_FORMAT_R9G9B9E5_SHAREDEXP, + DXGI_FORMAT_R8G8_B8G8_UNORM, + DXGI_FORMAT_G8R8_G8B8_UNORM, + DXGI_FORMAT_BC1_TYPELESS, + DXGI_FORMAT_BC1_UNORM, + DXGI_FORMAT_BC1_UNORM_SRGB, + DXGI_FORMAT_BC2_TYPELESS, + DXGI_FORMAT_BC2_UNORM, + DXGI_FORMAT_BC2_UNORM_SRGB, + DXGI_FORMAT_BC3_TYPELESS, + DXGI_FORMAT_BC3_UNORM, + DXGI_FORMAT_BC3_UNORM_SRGB, + DXGI_FORMAT_BC4_TYPELESS, + DXGI_FORMAT_BC4_UNORM, + DXGI_FORMAT_BC4_SNORM, + DXGI_FORMAT_BC5_TYPELESS, + DXGI_FORMAT_BC5_UNORM, + DXGI_FORMAT_BC5_SNORM, + DXGI_FORMAT_B5G6R5_UNORM, + DXGI_FORMAT_B5G5R5A1_UNORM, + DXGI_FORMAT_B8G8R8A8_UNORM, + DXGI_FORMAT_B8G8R8X8_UNORM, + DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM, + DXGI_FORMAT_B8G8R8A8_TYPELESS, + DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, + DXGI_FORMAT_B8G8R8X8_TYPELESS, + DXGI_FORMAT_B8G8R8X8_UNORM_SRGB, + DXGI_FORMAT_BC6H_TYPELESS, + DXGI_FORMAT_BC6H_UF16, + DXGI_FORMAT_BC6H_SF16, + DXGI_FORMAT_BC7_TYPELESS, + DXGI_FORMAT_BC7_UNORM, + DXGI_FORMAT_BC7_UNORM_SRGB, + DXGI_FORMAT_FORCE_UINT = 0xffffffff +} DXGI_FORMAT; diff --git a/src/gallium/state_trackers/d3d1x/d3dapi/dxgitype.idl b/src/gallium/state_trackers/d3d1x/d3dapi/dxgitype.idl new file mode 100644 index 0000000000..31eba08cff --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/d3dapi/dxgitype.idl @@ -0,0 +1,84 @@ +/* + * Copyright 2007 Andras Kovacs + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +/* DXGI 1.1 IDL, and missing DXGI 1.0 parts added by Luca Barbieri on Sep 2010 */ + +import "oaidl.idl"; +import "ocidl.idl"; + +import "dxgiformat.idl"; + +typedef struct DXGI_SAMPLE_DESC { + UINT Count; + UINT Quality; +} DXGI_SAMPLE_DESC; + +typedef enum DXGI_MODE_ROTATION { + DXGI_MODE_ROTATION_UNSPECIFIED = 0, + DXGI_MODE_ROTATION_IDENTITY = 1, + DXGI_MODE_ROTATION_ROTATE90 = 2, + DXGI_MODE_ROTATION_ROTATE180 = 3, + DXGI_MODE_ROTATION_ROTATE270 = 4, +} DXGI_MODE_ROTATION; + +typedef enum DXGI_MODE_SCANLINE_ORDER { + DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED = 0, + DXGI_MODE_SCANLINE_ORDER_PROGRESSIVE = 1, + DXGI_MODE_SCANLINE_ORDER_UPPER_FIELD_FIRST = 2, + DXGI_MODE_SCANLINE_ORDER_LOWER_FIELD_FIRST = 3, +} DXGI_MODE_SCANLINE_ORDER; + +typedef enum DXGI_MODE_SCALING { + DXGI_MODE_SCALING_UNSPECIFIED = 0, + DXGI_MODE_SCALING_CENTERED = 1, + DXGI_MODE_SCALING_STRETCHED = 2, +} DXGI_MODE_SCALING; + +typedef struct DXGI_RATIONAL { + UINT Numerator; + UINT Denominator; +} DXGI_RATIONAL; + +typedef struct DXGI_MODE_DESC { + UINT Width; + UINT Height; + DXGI_RATIONAL RefreshRate; + DXGI_FORMAT Format; + DXGI_MODE_SCANLINE_ORDER ScanlineOrdering; + DXGI_MODE_SCALING Scaling; +} DXGI_MODE_DESC; + +typedef struct DXGI_GAMMA_CONTROL_CAPABILITIES { + BOOL ScaleAndOffsetSupported; + float MaxConvertedValue; + float MinConvertedValue; + UINT NumGammaControlPoints; + float ControlPointPositions[1025]; +} DXGI_GAMMA_CONTROL_CAPABILITIES; + +typedef struct DXGI_RGB { + float Red; + float Green; + float Blue; +} DXGI_RGB; + +typedef struct DXGI_GAMMA_CONTROL { + DXGI_RGB Scale; + DXGI_RGB Offset; + DXGI_RGB GammaCurve[1025]; +} DXGI_GAMMA_CONTROL; diff --git a/src/gallium/state_trackers/d3d1x/docs/Makefile b/src/gallium/state_trackers/d3d1x/docs/Makefile new file mode 100644 index 0000000000..7f38fa73fb --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/docs/Makefile @@ -0,0 +1,5 @@ +all: module_dependencies.svg module_dependencies.pdf +.IGNORE: module_dependencies.svg module_dependencies.pdf + +include ../Makefile.inc + diff --git a/src/gallium/state_trackers/d3d1x/docs/coding_style.txt b/src/gallium/state_trackers/d3d1x/docs/coding_style.txt new file mode 100644 index 0000000000..fb09417eac --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/docs/coding_style.txt @@ -0,0 +1,84 @@ +The goal of these guidelines is to allow as much freedom as possible, while keeping the code buildable and pleasant to read. + +* Formatting + +- Indent with a single tab character. This is the best choice, since anyone can use the visual indentation he prefers by adjust the tab width setting in his editor. +- Align multiline statements with an additional extra tab before each continuation line +- Keep in mind that people can program with proportional fonts: hence, don't attempt to align anything not at the start of the line, since it's impossible +- In general, there should never be two consecutive spaces in the source code +- There is no strict limit on line length, but try to not make lines too long, and insert a line break where it looks good + +* Language/platform features + +All language features of C++03 with TR1 and all the STL library may be used. +Obviously, try to keep the code simple, readable and intuitive, code size small, and compilation time short where possible. +Platform/compiler-specific extensions can be used if beneficial, protected by #ifs. + +C++0x is currently not used since it's unreleased and currently not well supported by clang. +Once GCC, clang and Visual C++ all have very good or complete support, and ideally the standard is finalized, we can start taking advantage of it. +Change this document once that happens. + +Boost is currently not used because it hasn't been necessary and it's best to keep things simple. +If really necessary, add a dependency on it, but use it judiciously. + +C should be used only for old code, and preferably completely avoided. + +You can freely assume that char is 8-bit, short 16-bit and int 32-bit, that long and pointers are 32-bit or 64-bit, that long long is at least 64-bit, that float is 32-bit and that double is 64-bit. +However, when you intend a specific size, int8_t, etc. are preferred. + +* Naming style + + Code implementing public parts of Windows interfaces (and derived ones) should follow Windows naming conventions: + - Classes are like GalliumD3D11VertexShader + - Functions are like CreateVertexShader + - Variables are like ppVertexShader + + Other code should follow Gallium/Linux/POSIX/STL/Boost naming conventions: + - Classes are like maybe_mutex_t + - Functions are like xs_create_shader + - Variables are like sampler_view + + Template parameters are named accordingly to what looks best for the specific case. + Typically it will be FooBar for typename parameters and foo_bar for non-typename ones. + + * Implementation style + +See the comments in d3d1xstutil.h for the COM implementation method. +In particular, avoid multiple/virtual inheritance in favor of mixins where possible. + +Try to limit or avoid preprocessor magic and multiline macros and use templates instead where possible. +Often, you can lessen the preprocessor magic by putting some of it in a template instantiated by the remaining magic. + +Forward declarations should not be used unless necessary. +In particular C++ classes should be implemented "inline" and should you should almost never have a forward declaration of a class. +To achieve this, you can opt to create an "interface class", which goes into an header or earlier in the C++ file, and an "implementation class" with goes in the C++ file. +Alternatively, use global helpers with forward declaration. + +Order definitions so that forward declarations are not necessary (e.g. put main at the end of the file). + +Choose between "struct" or "class" depending on whether the first declared member is public or private, to save the explicit specifier. + +Try to use const appropriately, esp. as a qualifier for member functions. + +Try to avoid Microsoft-style TYPES like FLOAT, UINT, etc. in favor of the usual C types like float, unsigned. + +Where feasible, if a platform is missing a function/keyword, add a definition of it with the standard name, rather than inventing an "abstraction layer". + +Try to use typedefs for STL maps on which you need to declare iterations, as well as function pointers or other "weird" C types. + + To iterate, use the following idiom from LLVM, which is optimal, unless end() is trivial: + for(iterator_type i = begin(), e = end(); i != e; ++i) + {} + + Otherwise, you risk the compiler evaluating end() for each loop iteration. + If end() is trivial, use this: + for(iterator_type i = begin(); i != end(); ++i) + {} + + Note the "++i" instead of the "i++" to avoid creating an unnecessary copy (esp. with overloaded operators). + + Declare variables just before they are needed, and inside the for() header. + Usually, you should initialize variable in the declaration if that's the only assignment or if it is a default value, and as a separate assignment if not. + +Try to use C++ references (with const if appropriate) when the pointer must be non-null, and that type is not already customarily passed with a pointer. + diff --git a/src/gallium/state_trackers/d3d1x/docs/module_dependencies.dot b/src/gallium/state_trackers/d3d1x/docs/module_dependencies.dot new file mode 100644 index 0000000000..3db6fb2167 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/docs/module_dependencies.dot @@ -0,0 +1,25 @@ +digraph module_dependencies +{ + d3dapi -> w32api; + gd3dapi -> d3dapi; + progs -> d3dapi [style="dotted"]; + progs -> gd3dapi [style="dotted"]; + progs -> microsoft_directx_sdk [style="dotted"]; + d3d1xstutil -> gd3dapi + d3d1xshader -> d3dapi + gd3d1x -> d3d1xshader; + gd3d1x -> d3d1xstutil; + gd3d10 -> gd3d1x; + gd3d11 -> gd3d1x; + dxgi -> d3d1xstutil; + dxgi -> gd3dapi; + dxgid3d10 -> gd3dapi; + dxgid3d11 -> gd3dapi; + "d3d11.dll" -> gd3d11; + "d3d11.dll" -> dxgid3d11; + "d3d10.dll" -> gd3d10; + "d3d10.dll" -> dxgid3d10; + "dxgi.dll" -> dxgi; + tools -> mstools + mstools -> microsoft_directx_sdk +}; diff --git a/src/gallium/state_trackers/d3d1x/docs/source_layout.txt b/src/gallium/state_trackers/d3d1x/docs/source_layout.txt new file mode 100644 index 0000000000..46e9f2d983 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/docs/source_layout.txt @@ -0,0 +1,17 @@ +Source layout and architecture of the Gallium D3D state tracker + +w32api is a link to Wine's include files for the Windows API +d3dapi contains the headers for Direct3D 10.0, 10.1 and 11.0 (independently created, except d3d10 which is based on Wine) +gd3dapi contains the Gallium COM state tracker API and extensions to the DXGI and Direct3D APIs, both for internal and external usage +d3d1xshader is a standalone module with a parser, disassembler and utility routines for Direct3D 10/11 shaders using Shader Model 4/5 instructions encoded using Tokenized Program Format embedded in a DXBC chunked container (the data format produced by the HLSL compiler). +mstools contains a downloader for the Microsoft HLSL compiler +tools contains the shader compiler, currently wrapping the Microsoft HLSL compiler +gd3d10 contains the implementation of Direct3D 10 and 10.1 with Gallium-specific entry points +gd3d11 contains the implementation of Direct3D 11 with Gallium-specific entry points +gd3d1x contains the shader translator and code not directly implementing Direct3D interfaces, but needed by those implementations +dxgid3d10 contains the DXGI-based "official" entry points to Direct3D 10.0 and 10.1 +dxgid3d11 contains the DXGI-based "official" entry points to Direct3D 11 +dxgi contains the implementation of DXGI (currently over the EGL native interface) +d3d1xstutil contains helper code shared among all the modules in the state tracker +programs contains the Gallium Direct3D 11 demos, runnable either on the Microsoft or Gallium implementations +docs contains documentation diff --git a/src/gallium/state_trackers/d3d1x/dxgi/Makefile b/src/gallium/state_trackers/d3d1x/dxgi/Makefile new file mode 100644 index 0000000000..6cdc33b8c4 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/dxgi/Makefile @@ -0,0 +1,17 @@ +LIBNAME=dxgi +LIBRARY_INCLUDES=-I../../../../../include -Iinclude -I../gd3dapi -I../d3dapi -I../w32api -I../d3d1xstutil/include -I../include -I../../../include -I../../../auxiliary -I../../../state_trackers/egl/common +LIBRARY_DEFINES=-DDXGI_DRIVER_SEARCH_DIR=\"$(EGL_DRIVER_INSTALL_DIR)\" +CPP_SOURCES=$(wildcard src/*.cpp) + +include ../Makefile.inc + +ifneq ($(findstring x11, $(EGL_PLATFORMS)),) +LIBRARY_DEFINES += -DGALLIUM_DXGI_USE_X11 +endif +ifneq ($(findstring drm, $(EGL_PLATFORMS)),) +LIBRARY_DEFINES += -DGALLIUM_DXGI_USE_DRM +endif +ifneq ($(findstring fbdev, $(EGL_PLATFORMS)),) +LIBRARY_DEFINES += -DGALLIUM_DXGI_USE_FBDEV +endif + diff --git a/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_loader.cpp b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_loader.cpp new file mode 100644 index 0000000000..7b72019458 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_loader.cpp @@ -0,0 +1,206 @@ +/* + * Mesa 3-D graphics library + * Version: 7.9 + * + * Copyright (C) 2010 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#include "dxgi_private.h" +#include <stdio.h> +extern "C" +{ +#include "state_tracker/drm_driver.h" +#include "util/u_dl.h" +} +#define PIPE_PREFIX "pipe_" + +static const char * +get_search_path(void) +{ + static const char *search_path; + + if (!search_path) { + static char buffer[1024]; + const char *p; + int ret; + + p = getenv("DXGI_DRIVERS_PATH"); + if(!p) + p = getenv("EGL_DRIVERS_PATH"); +#ifdef __unix__ + if (p && (geteuid() != getuid() || getegid() != getgid())) { + p = NULL; + } +#endif + + if (p) { + ret = snprintf(buffer, sizeof(buffer), + "%s:%s", p, DXGI_DRIVER_SEARCH_DIR); + if (ret > 0 && ret < (int)sizeof(buffer)) + search_path = buffer; + } + } + if (!search_path) + search_path = DXGI_DRIVER_SEARCH_DIR; + + return search_path; +} + +static void +for_each_colon_separated(const char *search_path, + bool (*loader)(const char *, size_t, void *), + void *loader_data) +{ + const char *cur, *next; + size_t len; + + cur = search_path; + while (cur) { + next = strchr(cur, ':'); + len = (next) ? next - cur : strlen(cur); + + if (!loader(cur, len, loader_data)) + break; + + cur = (next) ? next + 1 : NULL; + } +} + +void +for_each_in_search_path(bool (*callback)(const char *, size_t, void *), + void *callback_data) +{ + const char *search_path = get_search_path(); + for_each_colon_separated(search_path, callback, callback_data); +} + +static struct pipe_module { + boolean initialized; + char *name; + struct util_dl_library *lib; + const struct drm_driver_descriptor *drmdd; + struct pipe_screen *(*swrast_create_screen)(struct sw_winsys *); +} pipe_modules[16]; + +static bool +dlopen_pipe_module_cb(const char *dir, size_t len, void *callback_data) +{ + struct pipe_module *pmod = (struct pipe_module *) callback_data; + char path[1024]; + int ret; + + if (len) { + ret = snprintf(path, sizeof(path), + "%.*s/" PIPE_PREFIX "%s" UTIL_DL_EXT, len, dir, pmod->name); + } + else { + ret = snprintf(path, sizeof(path), + PIPE_PREFIX "%s" UTIL_DL_EXT, pmod->name); + } + if (ret > 0 && ret < (int)sizeof(path)) { + pmod->lib = util_dl_open(path); + } + + return !(pmod->lib); +} + +static bool +load_pipe_module(struct pipe_module *pmod, const char *name) +{ + pmod->name = strdup(name); + if (!pmod->name) + return FALSE; + + for_each_in_search_path(dlopen_pipe_module_cb, (void *) pmod); + if (pmod->lib) { + pmod->drmdd = (const struct drm_driver_descriptor *) + util_dl_get_proc_address(pmod->lib, "driver_descriptor"); + + /* sanity check on the name */ + if (pmod->drmdd && strcmp(pmod->drmdd->name, pmod->name) != 0) + pmod->drmdd = NULL; + + /* swrast */ + if (pmod->drmdd && !pmod->drmdd->driver_name) { + pmod->swrast_create_screen = + (struct pipe_screen *(*)(struct sw_winsys *)) + util_dl_get_proc_address(pmod->lib, "swrast_create_screen"); + if (!pmod->swrast_create_screen) + pmod->drmdd = NULL; + } + + if (!pmod->drmdd) { + util_dl_close(pmod->lib); + pmod->lib = NULL; + } + } + + return (pmod->drmdd != NULL); +} + + +static struct pipe_module * +get_pipe_module(const char *name) +{ + struct pipe_module *pmod = NULL; + unsigned i; + + if (!name) + return NULL; + + for (i = 0; i < sizeof(pipe_modules) / sizeof(pipe_modules[0]); i++) { + if (!pipe_modules[i].initialized || + strcmp(pipe_modules[i].name, name) == 0) { + pmod = &pipe_modules[i]; + break; + } + } + if (!pmod) + return NULL; + + if (!pmod->initialized) { + load_pipe_module(pmod, name); + pmod->initialized = TRUE; + } + + return pmod; +} + +struct native_display; + +struct pipe_screen * +dxgi_loader_create_drm_screen(struct native_display* dpy, const char *name, int fd) +{ + struct pipe_module *pmod = get_pipe_module(name); + return (pmod && pmod->drmdd && pmod->drmdd->create_screen) ? + pmod->drmdd->create_screen(fd) : NULL; +} + +struct pipe_screen * +dxgi_loader_create_sw_screen(struct native_display* dpy, struct sw_winsys *ws) +{ + struct pipe_module *pmod = get_pipe_module("swrast"); + return (pmod && pmod->swrast_create_screen) ? + pmod->swrast_create_screen(ws) : NULL; +} diff --git a/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp new file mode 100644 index 0000000000..e1c34611d1 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp @@ -0,0 +1,1500 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "dxgi_private.h" +extern "C" { +#include "native.h" +#include <util/u_format.h> +#include <util/u_inlines.h> +#include <util/u_simple_shaders.h> +#include <pipe/p_shader_tokens.h> +} +#include <iostream> +#include <memory> + +struct GalliumDXGIOutput; +struct GalliumDXGIAdapter; +struct GalliumDXGISwapChain; +struct GalliumDXGIFactory; + +static HRESULT GalliumDXGISwapChainCreate(GalliumDXGIFactory* factory, IUnknown* device, const DXGI_SWAP_CHAIN_DESC& desc, IDXGISwapChain** out_swap_chain); +static HRESULT GalliumDXGIAdapterCreate(GalliumDXGIFactory* adapter, const struct native_platform* platform, void* dpy, IDXGIAdapter1** out_adapter); +static HRESULT GalliumDXGIOutputCreate(GalliumDXGIAdapter* adapter, const std::string& name, const struct native_connector* connector, IDXGIOutput** out_output); +static void GalliumDXGISwapChainRevalidate(IDXGISwapChain* swap_chain); + +template<typename Base = IDXGIObject, typename Parent = IDXGIObject> +struct GalliumDXGIObject : public GalliumPrivateDataComObject<Base> +{ + ComPtr<Parent> parent; + + GalliumDXGIObject(Parent* p_parent = 0) + { + this->parent = p_parent; + } + + virtual HRESULT STDMETHODCALLTYPE GetParent( + REFIID riid, + void **out_parent) + { + return parent->QueryInterface(riid, out_parent); + } +}; + +COM_INTERFACE(IGalliumDXGIBackend, IUnknown) + +// TODO: somehow check whether the window is fully obscured or not +struct GalliumDXGIIdentityBackend : public GalliumComObject<IGalliumDXGIBackend> +{ + virtual HRESULT STDMETHODCALLTYPE BeginPresent( + HWND hwnd, + void** present_cookie, + void** window, + RECT *rect, + RGNDATA **rgndata, + BOOL* preserve_aspect_ratio + ) + { + *window = (void*)hwnd; + rect->left = 0; + rect->top = 0; + rect->right = INT_MAX; + rect->bottom = INT_MAX; + *rgndata = 0; + + // yes, because we like things looking good + *preserve_aspect_ratio = TRUE; + *present_cookie = 0; + return S_OK; + } + + virtual void STDMETHODCALLTYPE EndPresent( + HWND hwnd, + void* present_cookie + ) + {} + + virtual HRESULT STDMETHODCALLTYPE TestPresent(HWND hwnd) + { + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetPresentSize( + HWND hwnd, + unsigned* width, + unsigned* height + ) + { + *width = 0; + *height = 0; + return S_OK; + } +}; + +// TODO: maybe install an X11 error hook, so we can return errors properly +struct GalliumDXGIX11IdentityBackend : public GalliumDXGIIdentityBackend +{ + Display* dpy; + + GalliumDXGIX11IdentityBackend(Display* dpy) + : dpy(dpy) + {} + + virtual HRESULT STDMETHODCALLTYPE GetPresentSize( + HWND hwnd, + unsigned* width, + unsigned* height + ) + { + XWindowAttributes xwa; + XGetWindowAttributes(dpy, (Window)hwnd, &xwa); + *width = xwa.width; + *height = xwa.height; + return S_OK; + } +}; + +struct GalliumDXGIFactory : public GalliumDXGIObject<IDXGIFactory1, IUnknown> +{ + HWND associated_window; + const struct native_platform* platform; + void* display; + ComPtr<IGalliumDXGIBackend> backend; + void* resolver_cookie; + + GalliumDXGIFactory(const struct native_platform* platform, void* display, IGalliumDXGIBackend* p_backend) + : GalliumDXGIObject<IDXGIFactory1, IUnknown>((IUnknown*)NULL), platform(platform), display(display) + { + if(p_backend) + backend = p_backend; + else if(!strcmp(platform->name, "X11")) + backend.reset(new GalliumDXGIX11IdentityBackend((Display*)display)); + else + backend.reset(new GalliumDXGIIdentityBackend()); + } + + virtual HRESULT STDMETHODCALLTYPE EnumAdapters( + UINT adapter, + IDXGIAdapter **out_adapter) + { + return EnumAdapters1(adapter, (IDXGIAdapter1**)out_adapter); + } + + virtual HRESULT STDMETHODCALLTYPE EnumAdapters1( + UINT adapter, + IDXGIAdapter1 **out_adapter) + { + *out_adapter = 0; + if(adapter == 0) + { + return GalliumDXGIAdapterCreate(this, platform, display, out_adapter); + } +#if 0 + // TODO: enable this + if(platform == native_get_x11_platform()) + { + unsigned nscreens = ScreenCount((Display*)display); + if(adapter < nscreens) + { + unsigned def_screen = DefaultScreen(display); + if(adapter <= def_screen) + --adapter; + *out_adapter = GalliumDXGIAdapterCreate(this, platform, display, adapter); + return S_OK; + } + } +#endif + return DXGI_ERROR_NOT_FOUND; + } + + /* TODO: this is a mysterious underdocumented magic API + * Can we have multiple windows associated? + * Can we have multiple windows associated if we use multiple factories? + * If so, what should GetWindowAssociation return? + * If not, does a new swapchain steal the association? + * Does this act for existing swapchains? For new swapchains? + */ + virtual HRESULT STDMETHODCALLTYPE MakeWindowAssociation( + HWND window_handle, + UINT flags) + { + /* TODO: actually implement, for Wine, X11 and KMS*/ + associated_window = window_handle; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetWindowAssociation( + HWND *pwindow_handle) + { + *pwindow_handle = associated_window; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE CreateSwapChain( + IUnknown *device, + DXGI_SWAP_CHAIN_DESC *desc, + IDXGISwapChain **out_swap_chain) + { + return GalliumDXGISwapChainCreate(this, device, *desc, out_swap_chain); + } + + virtual HRESULT STDMETHODCALLTYPE CreateSoftwareAdapter( + HMODULE module, + IDXGIAdapter **out_adapter) + { + /* TODO: ignore the module, and just create a Gallium software screen */ + *out_adapter = 0; + return E_NOTIMPL; + } + + /* TODO: support hotplug */ + virtual BOOL STDMETHODCALLTYPE IsCurrent( void) + { + return TRUE; + } +}; + +struct GalliumDXGIAdapter + : public GalliumMultiComObject< + GalliumDXGIObject<IDXGIAdapter1, GalliumDXGIFactory>, + IGalliumAdapter> +{ + struct native_display* display; + const struct native_config** configs; + std::unordered_multimap<unsigned, unsigned> configs_by_pipe_format; + std::unordered_map<unsigned, unsigned> configs_by_native_visual_id; + const struct native_connector** connectors; + unsigned num_configs; + DXGI_ADAPTER_DESC1 desc; + std::vector<ComPtr<IDXGIOutput> > outputs; + int num_outputs; + struct native_event_handler handler; + + GalliumDXGIAdapter(GalliumDXGIFactory* factory, const struct native_platform* platform, void* dpy) + { + this->parent = factory; + + handler.invalid_surface = handle_invalid_surface; + handler.new_drm_screen = dxgi_loader_create_drm_screen; + handler.new_sw_screen = dxgi_loader_create_sw_screen; + display = platform->create_display(dpy, &handler, this); + if(!display) + throw E_FAIL; + memset(&desc, 0, sizeof(desc)); + std::string s = std::string("GalliumD3D on ") + display->screen->get_name(display->screen) + " by " + display->screen->get_vendor(display->screen); + + /* hopefully no one will decide to use UTF-8 in Gallium name/vendor strings */ + for(int i = 0; i < std::min((int)s.size(), 127); ++i) + desc.Description[i] = (WCHAR)s[i]; + + // TODO: add an interface to get these; for now, return mid/low values + desc.DedicatedVideoMemory = 256 << 20; + desc.DedicatedSystemMemory = 256 << 20; + desc.SharedSystemMemory = 1024 << 20; + + // TODO: we should actually use an unique ID instead + *(void**)&desc.AdapterLuid = dpy; + + configs = display->get_configs(display, (int*)&num_configs); + for(unsigned i = 0; i < num_configs; ++i) + { + if(configs[i]->window_bit) + { + configs_by_pipe_format.insert(std::make_pair(configs[i]->color_format, i)); + configs_by_native_visual_id[configs[i]->native_visual_id] = i; + } + } + + connectors = 0; + num_outputs = 0; + + if(display->modeset) + { + int num_crtcs; + + connectors = display->modeset->get_connectors(display, &num_outputs, &num_crtcs); + if(!connectors) + num_outputs = 0; + else if(!num_outputs) + { + free(connectors); + connectors = 0; + } + } + if(!num_outputs) + num_outputs = 1; + } + + static void handle_invalid_surface(struct native_display *ndpy, struct native_surface *nsurf, unsigned int seq_num) + { + GalliumDXGISwapChainRevalidate((IDXGISwapChain*)nsurf->user_data); + } + + ~GalliumDXGIAdapter() + { + display->destroy(display); + free(configs); + free(connectors); + } + + virtual HRESULT STDMETHODCALLTYPE EnumOutputs( + UINT output, + IDXGIOutput **out_output) + { + if(output >= (unsigned)num_outputs) + return DXGI_ERROR_NOT_FOUND; + + if(connectors) + { + std::ostringstream ss; + ss << "output #" << output; + return GalliumDXGIOutputCreate(this, ss.str(), connectors[output], out_output); + } + else + return GalliumDXGIOutputCreate(this, "Unique output", NULL, out_output); + } + + virtual HRESULT STDMETHODCALLTYPE GetDesc( + DXGI_ADAPTER_DESC *desc) + { + memcpy(desc, &desc, sizeof(*desc)); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetDesc1( + DXGI_ADAPTER_DESC1 *desc) + { + memcpy(desc, &desc, sizeof(*desc)); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE CheckInterfaceSupport( + REFGUID interface_name, + LARGE_INTEGER *u_m_d_version) + { + // these number was taken from Windows 7 with Catalyst 10.8: its meaning is unclear + if(interface_name == IID_ID3D11Device || interface_name == IID_ID3D10Device1 || interface_name == IID_ID3D10Device) + { + u_m_d_version->QuadPart = 0x00080011000a0411ULL; + return S_OK; + } + return DXGI_ERROR_UNSUPPORTED; + } + + pipe_screen* STDMETHODCALLTYPE GetGalliumScreen() + { + return display->screen; + } + + pipe_screen* STDMETHODCALLTYPE GetGalliumReferenceSoftwareScreen() + { + // TODO: give a softpipe screen + return display->screen; + } + + pipe_screen* STDMETHODCALLTYPE GetGalliumFastSoftwareScreen() + { + // TODO: give an llvmpipe screen + return display->screen; + } +}; + + +struct GalliumDXGIOutput : public GalliumDXGIObject<IDXGIOutput, GalliumDXGIAdapter> +{ + DXGI_OUTPUT_DESC desc; + const struct native_mode** modes; + DXGI_MODE_DESC* dxgi_modes; + unsigned num_modes; + const struct native_connector* connector; + DXGI_GAMMA_CONTROL* gamma; + + GalliumDXGIOutput(GalliumDXGIAdapter* adapter, std::string name, const struct native_connector* connector = 0) + : GalliumDXGIObject<IDXGIOutput, GalliumDXGIAdapter>(adapter), connector(connector) + { + memset(&desc, 0, sizeof(desc)); + for(unsigned i = 0; i < std::min(name.size(), sizeof(desc.DeviceName) - 1); ++i) + desc.DeviceName[i] = name[i]; + desc.AttachedToDesktop = TRUE; + /* TODO: should put an HMONITOR in desc.Monitor */ + + gamma = 0; + num_modes = 0; + modes = 0; + if(connector) + { + modes = parent->display->modeset->get_modes(parent->display, connector, (int*)&num_modes); + if(modes && num_modes) + { + dxgi_modes = new DXGI_MODE_DESC[num_modes]; + for(unsigned i = 0; i < num_modes; ++i) + { + dxgi_modes[i].Width = modes[i]->width; + dxgi_modes[i].Height = modes[i]->height; + dxgi_modes[i].RefreshRate.Numerator = modes[i]->refresh_rate; + dxgi_modes[i].RefreshRate.Denominator = 1; + dxgi_modes[i].Scaling = DXGI_MODE_SCALING_UNSPECIFIED; + dxgi_modes[i].ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED; + } + } + else + { + if(modes) + { + free(modes); + modes = 0; + } + goto use_fake_mode; + } + } + else + { +use_fake_mode: + dxgi_modes = new DXGI_MODE_DESC[1]; + dxgi_modes[0].Width = 1920; + dxgi_modes[0].Height = 1200; + dxgi_modes[0].RefreshRate.Numerator = 60; + dxgi_modes[0].RefreshRate.Denominator = 1; + dxgi_modes[0].Scaling = DXGI_MODE_SCALING_UNSPECIFIED; + dxgi_modes[0].ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED; + } + } + + ~GalliumDXGIOutput() + { + delete [] dxgi_modes; + free(modes); + if(gamma) + delete gamma; + } + + virtual HRESULT STDMETHODCALLTYPE GetDesc( + DXGI_OUTPUT_DESC *out_desc) + { + *out_desc = desc; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetDisplayModeList( + DXGI_FORMAT enum_format, + UINT flags, + UINT *pcount, + DXGI_MODE_DESC *desc) + { + /* TODO: should we return DXGI_ERROR_NOT_CURRENTLY_AVAILABLE when we don't + * support modesetting instead of fake modes? + */ + pipe_format format = dxgi_to_pipe_format[enum_format]; + if(parent->configs_by_pipe_format.count(format)) + { + if(!desc) + { + *pcount = num_modes; + return S_OK; + } + + unsigned copy_modes = std::min(num_modes, *pcount); + for(unsigned i = 0; i < copy_modes; ++i) + { + desc[i] = dxgi_modes[i]; + desc[i].Format = enum_format; + } + *pcount = num_modes; + + if(copy_modes < num_modes) + return DXGI_ERROR_MORE_DATA; + else + return S_OK; + } + else + { + *pcount = 0; + return S_OK; + } + } + + virtual HRESULT STDMETHODCALLTYPE FindClosestMatchingMode( + const DXGI_MODE_DESC *pModeToMatch, + DXGI_MODE_DESC *closest_match, + IUnknown *concerned_device) + { + /* TODO: actually implement this */ + DXGI_FORMAT dxgi_format = pModeToMatch->Format; + enum pipe_format format = dxgi_to_pipe_format[dxgi_format]; + init_pipe_to_dxgi_format(); + if(!parent->configs_by_pipe_format.count(format)) + { + if(!concerned_device) + return E_FAIL; + else + { + format = parent->configs[0]->color_format; + dxgi_format = pipe_to_dxgi_format[format]; + } + } + + *closest_match = dxgi_modes[0]; + closest_match->Format = dxgi_format; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE WaitForVBlank( void) + { + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE TakeOwnership( + IUnknown *device, + BOOL exclusive) + { + return S_OK; + } + + virtual void STDMETHODCALLTYPE ReleaseOwnership( void) + { + } + + virtual HRESULT STDMETHODCALLTYPE GetGammaControlCapabilities( + DXGI_GAMMA_CONTROL_CAPABILITIES *gamma_caps) + { + memset(gamma_caps, 0, sizeof(*gamma_caps)); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE SetGammaControl( + const DXGI_GAMMA_CONTROL *pArray) + { + if(!gamma) + gamma = new DXGI_GAMMA_CONTROL; + *gamma = *pArray; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetGammaControl( + DXGI_GAMMA_CONTROL *pArray) + { + if(gamma) + *pArray = *gamma; + else + { + pArray->Scale.Red = 1; + pArray->Scale.Green = 1; + pArray->Scale.Blue = 1; + pArray->Offset.Red = 0; + pArray->Offset.Green = 0; + pArray->Offset.Blue = 0; + for(unsigned i = 0; i <= 1024; ++i) + pArray->GammaCurve[i].Red = pArray->GammaCurve[i].Green = pArray->GammaCurve[i].Blue = (float)i / 1024.0; + } + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE SetDisplaySurface( + IDXGISurface *scanout_surface) + { + return E_NOTIMPL; + } + + virtual HRESULT STDMETHODCALLTYPE GetDisplaySurfaceData( + IDXGISurface *destination) + { + return E_NOTIMPL; + } + + virtual HRESULT STDMETHODCALLTYPE GetFrameStatistics( + DXGI_FRAME_STATISTICS *stats) + { + memset(stats, 0, sizeof(*stats)); +#ifdef _WIN32 + QueryPerformanceCounter(&stats->SyncQPCTime); +#endif + return E_NOTIMPL; + } +}; + +/* Swap chain are rather complex, and Microsoft's documentation is rather + * lacking. As far as I know, this is the most thorough publicly available + * description of how swap chains work, based on multiple sources and + * experimentation. + * + * There are two modes (called "swap effects") that a swap chain can operate in: + * discard and sequential. + * + * In discard mode, things always look as if there is a single buffer, which + * you can get with GetBuffers(0). + * The 2D texture returned by GetBuffers(0) and can only be + * used as a render target view and for resource copies, since no CPU access + * flags are set and only the D3D11_BIND_RENDER_TARGET bind flag is set. + * On Present, it is copied to the actual display + * surface and the contents become undefined. + * D3D may internally use multiple buffers, but you can't observe this, except + * by looking at the buffer contents after Present (but those are undefined). + * If it uses multiple buffers internally, then it will normally use buffer_count buffers + * (this has latency implications). + * Discard mode seems to internally use a single buffer in windowed mode, + * even if DWM is enabled, and buffer_count buffers in fullscreen mode. + * + * In sequential mode, the runtime alllocates buffer_count buffers. + * You can get each with GetBuffers(n). + * GetBuffers(0) ALWAYS points to the backbuffer to be presented and has the + * same usage constraints as the discard mode. + * GetBuffer(n) with n > 0 points to resources that are identical to buffer 0, but + * are classified as "read-only resources" (due to DXGI_USAGE_READ_ONLY), + * meaning that you can't create render target views on them, or use them as + * a CopyResource/CopySubresourceRegion destination. + * It appears the only valid operation is to use them as a source for CopyResource + * and CopySubresourceRegion as well as just waiting for them to become + * buffer 0 again. + * Buffer n - 1 is always displayed on screen. + * When you call Present(), the contents of the buffers are rotated, so that buffer 0 + * goes to buffer n - 1, and is thus displayed, and buffer 1 goes to buffer 0, becomes + * the accessible back buffer. + * The resources themselves are NOT rotated, so that you can still render on the + * same ID3D11Texture2D*, and views based on it, that you got before Present(). + * + * Present seems to happen by either copying the relevant buffer into the window, + * or alternatively making it the current one, either by programming the CRTC or + * by sending the resource name to the DWM compositor. + * + * Hence, you can call GetBuffer(0) once and keep using the same ID3D11Texture2D* + * and ID3D11RenderTargetView* (and other views if needed) you got from it. + * + * If the window gets resized, DXGI will then "emulate" all successive presentations, + * by using a stretched blit automatically. + * Thus, you should handle WM_SIZE and call ResizeBuffers to update the DXGI + * swapchain buffers size to the new window size. + * Doing so requires you to release all GetBuffers() results and anything referencing + * them, including views and Direct3D11 deferred context command lists (this is + * documented). + * + * How does Microsoft implement the rotation behavior? + * It turns out that it does it by calling RotateResourceIdentitiesDXGI in the user-mode + * DDI driver. + * This will rotate the kernel buffer handle, or possibly rotate the GPU virtual memory + * mappings. + * + * The reason this is done by driver instead of by the runtime appears to be that + * this is necessary to support driver-provided command list support, since otherwise + * the command list would not always target the current backbuffer, since it would + * be done at the driver level, while only the runtime knows about the rotation. + * + * OK, so how do we implement this in Gallium? + * + * There are three strategies: + * 1. Use a single buffer, and always copy it to a window system provided buffer, or + * just give the buffer to the window system if it supports that + * 2. Rotate the buffers in the D3D1x implementation, and recreate and rebind the views. + * Don't support driver-provided command lists + * 3. Add this rotation functionality to the Gallium driver, with the idea that it would rotate + * remap GPU virtual memory, so that virtual address are unchanged, but the physical + * ones are rotated (so that pushbuffers remain valid). + * If the driver does not support this, either fall back to (1), or have a layer doing this, + * putting a deferred context layer over this intermediate layer. + * + * (2) is not acceptable since it prevents an optimal implementation. + * (3) is the ideal solution, but it is complicated. + * + * Hence, we implement (1) for now, and will switch to (3) later. + * + * Note that (1) doesn't really work for DXGI_SWAP_EFFECT_SEQUENTIAL with more + * than one buffer, so we just pretend we got asked for a single buffer in that case + * Fortunately, no one seems to rely on that, so we'll just not implement it at first, and + * later perform the rotation with blits. + * Once we switch to (3), we'll just use real rotation to do it.. + * + * DXGI_SWAP_EFFECT_SEQUENTIAL with more than one buffer is of dubious use + * anyway, since you can only render or write to buffer 0, and other buffers can apparently + * be used only as sources for copies. + * I was unable to find any code using it either in DirectX SDK examples, or on the web. + * + * It seems the only reason you would use it is to not have to redraw from scratch, while + * also possibly avoid a copy compared to buffer_count == 1, assuming that your + * application is OK with having to redraw starting not from the last frame, but from + * one/two/more frames behind it. + * + * A better design would forbid the user specifying buffer_count explicitly, and + * would instead let the application give an upper bound on how old the buffer can + * become after presentation, with "infinite" being equivalent to discard. + * The runtime would then tell the application with frame number the buffer switched to + * after present. + * In addition, in a better design, the application would be allowed to specify the + * number of buffers available, having all them usable for rendering, so that things + * like video players could efficiently decode frames in parallel. + * Present would in such a better design gain a way to specify the number of buffers + * to present. + * + * Other miscellaneous info: + * DXGI_PRESENT_DO_NOT_SEQUENCE causes DXGI to hold the frame for another + * vblank interval without rotating the resource data. + * + * References: + * "DXGI Overview" in MSDN + * IDXGISwapChain documentation on MSDN + * "RotateResourceIdentitiesDXGI" on MSDN + * http://forums.xna.com/forums/p/42362/266016.aspx + */ + +static float quad_data[] = { + -1, -1, 0, 0, + -1, 1, 0, 1, + 1, 1, 1, 1, + 1, -1, 1, 0, +}; + +struct dxgi_blitter +{ + pipe_context* pipe; + bool normalized; + void* fs; + void* vs; + void* sampler[2]; + void* elements; + void* blend; + void* rasterizer; + void* zsa; + struct pipe_clip_state clip; + struct pipe_vertex_buffer vbuf; + struct pipe_draw_info draw; + + dxgi_blitter(pipe_context* pipe) + : pipe(pipe) + { + //normalized = !!pipe->screen->get_param(pipe, PIPE_CAP_NPOT_TEXTURES); + // TODO: need to update buffer in unnormalized case + normalized = true; + + struct pipe_rasterizer_state rs_state; + memset(&rs_state, 0, sizeof(rs_state)); + rs_state.cull_face = PIPE_FACE_NONE; + rs_state.gl_rasterization_rules = 1; + rs_state.flatshade = 1; + rasterizer = pipe->create_rasterizer_state(pipe, &rs_state); + + struct pipe_blend_state blendd; + memset(&blendd, 0, sizeof(blendd)); + blendd.rt[0].colormask = PIPE_MASK_RGBA; + blend = pipe->create_blend_state(pipe, &blendd); + + struct pipe_depth_stencil_alpha_state zsad; + memset(&zsad, 0, sizeof(zsad)); + zsa = pipe->create_depth_stencil_alpha_state(pipe, &zsad); + + struct pipe_vertex_element velem[2]; + memset(&velem[0], 0, sizeof(velem[0]) * 2); + velem[0].src_offset = 0; + velem[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + velem[1].src_offset = 8; + velem[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + elements = pipe->create_vertex_elements_state(pipe, 2, &velem[0]); + + for(unsigned stretch = 0; stretch < 2; ++stretch) + { + struct pipe_sampler_state sampler_state; + memset(&sampler_state, 0, sizeof(sampler_state)); + sampler_state.min_img_filter = stretch ? PIPE_TEX_FILTER_LINEAR : PIPE_TEX_FILTER_NEAREST; + sampler_state.mag_img_filter = stretch ? PIPE_TEX_FILTER_LINEAR : PIPE_TEX_FILTER_NEAREST; + sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler_state.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler_state.normalized_coords = normalized; + + sampler[stretch] = pipe->create_sampler_state(pipe, &sampler_state); + } + + fs = util_make_fragment_tex_shader(pipe, normalized ? TGSI_TEXTURE_2D : TGSI_TEXTURE_RECT, TGSI_INTERPOLATE_LINEAR); + + const unsigned semantic_names[] = { TGSI_SEMANTIC_POSITION, TGSI_SEMANTIC_GENERIC }; + const unsigned semantic_indices[] = { 0, 0 }; + vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names, semantic_indices); + + vbuf.buffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER, sizeof(quad_data)); + vbuf.buffer_offset = 0; + vbuf.max_index = ~0; + vbuf.stride = 4 * sizeof(float); + pipe_buffer_write(pipe, vbuf.buffer, 0, sizeof(quad_data), quad_data); + + memset(&clip, 0, sizeof(clip)); + + memset(&draw, 0, sizeof(draw)); + draw.mode = PIPE_PRIM_QUADS; + draw.count = 4; + draw.instance_count = 1; + draw.max_index = ~0; + } + + void blit(struct pipe_surface* surf, struct pipe_sampler_view* view, unsigned x, unsigned y, unsigned w, unsigned h) + { + struct pipe_framebuffer_state fb; + memset(&fb, 0, sizeof(fb)); + fb.nr_cbufs = 1; + fb.cbufs[0] = surf; + fb.width = surf->width; + fb.height = surf->height; + + struct pipe_viewport_state viewport; + float half_width = w * 0.5f; + float half_height = h * 0.5f; + viewport.scale[0] = half_width; + viewport.scale[1] = half_height; + viewport.scale[2] = 1.0f; + viewport.scale[3] = 1.0f; + viewport.translate[0] = x + half_width; + viewport.translate[1] = y + half_height; + viewport.translate[2] = 0.0f; + viewport.translate[3] = 1.0f; + + bool stretch = view->texture->width0 != w || view->texture->height0 != h; + if(pipe->render_condition) + pipe->render_condition(pipe, 0, 0); + pipe->set_framebuffer_state(pipe, &fb); + pipe->bind_fragment_sampler_states(pipe, 1, &sampler[stretch]); + pipe->set_viewport_state(pipe, &viewport); + pipe->set_clip_state(pipe, &clip); + pipe->bind_rasterizer_state(pipe, rasterizer); + pipe->bind_depth_stencil_alpha_state(pipe, zsa); + pipe->bind_blend_state(pipe, blend); + pipe->bind_vertex_elements_state(pipe, elements); + pipe->set_vertex_buffers(pipe, 1, &vbuf); + pipe->bind_fs_state(pipe, fs); + pipe->bind_vs_state(pipe, vs); + if(pipe->bind_gs_state) + pipe->bind_gs_state(pipe, 0); + if(pipe->bind_stream_output_state) + pipe->bind_stream_output_state(pipe, 0); + pipe->set_fragment_sampler_views(pipe, 1, &view); + + pipe->draw_vbo(pipe, &draw); + } + + ~dxgi_blitter() + { + pipe->delete_blend_state(pipe, blend); + pipe->delete_rasterizer_state(pipe, rasterizer); + pipe->delete_depth_stencil_alpha_state(pipe, zsa); + pipe->delete_sampler_state(pipe, sampler[0]); + pipe->delete_sampler_state(pipe, sampler[1]); + pipe->delete_vertex_elements_state(pipe, elements); + pipe->delete_vs_state(pipe, vs); + pipe->delete_fs_state(pipe, fs); + pipe->screen->resource_destroy(pipe->screen, vbuf.buffer); + } +}; + +struct GalliumDXGISwapChain : public GalliumDXGIObject<IDXGISwapChain, GalliumDXGIFactory> +{ + ComPtr<IDXGIDevice>dxgi_device; + ComPtr<IGalliumDevice>gallium_device; + ComPtr<GalliumDXGIAdapter> adapter; + ComPtr<IDXGIOutput> target; + + DXGI_SWAP_CHAIN_DESC desc; + + struct native_surface* surface; + const struct native_config* config; + + void* window; + struct pipe_resource* resources[NUM_NATIVE_ATTACHMENTS]; + int width; + int height; + unsigned seq_num; + bool ever_validated; + bool needs_validation; + unsigned present_count; + + ComPtr<IDXGISurface> buffer0; + struct pipe_resource* gallium_buffer0; + struct pipe_sampler_view* gallium_buffer0_view; + + struct pipe_context* pipe; + bool owns_pipe; + + BOOL fullscreen; + + std::auto_ptr<dxgi_blitter> blitter; + bool formats_compatible; + + GalliumDXGISwapChain(GalliumDXGIFactory* factory, IUnknown* p_device, const DXGI_SWAP_CHAIN_DESC& p_desc) + : GalliumDXGIObject<IDXGISwapChain, GalliumDXGIFactory>(factory), desc(p_desc), surface(0) + { + HRESULT hr; + + hr = p_device->QueryInterface(IID_IGalliumDevice, (void**)&gallium_device); + if(!SUCCEEDED(hr)) + throw hr; + + hr = p_device->QueryInterface(IID_IDXGIDevice, (void**)&dxgi_device); + if(!SUCCEEDED(hr)) + throw hr; + + hr = dxgi_device->GetAdapter((IDXGIAdapter**)&adapter); + if(!SUCCEEDED(hr)) + throw hr; + + memset(resources, 0, sizeof(resources)); + + if(desc.SwapEffect == DXGI_SWAP_EFFECT_SEQUENTIAL && desc.BufferCount != 1) + { + std::cerr << "Gallium DXGI: if DXGI_SWAP_EFFECT_SEQUENTIAL is specified, only buffer_count == 1 is implemented, but " << desc.BufferCount << " was specified: ignoring this" << std::endl; + // change the returned desc, so that the application might perhaps notice what we did and react well + desc.BufferCount = 1; + } + + pipe = gallium_device->GetGalliumContext(); + owns_pipe = false; + if(!pipe) + { + pipe = adapter->display->screen->context_create(adapter->display->screen, 0); + owns_pipe = true; + } + + blitter.reset(new dxgi_blitter(pipe)); + window = 0; + + hr = resolve_zero_width_height(true); + if(!SUCCEEDED(hr)) + throw hr; + } + + void init_for_window() + { + if(surface) + { + surface->destroy(surface); + surface = 0; + } + + unsigned config_num; + if(!strcmp(parent->platform->name, "X11")) + { + XWindowAttributes xwa; + XGetWindowAttributes((Display*)parent->display, (Window)window, &xwa); + assert(adapter->configs_by_native_visual_id.count(xwa.visual->visualid)); + config_num = adapter->configs_by_native_visual_id[xwa.visual->visualid]; + } + else + { + enum pipe_format format = dxgi_to_pipe_format[desc.BufferDesc.Format]; + if(!adapter->configs_by_pipe_format.count(format)) + { + if(adapter->configs_by_pipe_format.empty()) + throw E_FAIL; + // TODO: choose the best match + format = (pipe_format)adapter->configs_by_pipe_format.begin()->first; + } + // TODO: choose the best config + config_num = adapter->configs_by_pipe_format.find(format)->second; + } + + config = adapter->configs[config_num]; + surface = adapter->display->create_window_surface(adapter->display, (EGLNativeWindowType)window, config); + surface->user_data = this; + + width = 0; + height = 0; + seq_num = 0; + present_count = 0; + needs_validation = true; + ever_validated = false; + + formats_compatible = util_is_format_compatible( + util_format_description(dxgi_to_pipe_format[desc.BufferDesc.Format]), + util_format_description(config->color_format)); + } + + ~GalliumDXGISwapChain() + { + if(owns_pipe) + pipe->destroy(pipe); + } + + virtual HRESULT STDMETHODCALLTYPE GetDevice( + REFIID riid, + void **pdevice) + { + return dxgi_device->QueryInterface(riid, pdevice); + } + + HRESULT create_buffer0() + { + HRESULT hr; + ComPtr<IDXGISurface> new_buffer0; + DXGI_USAGE usage = DXGI_USAGE_BACK_BUFFER | DXGI_USAGE_RENDER_TARGET_OUTPUT; + if(desc.SwapEffect == DXGI_SWAP_EFFECT_DISCARD) + usage |= DXGI_USAGE_DISCARD_ON_PRESENT; + // for our blitter + usage |= DXGI_USAGE_SHADER_INPUT; + + DXGI_SURFACE_DESC surface_desc; + surface_desc.Format = desc.BufferDesc.Format; + surface_desc.Width = desc.BufferDesc.Width; + surface_desc.Height = desc.BufferDesc.Height; + surface_desc.SampleDesc = desc.SampleDesc; + hr = dxgi_device->CreateSurface(&surface_desc, 1, usage, 0, &new_buffer0); + if(!SUCCEEDED(hr)) + return hr; + + ComPtr<IGalliumResource> gallium_resource; + hr = new_buffer0->QueryInterface(IID_IGalliumResource, (void**)&gallium_resource); + if(!SUCCEEDED(hr)) + return hr; + + struct pipe_resource* new_gallium_buffer0 = gallium_resource->GetGalliumResource(); + if(!new_gallium_buffer0) + return E_FAIL; + + buffer0.reset(new_buffer0.steal()); + gallium_buffer0 = new_gallium_buffer0; + struct pipe_sampler_view templat; + memset(&templat, 0, sizeof(templat)); + templat.texture = gallium_buffer0; + templat.swizzle_r = 0; + templat.swizzle_g = 1; + templat.swizzle_b = 2; + templat.swizzle_a = 3; + templat.format = gallium_buffer0->format; + gallium_buffer0_view = pipe->create_sampler_view(pipe, gallium_buffer0, &templat); + return S_OK; + } + + bool validate() + { + unsigned new_seq_num; + needs_validation = false; + + if(!surface->validate(surface, (1 << NATIVE_ATTACHMENT_BACK_LEFT) | (1 << NATIVE_ATTACHMENT_FRONT_LEFT), &new_seq_num, resources, &width, &height)) + return false; + + if(!ever_validated || seq_num != new_seq_num) + { + seq_num = new_seq_num; + ever_validated = true; + } + return true; + } + + HRESULT resolve_zero_width_height(bool force = false) + { + if(!force && desc.BufferDesc.Width && desc.BufferDesc.Height) + return S_OK; + + unsigned width, height; + HRESULT hr = parent->backend->GetPresentSize(desc.OutputWindow, &width, &height); + if(!SUCCEEDED(hr)) + return hr; + + // On Windows, 8 is used, and a debug message saying so gets printed + if(!width) + width = 8; + if(!height) + height = 8; + + if(!desc.BufferDesc.Width) + desc.BufferDesc.Width = width; + if(!desc.BufferDesc.Height) + desc.BufferDesc.Height = height; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE Present( + UINT sync_interval, + UINT flags) + { + HRESULT hr; + if(flags & DXGI_PRESENT_TEST) + return parent->backend->TestPresent(desc.OutputWindow); + + if(!buffer0) + { + HRESULT hr = create_buffer0(); + if(!SUCCEEDED(hr)) + return hr; + } + + void* cur_window = 0; + RECT rect; + RGNDATA* rgndata; + BOOL preserve_aspect_ratio; + unsigned dst_w, dst_h; + bool db; + struct pipe_resource* dst; + struct pipe_resource* src; + struct pipe_surface* dst_surface; + + void* present_cookie; + hr = parent->backend->BeginPresent(desc.OutputWindow, &present_cookie, &cur_window, &rect, &rgndata, &preserve_aspect_ratio); + if(hr != S_OK) + return hr; + + if(!cur_window || rect.left >= rect.right || rect.top >= rect.bottom) + goto end_present; + + if(cur_window != window) + { + window = cur_window; + init_for_window(); + } + + if(needs_validation) + { + if(!validate()) + return DXGI_ERROR_DEVICE_REMOVED; + } + + db = !!(config->buffer_mask & (1 << NATIVE_ATTACHMENT_BACK_LEFT)); + dst = resources[db ? NATIVE_ATTACHMENT_BACK_LEFT : NATIVE_ATTACHMENT_FRONT_LEFT]; + src = gallium_buffer0; + dst_surface = 0; + + assert(src); + assert(dst); + + /* TODO: sharing the context for blitting won't work correctly if queries are active + * Hopefully no one is crazy enough to keep queries active while presenting, expecting + * sensible results. + * We could alternatively force using another context, but that might cause inefficiency issues + */ + + if((unsigned)rect.right > dst->width0) + rect.right = dst->width0; + if((unsigned)rect.bottom > dst->height0) + rect.bottom = dst->height0; + if(rect.left > rect.right) + rect.left = rect.right; + if(rect.top > rect.bottom) + rect.top = rect.bottom; + + if(rect.left >= rect.right && rect.top >= rect.bottom) + goto end_present; + + dst_w = rect.right - rect.left; + dst_h = rect.bottom - rect.top; + + // TODO: add support for rgndata +// if(preserve_aspect_ratio || !rgndata) + if(1) + { + unsigned blit_x, blit_y, blit_w, blit_h; + float black[4] = {0, 0, 0, 0}; + + if(!formats_compatible || src->width0 != dst_w || src->height0 != dst_h) + dst_surface = pipe->screen->get_tex_surface(pipe->screen, dst, 0, 0, 0, PIPE_BIND_RENDER_TARGET); + + if(preserve_aspect_ratio) + { + int delta = src->width0 * dst_h - dst_w * src->height0; + if(delta > 0) + { + blit_w = dst_w; + blit_h = dst_w * src->height0 / src->width0; + } + else if(delta < 0) + { + blit_w = dst_h * src->width0 / src->height0; + blit_h = dst_h; + } + else + { + blit_w = dst_w; + blit_h = dst_h; + } + + blit_x = (dst_w - blit_w) >> 1; + blit_y = (dst_h - blit_h) >> 1; + } + else + { + blit_x = 0; + blit_y = 0; + blit_w = dst_w; + blit_h = dst_h; + } + + if(blit_x) + pipe->clear_render_target(pipe, dst_surface, black, rect.left, rect.top, blit_x, dst_h); + if(blit_y) + pipe->clear_render_target(pipe, dst_surface, black, rect.left, rect.top, dst_w, blit_y); + + if(formats_compatible && blit_w == src->width0 && blit_h == src->height0) + { + pipe_subresource sr; + sr.face = 0; + sr.level = 0; + pipe->resource_copy_region(pipe, dst, sr, rect.left, rect.top, 0, src, sr, 0, 0, 0, blit_w, blit_h); + } + else + { + blitter->blit(dst_surface, gallium_buffer0_view, rect.left + blit_x, rect.top + blit_y, blit_w, blit_h); + if(!owns_pipe) + gallium_device->RestoreGalliumState(); + } + + if(blit_w != dst_w) + pipe->clear_render_target(pipe, dst_surface, black, rect.left + blit_x + blit_w, rect.top, dst_w - blit_x - blit_w, dst_h); + if(blit_h != dst_h) + pipe->clear_render_target(pipe, dst_surface, black, rect.left, rect.top + blit_y + blit_h, dst_w, dst_h - blit_y - blit_h); + } + + if(dst_surface) + pipe->screen->tex_surface_destroy(dst_surface); + + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, 0); + + if(db) + { + if(!surface->swap_buffers(surface)) + return DXGI_ERROR_DEVICE_REMOVED; + } + else + { + if(!surface->flush_frontbuffer(surface)) + return DXGI_ERROR_DEVICE_REMOVED; + } + +end_present: + parent->backend->EndPresent(desc.OutputWindow, present_cookie); + + ++present_count; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetBuffer( + UINT Buffer, + REFIID riid, + void **ppSurface) + { + if(Buffer > 0) + { + if(desc.SwapEffect == DXGI_SWAP_EFFECT_SEQUENTIAL) + std::cerr << "DXGI unimplemented: GetBuffer(n) with n > 0 not supported, returning buffer 0 instead!" << std::endl; + else + std::cerr << "DXGI error: in GetBuffer(n), n must be 0 for DXGI_SWAP_EFFECT_DISCARD\n" << std::endl; + } + + if(!buffer0) + { + HRESULT hr = create_buffer0(); + if(!SUCCEEDED(hr)) + return hr; + } + return buffer0->QueryInterface(riid, ppSurface); + } + + /* TODO: implement somehow */ + virtual HRESULT STDMETHODCALLTYPE SetFullscreenState( + BOOL fullscreen, + IDXGIOutput *target) + { + fullscreen = fullscreen; + target = target; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetFullscreenState( + BOOL *out_fullscreen, + IDXGIOutput **out_target) + { + if(out_fullscreen) + *out_fullscreen = fullscreen; + if(out_target) + *out_target = target.ref(); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetDesc( + DXGI_SWAP_CHAIN_DESC *out_desc) + { + *out_desc = desc; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE ResizeBuffers( + UINT buffer_count, + UINT width, + UINT height, + DXGI_FORMAT new_format, + UINT swap_chain_flags) + { + if(buffer0) + { + buffer0.p->AddRef(); + ULONG v = buffer0.p->Release(); + // we must fail if there are any references to buffer0 other than ours + if(v > 1) + return E_FAIL; + pipe_sampler_view_reference(&gallium_buffer0_view, 0); + buffer0 = (IUnknown*)NULL; + gallium_buffer0 = 0; + } + + if(desc.SwapEffect != DXGI_SWAP_EFFECT_SEQUENTIAL) + desc.BufferCount = buffer_count; + desc.BufferDesc.Format = new_format; + desc.BufferDesc.Width = width; + desc.BufferDesc.Height = height; + desc.Flags = swap_chain_flags; + return resolve_zero_width_height(); + } + + virtual HRESULT STDMETHODCALLTYPE ResizeTarget( + const DXGI_MODE_DESC *out_new_target_parameters) + { + /* TODO: implement */ + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetContainingOutput( + IDXGIOutput **out_output) + { + *out_output = adapter->outputs[0].ref(); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetFrameStatistics( + DXGI_FRAME_STATISTICS *out_stats) + { + memset(out_stats, 0, sizeof(*out_stats)); +#ifdef _WIN32 + QueryPerformanceCounter(&out_stats->SyncQPCTime); +#endif + out_stats->PresentCount = present_count; + out_stats->PresentRefreshCount = present_count; + out_stats->SyncRefreshCount = present_count; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetLastPresentCount( + UINT *last_present_count) + { + *last_present_count = present_count; + return S_OK; + } +}; + +static void GalliumDXGISwapChainRevalidate(IDXGISwapChain* swap_chain) +{ + ((GalliumDXGISwapChain*)swap_chain)->needs_validation = true; +} + +static HRESULT GalliumDXGIAdapterCreate(GalliumDXGIFactory* factory, const struct native_platform* platform, void* dpy, IDXGIAdapter1** out_adapter) +{ + try + { + *out_adapter = new GalliumDXGIAdapter(factory, platform, dpy); + return S_OK; + } + catch(HRESULT hr) + { + return hr; + } +} + +static HRESULT GalliumDXGIOutputCreate(GalliumDXGIAdapter* adapter, const std::string& name, const struct native_connector* connector, IDXGIOutput** out_output) +{ + try + { + *out_output = new GalliumDXGIOutput(adapter, name, connector); + return S_OK; + } + catch(HRESULT hr) + { + return hr; + } +} + +static HRESULT GalliumDXGISwapChainCreate(GalliumDXGIFactory* factory, IUnknown* device, const DXGI_SWAP_CHAIN_DESC& desc, IDXGISwapChain** out_swap_chain) +{ + try + { + *out_swap_chain = new GalliumDXGISwapChain(factory, device, desc); + return S_OK; + } + catch(HRESULT hr) + { + return hr; + } +} + +struct dxgi_binding +{ + const struct native_platform* platform; + void* display; + IGalliumDXGIBackend* backend; +}; + +static dxgi_binding dxgi_default_binding; +static __thread dxgi_binding dxgi_thread_binding; + +void STDMETHODCALLTYPE GalliumDXGIUseNothing() +{ + dxgi_thread_binding.platform = 0; + dxgi_thread_binding.display = 0; + if(dxgi_thread_binding.backend) + dxgi_thread_binding.backend->Release(); + dxgi_thread_binding.backend = 0; +} + +#ifdef GALLIUM_DXGI_USE_X11 +void STDMETHODCALLTYPE GalliumDXGIUseX11Display(Display* dpy, IGalliumDXGIBackend* backend) +{ + GalliumDXGIUseNothing(); + dxgi_thread_binding.platform = native_get_x11_platform(); + dxgi_thread_binding.display = dpy; + + if(backend) + { + dxgi_thread_binding.backend = backend; + backend->AddRef(); + } +} +#endif + +/* +#ifdef GALLIUM_DXGI_USE_DRM +void STDMETHODCALLTYPE GalliumDXGIUseDRMCard(int fd) +{ + GalliumDXGIUseNothing(); + dxgi_thread_binding.platform = native_get_drm_platform(); + dxgi_thread_binding.display = (void*)fd; + dxgi_thread_binding.backend = 0; +} +#endif + +#ifdef GALLIUM_DXGI_USE_FBDEV +void STDMETHODCALLTYPE GalliumDXGIUseFBDev(int fd) +{ + GalliumDXGIUseNothing(); + dxgi_thread_binding.platform = native_get_fbdev_platform(); + dxgi_thread_binding.display = (void*)fd; + dxgi_thread_binding.backend = 0; +} +#endif + +#ifdef GALLIUM_DXGI_USE_GDI +void STDMETHODCALLTYPE GalliumDXGIUseHDC(HDC hdc, PFNHWNDRESOLVER resolver, void* resolver_cookie) +{ + GalliumDXGIUseNothing(); + dxgi_thread_binding.platform = native_get_gdi_platform(); + dxgi_thread_binding.display = (void*)hdc; + dxgi_thread_binding.backend = 0; +} +#endif +*/ +void STDMETHODCALLTYPE GalliumDXGIMakeDefault() +{ + if(dxgi_default_binding.backend) + dxgi_default_binding.backend->Release(); + dxgi_default_binding = dxgi_thread_binding; + if(dxgi_default_binding.backend) + dxgi_default_binding.backend->AddRef(); +} + + /* TODO: why did Microsoft add this? should we do something different for DXGI 1.0 and 1.1? + * Or perhaps what they actually mean is "only create a single factory in your application"? + * TODO: should we use a singleton here, so we never have multiple DXGI objects for the same thing? */ + HRESULT STDMETHODCALLTYPE CreateDXGIFactory1( + REFIID riid, + void **out_factory +) + { + GalliumDXGIFactory* factory; + *out_factory = 0; + if(dxgi_thread_binding.platform) + factory = new GalliumDXGIFactory(dxgi_thread_binding.platform, dxgi_thread_binding.display, dxgi_thread_binding.backend); + else if(dxgi_default_binding.platform) + factory = new GalliumDXGIFactory(dxgi_default_binding.platform, dxgi_default_binding.display, dxgi_default_binding.backend); + else + factory = new GalliumDXGIFactory(native_get_x11_platform(), NULL, NULL); + HRESULT hres = factory->QueryInterface(riid, out_factory); + factory->Release(); + return hres; + } + + HRESULT STDMETHODCALLTYPE CreateDXGIFactory( + REFIID riid, + void **out_factor +) + { + return CreateDXGIFactory1(riid, out_factor); + } diff --git a/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_private.h b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_private.h new file mode 100644 index 0000000000..187a0f986a --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_private.h @@ -0,0 +1,49 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef DXGI_PRIVATE_H_ +#define DXGI_PRIVATE_H_ + +#include <vector> +#include <string> +#include <sstream> + +#include "d3d1xstutil.h" + +#include <dxgi.h> +#include <d3d11.h> +#include <galliumcom.h> +#include <galliumdxgi.h> + +struct native_display; + +struct pipe_screen * +dxgi_loader_create_drm_screen(struct native_display* dpy, const char *name, int fd); + +struct pipe_screen * +dxgi_loader_create_sw_screen(struct native_display* dpy, struct sw_winsys *ws); + +#endif /* DXGI_PRIVATE_H_ */ diff --git a/src/gallium/state_trackers/d3d1x/dxgid3d10/Makefile b/src/gallium/state_trackers/d3d1x/dxgid3d10/Makefile new file mode 100644 index 0000000000..85f41e8158 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/dxgid3d10/Makefile @@ -0,0 +1,4 @@ +LIBNAME=dxgid3d10 +CPP_SOURCES=$(wildcard *.cpp) +LIBRARY_INCLUDES=-I../gd3dapi -I../d3dapi -I../w32api -I../d3d1xstutil/include -I../include -I../../../include -I../../../auxiliary +include ../Makefile.inc diff --git a/src/gallium/state_trackers/d3d1x/dxgid3d10/dxgid3d10.cpp b/src/gallium/state_trackers/d3d1x/dxgid3d10/dxgid3d10.cpp new file mode 100644 index 0000000000..96073d4ebc --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/dxgid3d10/dxgid3d10.cpp @@ -0,0 +1,149 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "software"), to deal in the software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the software, and to + * permit persons to whom the software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "d3d1xstutil.h" +#include "galliumd3d10_1.h" +#include <dxgi.h> +#include <pipe/p_screen.h> +#include <pipe/p_context.h> + +HRESULT D3D10CreateDevice1( + IDXGIAdapter *adapter, + D3D10_DRIVER_TYPE driver_type, + HMODULE software, + unsigned flags, + D3D10_FEATURE_LEVEL1 hardware_level, + unsigned sdk_version, + ID3D10Device1 **out_device +) +{ + HRESULT hr; + ComPtr<IDXGIAdapter1> adapter_to_release; + if(!adapter) + { + ComPtr<IDXGIFactory1> factory; + hr = CreateDXGIFactory1(IID_IDXGIFactory1, (void**)&factory); + if(!SUCCEEDED(hr)) + return hr; + hr = factory->EnumAdapters1(0, &adapter_to_release); + if(!SUCCEEDED(hr)) + return hr; + adapter = adapter_to_release.p; + } + ComPtr<IGalliumAdapter> gallium_adapter; + hr = adapter->QueryInterface(IID_IGalliumAdapter, (void**)&gallium_adapter); + if(!SUCCEEDED(hr)) + return hr; + struct pipe_screen* screen; + // TODO: what should D3D_DRIVER_TYPE_SOFTWARE return? fast or reference? + if(driver_type == D3D10_DRIVER_TYPE_REFERENCE) + screen = gallium_adapter->GetGalliumReferenceSoftwareScreen(); + else if(driver_type == D3D10_DRIVER_TYPE_SOFTWARE || driver_type == D3D10_DRIVER_TYPE_WARP) + screen = gallium_adapter->GetGalliumFastSoftwareScreen(); + else + screen = gallium_adapter->GetGalliumScreen(); + if(!screen) + return E_FAIL; + struct pipe_context* context = screen->context_create(screen, 0); + if(!context) + return E_FAIL; + ComPtr<ID3D10Device1> device; + hr = GalliumD3D10DeviceCreate1(screen, context, TRUE, flags, adapter, &device); + if(!SUCCEEDED(hr)) + { + context->destroy(context); + return hr; + } + if(out_device) + *out_device = device.steal(); + return S_OK; +} + +HRESULT WINAPI D3D10CreateDeviceAndSwapChain1( + IDXGIAdapter* adapter, + D3D10_DRIVER_TYPE driver_type, + HMODULE software, + unsigned flags, + D3D10_FEATURE_LEVEL1 hardware_level, + unsigned sdk_version, + DXGI_SWAP_CHAIN_DESC* swap_chain_desc, + IDXGISwapChain** out_swap_chain, + ID3D10Device1** out_device +) +{ + ComPtr<ID3D10Device1> dev; + HRESULT hr; + hr = D3D10CreateDevice1(adapter, driver_type, software, flags, hardware_level, sdk_version, &dev); + if(!SUCCEEDED(hr)) + return hr; + if(out_swap_chain) + { + ComPtr<IDXGIFactory> factory; + ComPtr<IDXGIDevice> dxgi_device; + ComPtr<IDXGIAdapter> adapter; + hr = dev->QueryInterface(IID_IDXGIDevice, (void**)&dxgi_device); + if(!SUCCEEDED(hr)) + return hr; + + hr = dxgi_device->GetAdapter(&adapter); + if(!SUCCEEDED(hr)) + return hr; + + adapter->GetParent(IID_IDXGIFactory, (void**)&factory); + hr = factory->CreateSwapChain(dev.p, (DXGI_SWAP_CHAIN_DESC*)swap_chain_desc, out_swap_chain); + if(!SUCCEEDED(hr)) + return hr; + } + if(out_device) + *out_device = dev.steal(); + return hr; +} + +HRESULT D3D10CreateDevice( + IDXGIAdapter *adapter, + D3D10_DRIVER_TYPE driver_type, + HMODULE software, + unsigned flags, + unsigned sdk_version, + ID3D10Device **out_device +) +{ + return D3D10CreateDevice1(adapter, driver_type, software, flags, D3D10_FEATURE_LEVEL_10_0, sdk_version, (ID3D10Device1**)out_device); +} + +HRESULT WINAPI D3D10CreateDeviceAndSwapChain( + IDXGIAdapter* adapter, + D3D10_DRIVER_TYPE driver_type, + HMODULE software, + unsigned flags, + unsigned sdk_version, + DXGI_SWAP_CHAIN_DESC* swap_chain_desc, + IDXGISwapChain** out_swap_chain, + ID3D10Device** out_device +) +{ + return D3D10CreateDeviceAndSwapChain1(adapter, driver_type, software, flags, D3D10_FEATURE_LEVEL_10_0, sdk_version, swap_chain_desc, out_swap_chain, (ID3D10Device1**)out_device); +} diff --git a/src/gallium/state_trackers/d3d1x/dxgid3d11/Makefile b/src/gallium/state_trackers/d3d1x/dxgid3d11/Makefile new file mode 100644 index 0000000000..591d1bea94 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/dxgid3d11/Makefile @@ -0,0 +1,4 @@ +LIBNAME=dxgid3d11 +CPP_SOURCES=$(wildcard *.cpp) +LIBRARY_INCLUDES=-I../gd3dapi -I../d3dapi -I../w32api -I../d3d1xstutil/include -I../include -I../../../include -I../../../auxiliary +include ../Makefile.inc diff --git a/src/gallium/state_trackers/d3d1x/dxgid3d11/dxgid3d11.cpp b/src/gallium/state_trackers/d3d1x/dxgid3d11/dxgid3d11.cpp new file mode 100644 index 0000000000..1b1cb907d3 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/dxgid3d11/dxgid3d11.cpp @@ -0,0 +1,135 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "software"), to deal in the software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the software, and to + * permit persons to whom the software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "d3d1xstutil.h" +#include "galliumd3d11.h" +#include <dxgi.h> +#include <pipe/p_screen.h> +#include <pipe/p_context.h> + +HRESULT D3D11CreateDevice( + IDXGIAdapter *adapter, + D3D_DRIVER_TYPE driver_type, + HMODULE software, + unsigned flags, + const D3D_FEATURE_LEVEL *feature_levels, + unsigned num_feature_levels, + unsigned sdk_version, + ID3D11Device **out_device, + D3D_FEATURE_LEVEL *feature_level, + ID3D11DeviceContext **out_immediate_context +) +{ + HRESULT hr; + ComPtr<IDXGIAdapter1> adapter_to_release; + if(!adapter) + { + ComPtr<IDXGIFactory1> factory; + hr = CreateDXGIFactory1(IID_IDXGIFactory1, (void**)&factory); + if(!SUCCEEDED(hr)) + return hr; + hr = factory->EnumAdapters1(0, &adapter_to_release); + if(!SUCCEEDED(hr)) + return hr; + adapter = adapter_to_release.p; + } + ComPtr<IGalliumAdapter> gallium_adapter; + hr = adapter->QueryInterface(IID_IGalliumAdapter, (void**)&gallium_adapter); + if(!SUCCEEDED(hr)) + return hr; + struct pipe_screen* screen; + // TODO: what should D3D_DRIVER_TYPE_SOFTWARE return? fast or reference? + if(driver_type == D3D_DRIVER_TYPE_REFERENCE) + screen = gallium_adapter->GetGalliumReferenceSoftwareScreen(); + else if(driver_type == D3D_DRIVER_TYPE_SOFTWARE || driver_type == D3D_DRIVER_TYPE_WARP) + screen = gallium_adapter->GetGalliumFastSoftwareScreen(); + else + screen = gallium_adapter->GetGalliumScreen(); + if(!screen) + return E_FAIL; + struct pipe_context* context = screen->context_create(screen, 0); + if(!context) + return E_FAIL; + ComPtr<ID3D11Device> device; + hr = GalliumD3D11DeviceCreate(screen, context, TRUE, flags, adapter, &device); + if(!SUCCEEDED(hr)) + { + context->destroy(context); + return hr; + } + if(out_immediate_context) + device->GetImmediateContext(out_immediate_context); + if(feature_level) + *feature_level = device->GetFeatureLevel(); + if(out_device) + *out_device = device.steal(); + return S_OK; +} + +HRESULT WINAPI D3D11CreateDeviceAndSwapChain( + IDXGIAdapter* adapter, + D3D_DRIVER_TYPE driver_type, + HMODULE software, + unsigned flags, + CONST D3D_FEATURE_LEVEL* feature_levels, + unsigned num_feature_levels, + unsigned sdk_version, + CONST DXGI_SWAP_CHAIN_DESC* pSwapChainDesc, + IDXGISwapChain** out_swap_chain, + ID3D11Device** out_device, + D3D_FEATURE_LEVEL* feature_level, + ID3D11DeviceContext** out_immediate_context ) +{ + ComPtr<ID3D11Device> dev; + ComPtr<ID3D11DeviceContext> ctx; + HRESULT hr; + hr = D3D11CreateDevice(adapter, driver_type, software, flags, feature_levels, num_feature_levels, sdk_version, (ID3D11Device**)&dev, feature_level, (ID3D11DeviceContext**)&ctx); + if(!SUCCEEDED(hr)) + return hr; + if(out_swap_chain) + { + ComPtr<IDXGIFactory> factory; + ComPtr<IDXGIDevice> dxgi_device; + ComPtr<IDXGIAdapter> adapter; + hr = dev->QueryInterface(IID_IDXGIDevice, (void**)&dxgi_device); + if(!SUCCEEDED(hr)) + return hr; + + hr = dxgi_device->GetAdapter(&adapter); + if(!SUCCEEDED(hr)) + return hr; + + adapter->GetParent(IID_IDXGIFactory, (void**)&factory); + hr = factory->CreateSwapChain(dev.p, (DXGI_SWAP_CHAIN_DESC*)pSwapChainDesc, out_swap_chain); + if(!SUCCEEDED(hr)) + return hr; + } + if(out_device) + *out_device = dev.steal(); + if(out_immediate_context) + *out_immediate_context = ctx.steal(); + return hr; +} diff --git a/src/gallium/state_trackers/d3d1x/gd3d10/Makefile b/src/gallium/state_trackers/d3d1x/gd3d10/Makefile new file mode 100644 index 0000000000..300149d384 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3d10/Makefile @@ -0,0 +1,20 @@ +LIBNAME=gd3d10 +CPP_SOURCES=d3d10.generated.cpp +LIBRARY_INCLUDES=-I../gd3d1x -I../gd3dapi -I../d3dapi -I../w32api -I../d3d1xstutil/include -I../d3d1xshader/include -I../../../include -I../../../auxiliary -I../../../state_trackers/egl/common + +GEN_D3D10=perl d3d10.pl + +include ../Makefile.inc + +d3d10.generated.o: d3d10_objects.generated.h d3d10_screen.generated.h d3d10_context.generated.h d3d10_misc.generated.h + +d3d10.generated.cpp: ../gd3d11/d3d11.cpp d3d10.pl + $(GEN_D3D10) $< > $@ +d3d10_objects.generated.h: ../gd3d11/d3d11_objects.h d3d10.pl + $(GEN_D3D10) $< > $@ +d3d10_screen.generated.h: ../gd3d11/d3d11_screen.h d3d10.pl + $(GEN_D3D10) $< > $@ +d3d10_context.generated.h: ../gd3d11/d3d11_context.h d3d10.pl + $(GEN_D3D10) $< > $@ +d3d10_misc.generated.h: ../gd3d11/d3d11_misc.h d3d10.pl + $(GEN_D3D10) $< > $@ diff --git a/src/gallium/state_trackers/d3d1x/gd3d10/d3d10.pl b/src/gallium/state_trackers/d3d1x/gd3d10/d3d10.pl new file mode 100755 index 0000000000..4687b8365a --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3d10/d3d10.pl @@ -0,0 +1,12 @@ +#!/usr/bin/perl +while(<>) +{ + s/D3D11_SRV_DIMENSION_/D3D10_1_SRV_DIMENSION_/g; + s/D3D11/D3D10/g; + s/D3D10_SIGNATURE_PARAMETER_DESC/D3D11_SIGNATURE_PARAMETER_DESC/g; + s/D3D_FEATURE_LEVEL_/D3D10_FEATURE_LEVEL_/g; + s/D3D_FEATURE_LEVEL/D3D10_FEATURE_LEVEL1/g; + s/^#define API 11/#define API 10/; + s/^(#include "d3d1)1(_[^.]*)(.h")/${1}0$2.generated$3/; + print $_; +} diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/Makefile b/src/gallium/state_trackers/d3d1x/gd3d11/Makefile new file mode 100644 index 0000000000..650c11d3d0 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3d11/Makefile @@ -0,0 +1,6 @@ +LIBNAME=gd3d11 +CPP_SOURCES=d3d11.cpp +LIBRARY_INCLUDES=-I../gd3d1x -I../gd3dapi -I../d3dapi -I../w32api -I../d3d1xstutil/include -I../d3d1xshader/include -I../../../include -I../../../auxiliary -I../../../state_trackers/egl/common + +include ../Makefile.inc + diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11.cpp b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11.cpp new file mode 100644 index 0000000000..27cfebc1b9 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11.cpp @@ -0,0 +1,241 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "d3d1x_private.h" + +extern "C" +{ +#include "util/u_gen_mipmap.h" +#include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_dump.h" +#include "cso_cache/cso_context.h" +} + + +// the perl script will change this to 10 for d3d10, and also do s/D3D11/D3D10 in the whole file +#define API 11 + +#if API >= 11 +#define DX10_ONLY(x) +#else +#define DX10_ONLY(x) x +#endif + +typedef D3D10_MAPPED_TEXTURE3D D3D10_MAPPED_SUBRESOURCE; + +// used to make QueryInterface know the IIDs of the interface and its ancestors +COM_INTERFACE(ID3D11DeviceChild, IUnknown) +COM_INTERFACE(ID3D11InputLayout, ID3D11DeviceChild) +COM_INTERFACE(ID3D11DepthStencilState, ID3D11DeviceChild) +COM_INTERFACE(ID3D11BlendState, ID3D11DeviceChild) +COM_INTERFACE(ID3D11RasterizerState, ID3D11DeviceChild) +COM_INTERFACE(ID3D11SamplerState, ID3D11DeviceChild) +COM_INTERFACE(ID3D11Resource, ID3D11DeviceChild) +COM_INTERFACE(ID3D11Buffer, ID3D11Resource) +COM_INTERFACE(ID3D11Texture1D, ID3D11Resource) +COM_INTERFACE(ID3D11Texture2D, ID3D11Resource) +COM_INTERFACE(ID3D11Texture3D, ID3D11Resource) +COM_INTERFACE(ID3D11View, ID3D11DeviceChild) +COM_INTERFACE(ID3D11ShaderResourceView, ID3D11View) +COM_INTERFACE(ID3D11RenderTargetView, ID3D11View) +COM_INTERFACE(ID3D11DepthStencilView, ID3D11View) +COM_INTERFACE(ID3D11VertexShader, ID3D11DeviceChild) +COM_INTERFACE(ID3D11GeometryShader, ID3D11DeviceChild) +COM_INTERFACE(ID3D11PixelShader, ID3D11DeviceChild) +COM_INTERFACE(ID3D11Asynchronous, ID3D11DeviceChild) +COM_INTERFACE(ID3D11Query, ID3D11Asynchronous) +COM_INTERFACE(ID3D11Predicate, ID3D11Query) +COM_INTERFACE(ID3D11Counter, ID3D11Asynchronous) +COM_INTERFACE(ID3D11Device, IUnknown) + +#if API >= 11 +COM_INTERFACE(ID3D11UnorderedAccessView, ID3D11View) +COM_INTERFACE(ID3D11HullShader, ID3D11DeviceChild) +COM_INTERFACE(ID3D11DomainShader, ID3D11DeviceChild) +COM_INTERFACE(ID3D11ComputeShader, ID3D11DeviceChild) +COM_INTERFACE(ID3D11ClassInstance, ID3D11DeviceChild) +COM_INTERFACE(ID3D11ClassLinkage, ID3D11DeviceChild) +COM_INTERFACE(ID3D11CommandList, ID3D11DeviceChild) +COM_INTERFACE(ID3D11DeviceContext, ID3D11DeviceChild) +#else +COM_INTERFACE(ID3D10BlendState1, ID3D10BlendState) +COM_INTERFACE(ID3D10ShaderResourceView1, ID3D10ShaderResourceView) +COM_INTERFACE(ID3D10Device1, ID3D10Device) +#endif + +struct GalliumD3D11Screen; + +#if API >= 11 +static ID3D11DeviceContext* GalliumD3D11ImmediateDeviceContext_Create(GalliumD3D11Screen* device, struct pipe_context* pipe, bool owns_pipe); +static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumState(ID3D11DeviceContext* context); +static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumStateBlitOnly(ID3D11DeviceContext* context); +static void GalliumD3D11ImmediateDeviceContext_Destroy(ID3D11DeviceContext* device); +#endif + +static inline pipe_box d3d11_to_pipe_box(struct pipe_resource* resource, unsigned level, const D3D11_BOX* pBox) +{ + pipe_box box; + if(pBox) + { + box.x = pBox->left; + box.y = pBox->top; + box.z = pBox->front; + box.width = pBox->right - pBox->left; + box.height = pBox->bottom - pBox->top; + box.depth = pBox->back - pBox->front; + } + else + { + box.x = box.y = box.z = 0; + box.width = u_minify(resource->width0, level); + box.height = u_minify(resource->height0, level); + box.depth = u_minify(resource->depth0, level); + } + return box; +} + +struct GalliumD3D11Caps +{ + bool so; + bool gs; + bool queries; + bool render_condition; + unsigned constant_buffers[D3D11_STAGES]; + unsigned stages; + unsigned stages_with_sampling; +}; + +typedef GalliumDXGIDevice< + GalliumMultiComObject< +#if API >= 11 + GalliumPrivateDataComObject<ID3D11Device>, +#else + GalliumPrivateDataComObject<ID3D10Device1>, +#endif + IGalliumDevice + > +> GalliumD3D11ScreenBase; + +// used to avoid needing to have forward declarations of functions +// this is called "screen" because in the D3D10 case it's only part of the device +struct GalliumD3D11Screen : public GalliumD3D11ScreenBase +{ + + pipe_screen* screen; + pipe_context* immediate_pipe; + GalliumD3D11Caps screen_caps; + +#if API >= 11 + ID3D11DeviceContext* immediate_context; + ID3D11DeviceContext* get_immediate_context() + { + return immediate_context; + } +#else + GalliumD3D11Screen* get_immediate_context() + { + return this; + } +#endif + + + GalliumD3D11Screen(pipe_screen* screen, struct pipe_context* immediate_pipe, IDXGIAdapter* adapter) + : GalliumD3D11ScreenBase(adapter), screen(screen), immediate_pipe(immediate_pipe) + { + } + +#if API < 11 + // we use a D3D11-like API internally + virtual HRESULT STDMETHODCALLTYPE Map( + ID3D11Resource *pResource, + unsigned Subresource, + D3D11_MAP MapType, + unsigned MapFlags, + D3D11_MAPPED_SUBRESOURCE *pMappedResource) = 0; + virtual void STDMETHODCALLTYPE Unmap( + ID3D11Resource *pResource, + unsigned Subresource) = 0; + virtual void STDMETHODCALLTYPE Begin( + ID3D11Asynchronous *pAsync) = 0; + virtual void STDMETHODCALLTYPE End( + ID3D11Asynchronous *pAsync) = 0; + virtual HRESULT STDMETHODCALLTYPE GetData( + ID3D11Asynchronous *pAsync, + void *pData, + unsigned DataSize, + unsigned GetDataFlags) = 0; + + // TODO: maybe we should use function overloading, but that might risk silent errors, + // and cannot be exported to a C interface + virtual void UnbindBlendState(ID3D11BlendState* state) = 0; + virtual void UnbindRasterizerState(ID3D11RasterizerState* state) = 0; + virtual void UnbindDepthStencilState(ID3D11DepthStencilState* state) = 0; + virtual void UnbindInputLayout(ID3D11InputLayout* state) = 0; + virtual void UnbindPixelShader(ID3D11PixelShader* state) = 0; + virtual void UnbindVertexShader(ID3D11VertexShader* state) = 0; + virtual void UnbindGeometryShader(ID3D11GeometryShader* state) = 0; + virtual void UnbindPredicate(ID3D11Predicate* predicate) = 0; + virtual void UnbindSamplerState(ID3D11SamplerState* state) = 0; + virtual void UnbindBuffer(ID3D11Buffer* buffer) = 0; + virtual void UnbindDepthStencilView(ID3D11DepthStencilView* view) = 0; + virtual void UnbindRenderTargetView(ID3D11RenderTargetView* view) = 0; + virtual void UnbindShaderResourceView(ID3D11ShaderResourceView* view) = 0; + + void UnbindBlendState1(ID3D11BlendState1* state) + { + UnbindBlendState(state); + } + void UnbindShaderResourceView1(ID3D11ShaderResourceView1* view) + { + UnbindShaderResourceView(view); + } +#endif +}; + +#include "d3d11_objects.h" +#include "d3d11_screen.h" +#include "d3d11_context.h" +#include "d3d11_misc.h" + +#if API >= 11 +HRESULT STDMETHODCALLTYPE GalliumD3D11DeviceCreate(struct pipe_screen* screen, struct pipe_context* context, BOOL owns_context, unsigned creation_flags, IDXGIAdapter* adapter, ID3D11Device** ppDevice) +{ + if(creation_flags & D3D11_CREATE_DEVICE_SINGLETHREADED) + *ppDevice = new GalliumD3D11ScreenImpl<false>(screen, context, owns_context, creation_flags, adapter); + else + *ppDevice = new GalliumD3D11ScreenImpl<true>(screen, context, owns_context, creation_flags, adapter); + return S_OK; +} +#else +HRESULT STDMETHODCALLTYPE GalliumD3D10DeviceCreate1(struct pipe_screen* screen, struct pipe_context* context, BOOL owns_context, unsigned creation_flags, IDXGIAdapter* adapter, ID3D10Device1** ppDevice) +{ + if(creation_flags & D3D10_CREATE_DEVICE_SINGLETHREADED) + *ppDevice = new GalliumD3D10Device<false>(screen, context, owns_context, creation_flags, adapter); + else + *ppDevice = new GalliumD3D10Device<true>(screen, context, owns_context, creation_flags, adapter); + return S_OK; +} +#endif diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h new file mode 100644 index 0000000000..36110595c2 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h @@ -0,0 +1,2069 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* used to unbind things, we need 128 due to resources */ +static const void* zero_data[128]; + +#define UPDATE_VIEWS_SHIFT (D3D11_STAGES * 0) +#define UPDATE_SAMPLERS_SHIFT (D3D11_STAGES * 1) +#define UPDATE_VERTEX_BUFFERS (1 << (D3D11_STAGES * 2)) + +#if API >= 11 +template<typename PtrTraits> +struct GalliumD3D11DeviceContext : + public GalliumD3D11DeviceChild<ID3D11DeviceContext> +{ +#else +template<bool threadsafe> +struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe> +{ + typedef simple_ptr_traits PtrTraits; + typedef GalliumD3D10Device GalliumD3D10DeviceContext; +#endif + + refcnt_ptr<GalliumD3D11Shader<>, PtrTraits> shaders[D3D11_STAGES]; + refcnt_ptr<GalliumD3D11InputLayout, PtrTraits> input_layout; + refcnt_ptr<GalliumD3D11Buffer, PtrTraits> index_buffer; + refcnt_ptr<GalliumD3D11RasterizerState, PtrTraits> rasterizer_state; + refcnt_ptr<GalliumD3D11DepthStencilState, PtrTraits> depth_stencil_state; + refcnt_ptr<GalliumD3D11BlendState, PtrTraits> blend_state; + refcnt_ptr<GalliumD3D11DepthStencilView, PtrTraits> depth_stencil_view; + refcnt_ptr<GalliumD3D11Predicate, PtrTraits> render_predicate; + + refcnt_ptr<GalliumD3D11Buffer, PtrTraits> constant_buffers[D3D11_STAGES][D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT]; + refcnt_ptr<GalliumD3D11ShaderResourceView, PtrTraits> shader_resource_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT]; + refcnt_ptr<GalliumD3D11SamplerState, PtrTraits> samplers[D3D11_STAGES][D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT]; + refcnt_ptr<GalliumD3D11Buffer, PtrTraits> input_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; + refcnt_ptr<GalliumD3D11RenderTargetView, PtrTraits> render_target_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT]; + refcnt_ptr<GalliumD3D11Buffer, PtrTraits> so_targets[D3D11_SO_BUFFER_SLOT_COUNT]; + +#if API >= 11 + refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> cs_unordered_access_views[D3D11_PS_CS_UAV_REGISTER_COUNT]; + refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> om_unordered_access_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT]; +#endif + + D3D11_VIEWPORT viewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; + D3D11_RECT scissor_rects[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; + unsigned so_offsets[D3D11_SO_BUFFER_SLOT_COUNT]; + D3D11_PRIMITIVE_TOPOLOGY primitive_topology; + DXGI_FORMAT index_format; + unsigned index_offset; + BOOL render_predicate_value; + float blend_color[4]; + unsigned sample_mask; + unsigned stencil_ref; + bool depth_clamp; + + void* default_input_layout; + void* default_rasterizer; + void* default_depth_stencil; + void* default_blend; + void* default_sampler; + void* ld_sampler; + void * default_shaders[D3D11_STAGES]; + + // derived state + int primitive_mode; + struct pipe_vertex_buffer vertex_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; + struct pipe_resource* so_buffers[D3D11_SO_BUFFER_SLOT_COUNT]; + struct pipe_sampler_view* sampler_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT]; + struct + { + void* ld; // accessed with a -1 index from v + void* v[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT]; + } sampler_csos[D3D11_STAGES]; + struct pipe_resource * buffers[D3D11_SO_BUFFER_SLOT_COUNT]; + unsigned num_shader_resource_views[D3D11_STAGES]; + unsigned num_samplers[D3D11_STAGES]; + unsigned num_vertex_buffers; + unsigned num_render_target_views; + unsigned num_viewports; + unsigned num_scissor_rects; + unsigned num_so_targets; + + struct pipe_context* pipe; + unsigned update_flags; + + bool owns_pipe; + unsigned context_flags; + + GalliumD3D11Caps caps; + + cso_context* cso_ctx; + gen_mipmap_state* gen_mipmap; + +#if API >= 11 +#define SYNCHRONIZED do {} while(0) + + GalliumD3D11DeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, bool owns_pipe, unsigned context_flags = 0) + : GalliumD3D11DeviceChild<ID3D11DeviceContext>(device), pipe(pipe), owns_pipe(owns_pipe), context_flags(context_flags) + { + caps = device->screen_caps; + init_context(); + } + + ~GalliumD3D11DeviceContext() + { + destroy_context(); + } +#else +#define SYNCHRONIZED lock_t<maybe_mutex_t<threadsafe> > lock_(this->mutex) + + GalliumD3D10Device(pipe_screen* screen, pipe_context* pipe, bool owns_pipe, unsigned creation_flags, IDXGIAdapter* adapter) + : GalliumD3D10ScreenImpl<threadsafe>(screen, pipe, owns_pipe, creation_flags, adapter), pipe(pipe), owns_pipe(owns_pipe), context_flags(0) + { + caps = this->screen_caps; + init_context(); + } + + ~GalliumD3D10Device() + { + destroy_context(); + } +#endif + + void init_context() + { + if(!pipe->begin_query) + caps.queries = false; + if(!pipe->render_condition) + caps.render_condition = false; + if(!pipe->bind_gs_state) + { + caps.gs = false; + caps.stages = 2; + } + if(!pipe->set_stream_output_buffers) + caps.so = false; + if(!pipe->set_geometry_sampler_views) + caps.stages_with_sampling &=~ (1 << PIPE_SHADER_GEOMETRY); + if(!pipe->set_fragment_sampler_views) + caps.stages_with_sampling &=~ (1 << PIPE_SHADER_FRAGMENT); + if(!pipe->set_vertex_sampler_views) + caps.stages_with_sampling &=~ (1 << PIPE_SHADER_VERTEX); + + update_flags = 0; + + // pipeline state + memset(viewports, 0, sizeof(viewports)); + memset(scissor_rects, 0, sizeof(scissor_rects)); + memset(so_offsets, 0, sizeof(so_offsets)); + primitive_topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; + index_format = DXGI_FORMAT_UNKNOWN; + index_offset = 0; + render_predicate_value = 0; + memset(blend_color, 0, sizeof(blend_color)); + sample_mask = ~0; + stencil_ref = 0; + depth_clamp = 0; + + // derived state + primitive_mode = 0; + memset(vertex_buffers, 0, sizeof(vertex_buffers)); + memset(so_buffers, 0, sizeof(so_buffers)); + memset(sampler_views, 0, sizeof(sampler_views)); + memset(sampler_csos, 0, sizeof(sampler_csos)); + memset(num_shader_resource_views, 0, sizeof(num_shader_resource_views)); + memset(num_samplers, 0, sizeof(num_samplers)); + num_vertex_buffers = 0; + num_render_target_views = 0; + num_viewports = 0; + num_scissor_rects = 0; + num_so_targets = 0; + + default_input_layout = pipe->create_vertex_elements_state(pipe, 0, 0); + + struct pipe_rasterizer_state rasterizerd; + memset(&rasterizerd, 0, sizeof(rasterizerd)); + rasterizerd.gl_rasterization_rules = 1; + rasterizerd.cull_face = PIPE_FACE_BACK; + default_rasterizer = pipe->create_rasterizer_state(pipe, &rasterizerd); + + struct pipe_depth_stencil_alpha_state depth_stencild; + memset(&depth_stencild, 0, sizeof(depth_stencild)); + depth_stencild.depth.enabled = TRUE; + depth_stencild.depth.writemask = 1; + depth_stencild.depth.func = PIPE_FUNC_LESS; + default_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &depth_stencild); + + struct pipe_blend_state blendd; + memset(&blendd, 0, sizeof(blendd)); + blendd.rt[0].colormask = 0xf; + default_blend = pipe->create_blend_state(pipe, &blendd); + + struct pipe_sampler_state samplerd; + memset(&samplerd, 0, sizeof(samplerd)); + samplerd.normalized_coords = 1; + samplerd.min_img_filter = PIPE_TEX_FILTER_LINEAR; + samplerd.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR; + samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + samplerd.border_color[0] = 1.0f; + samplerd.border_color[1] = 1.0f; + samplerd.border_color[2] = 1.0f; + samplerd.border_color[3] = 1.0f; + samplerd.min_lod = -FLT_MAX; + samplerd.max_lod = FLT_MAX; + samplerd.max_anisotropy = 1; + default_sampler = pipe->create_sampler_state(pipe, &samplerd); + + memset(&samplerd, 0, sizeof(samplerd)); + samplerd.normalized_coords = 0; + samplerd.min_img_filter = PIPE_TEX_FILTER_NEAREST; + samplerd.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER; + samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_BORDER; + samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_BORDER; + samplerd.min_lod = -FLT_MAX; + samplerd.max_lod = FLT_MAX; + samplerd.max_anisotropy = 1; + ld_sampler = pipe->create_sampler_state(pipe, &samplerd); + + for(unsigned s = 0; s < D3D11_STAGES; ++s) + { + sampler_csos[s].ld = ld_sampler; + for(unsigned i = 0; i < D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT; ++i) + sampler_csos[s].v[i] = default_sampler; + } + + // TODO: should this really be empty shaders, or should they be all-passthrough? + memset(default_shaders, 0, sizeof(default_shaders)); + struct ureg_program *ureg; + ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT); + ureg_END(ureg); + default_shaders[PIPE_SHADER_FRAGMENT] = ureg_create_shader_and_destroy(ureg, pipe); + + ureg = ureg_create(TGSI_PROCESSOR_VERTEX); + ureg_END(ureg); + default_shaders[PIPE_SHADER_VERTEX] = ureg_create_shader_and_destroy(ureg, pipe); + + cso_ctx = cso_create_context(pipe); + gen_mipmap = util_create_gen_mipmap(pipe, cso_ctx); + + RestoreGalliumState(); + } + + void destroy_context() + { + util_destroy_gen_mipmap(gen_mipmap); + cso_destroy_context(cso_ctx); + + pipe->bind_vertex_elements_state(pipe, 0); + pipe->delete_vertex_elements_state(pipe, default_input_layout); + + pipe->bind_rasterizer_state(pipe, 0); + pipe->delete_rasterizer_state(pipe, default_rasterizer); + + pipe->bind_depth_stencil_alpha_state(pipe, 0); + pipe->delete_depth_stencil_alpha_state(pipe, default_depth_stencil); + + pipe->bind_blend_state(pipe, 0); + pipe->delete_blend_state(pipe, default_blend); + + pipe->bind_fragment_sampler_states(pipe, 0, 0); + pipe->bind_vertex_sampler_states(pipe, 0, 0); + if(pipe->bind_geometry_sampler_states) + pipe->bind_geometry_sampler_states(pipe, 0, 0); + pipe->delete_sampler_state(pipe, default_sampler); + pipe->delete_sampler_state(pipe, ld_sampler); + + pipe->bind_fs_state(pipe, 0); + pipe->delete_fs_state(pipe, default_shaders[PIPE_SHADER_FRAGMENT]); + + pipe->bind_vs_state(pipe, 0); + pipe->delete_vs_state(pipe, default_shaders[PIPE_SHADER_VERTEX]); + + if(owns_pipe) + pipe->destroy(pipe); + } + + virtual unsigned STDMETHODCALLTYPE GetContextFlags(void) + { + return context_flags; + } +#if API >= 11 +#define SET_SHADER_EXTRA_ARGS , \ + ID3D11ClassInstance *const *ppClassInstances, \ + unsigned count +#define GET_SHADER_EXTRA_ARGS , \ + ID3D11ClassInstance **ppClassInstances, \ + unsigned *out_count +#else +#define SET_SHADER_EXTRA_ARGS +#define GET_SHADER_EXTRA_ARGS +#endif + +/* On Windows D3D11, SetConstantBuffers and SetShaderResources crash if passed a null pointer. + * Instead, you have to pass a pointer to nulls to unbind things. + * We do the same. + * TODO: is D3D10 the same? + */ + template<unsigned s> + void xs_set_shader(GalliumD3D11Shader<>* shader) + { + if(shader != shaders[s].p) + { + shaders[s] = shader; + void* shader_cso = shader ? shader->object : default_shaders[s]; + switch(s) + { + case PIPE_SHADER_VERTEX: + pipe->bind_vs_state(pipe, shader_cso); + break; + case PIPE_SHADER_FRAGMENT: + pipe->bind_fs_state(pipe, shader_cso); + break; + case PIPE_SHADER_GEOMETRY: + pipe->bind_gs_state(pipe, shader_cso); + break; + } + update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s)) | (1 << (UPDATE_VIEWS_SHIFT + s)); + } + } + + template<unsigned s> + void xs_set_constant_buffers(unsigned start, unsigned count, GalliumD3D11Buffer *const *constbufs) + { + for(unsigned i = 0; i < count; ++i) + { + if(constbufs[i] != constant_buffers[s][i].p) + { + constant_buffers[s][i] = constbufs[i]; + if(s < caps.stages && start + i < caps.constant_buffers[s]) + pipe->set_constant_buffer(pipe, s, start + i, constbufs[i] ? constbufs[i]->resource : NULL); + } + } + } + + template<unsigned s> + void xs_set_shader_resources(unsigned start, unsigned count, GalliumD3D11ShaderResourceView *const *srvs) + { + int last_different = -1; + for(unsigned i = 0; i < count; ++i) + { + if(shader_resource_views[s][start + i].p != srvs[i]) + { + shader_resource_views[s][start + i] = srvs[i]; + sampler_views[s][start + i] = srvs[i] ? srvs[i]->object : 0; + last_different = i; + } + } + if(last_different >= 0) + { + num_shader_resource_views[s] = std::max(num_shader_resource_views[s], start + last_different + 1); + update_flags |= 1 << (UPDATE_VIEWS_SHIFT + s); + } + } + + template<unsigned s> + void xs_set_samplers(unsigned start, unsigned count, GalliumD3D11SamplerState *const *samps) + { + int last_different = -1; + for(unsigned i = 0; i < count; ++i) + { + if(samplers[s][start + i].p != samps[i]) + { + samplers[s][start + i] = samps[i]; + sampler_csos[s].v[start + i] = samps[i] ? samps[i]->object : default_sampler; + } + if(last_different >= 0) + { + num_samplers[s] = std::max(num_samplers[s], start + last_different + 1); + update_flags |= (UPDATE_SAMPLERS_SHIFT + s); + } + } + } + +#define IMPLEMENT_SHADER_STAGE(XS, Stage) \ + virtual void STDMETHODCALLTYPE XS##SetShader( \ + ID3D11##Stage##Shader *pShader \ + SET_SHADER_EXTRA_ARGS) \ + { \ + SYNCHRONIZED; \ + xs_set_shader<D3D11_STAGE_##XS>((GalliumD3D11Shader<>*)pShader); \ + } \ + virtual void STDMETHODCALLTYPE XS##GetShader(\ + ID3D11##Stage##Shader **ppShader \ + GET_SHADER_EXTRA_ARGS) \ + { \ + SYNCHRONIZED; \ + *ppShader = (ID3D11##Stage##Shader*)shaders[D3D11_STAGE_##XS].ref(); \ + } \ + virtual void STDMETHODCALLTYPE XS##SetConstantBuffers(\ + unsigned start, \ + unsigned count, \ + ID3D11Buffer *const* constant_buffers) \ + { \ + SYNCHRONIZED; \ + xs_set_constant_buffers<D3D11_STAGE_##XS>(start, count, (GalliumD3D11Buffer *const *)constant_buffers); \ + } \ + virtual void STDMETHODCALLTYPE XS##GetConstantBuffers(\ + unsigned start, \ + unsigned count, \ + ID3D11Buffer **out_constant_buffers) \ + { \ + SYNCHRONIZED; \ + for(unsigned i = 0; i < count; ++i) \ + out_constant_buffers[i] = constant_buffers[D3D11_STAGE_##XS][start + i].ref(); \ + } \ + virtual void STDMETHODCALLTYPE XS##SetShaderResources(\ + unsigned start, \ + unsigned count, \ + ID3D11ShaderResourceView *const *new_shader_resource_views) \ + { \ + SYNCHRONIZED; \ + xs_set_shader_resources<D3D11_STAGE_##XS>(start, count, (GalliumD3D11ShaderResourceView *const *)new_shader_resource_views); \ + } \ + virtual void STDMETHODCALLTYPE XS##GetShaderResources(\ + unsigned start, \ + unsigned count, \ + ID3D11ShaderResourceView **out_shader_resource_views) \ + { \ + SYNCHRONIZED; \ + for(unsigned i = 0; i < count; ++i) \ + out_shader_resource_views[i] = shader_resource_views[D3D11_STAGE_##XS][start + i].ref(); \ + } \ + virtual void STDMETHODCALLTYPE XS##SetSamplers(\ + unsigned start, \ + unsigned count, \ + ID3D11SamplerState *const *new_samplers) \ + { \ + SYNCHRONIZED; \ + xs_set_samplers<D3D11_STAGE_##XS>(start, count, (GalliumD3D11SamplerState *const *)new_samplers); \ + } \ + virtual void STDMETHODCALLTYPE XS##GetSamplers( \ + unsigned start, \ + unsigned count, \ + ID3D11SamplerState **out_samplers) \ + { \ + SYNCHRONIZED; \ + for(unsigned i = 0; i < count; ++i) \ + out_samplers[i] = samplers[D3D11_STAGE_##XS][start + i].ref(); \ + } + +#define DO_VS(x) x +#define DO_GS(x) do {if(caps.gs) {x;}} while(0) +#define DO_PS(x) x +#define DO_HS(x) +#define DO_DS(x) +#define DO_CS(x) + IMPLEMENT_SHADER_STAGE(VS, Vertex) + IMPLEMENT_SHADER_STAGE(GS, Geometry) + IMPLEMENT_SHADER_STAGE(PS, Pixel) + +#if API >= 11 + IMPLEMENT_SHADER_STAGE(HS, Hull) + IMPLEMENT_SHADER_STAGE(DS, Domain) + IMPLEMENT_SHADER_STAGE(CS, Compute) + + virtual void STDMETHODCALLTYPE CSSetUnorderedAccessViews( + unsigned start, + unsigned count, + ID3D11UnorderedAccessView *const *new_unordered_access_views, + const unsigned *new_uav_initial_counts) + { + SYNCHRONIZED; + for(unsigned i = 0; i < count; ++i) + cs_unordered_access_views[start + i] = new_unordered_access_views[i]; + } + + virtual void STDMETHODCALLTYPE CSGetUnorderedAccessViews( + unsigned start, + unsigned count, + ID3D11UnorderedAccessView **out_unordered_access_views) + { + SYNCHRONIZED; + for(unsigned i = 0; i < count; ++i) + out_unordered_access_views[i] = cs_unordered_access_views[start + i].ref(); + } +#endif + + template<unsigned s> + void update_stage() + { + if(update_flags & (1 << (UPDATE_VIEWS_SHIFT + s))) + { + while(num_shader_resource_views[s] && !sampler_views[s][num_shader_resource_views[s] - 1]) \ + --num_shader_resource_views[s]; + if((1 << s) & caps.stages_with_sampling) + { + struct pipe_sampler_view* views_to_bind[PIPE_MAX_SAMPLERS]; + unsigned num_views_to_bind = shaders[s] ? shaders[s]->slot_to_resource.size() : 0; + for(unsigned i = 0; i < num_views_to_bind; ++i) + { + views_to_bind[i] = sampler_views[s][shaders[s]->slot_to_resource[i]]; + } + switch(s) + { + case PIPE_SHADER_VERTEX: + pipe->set_vertex_sampler_views(pipe, num_views_to_bind, views_to_bind); + break; + case PIPE_SHADER_FRAGMENT: + pipe->set_fragment_sampler_views(pipe, num_views_to_bind, views_to_bind); + break; + case PIPE_SHADER_GEOMETRY: + pipe->set_geometry_sampler_views(pipe, num_views_to_bind, views_to_bind); + break; + } + } + } + + if(update_flags & (1 << (UPDATE_SAMPLERS_SHIFT + s))) + { + while(num_samplers[s] && !sampler_csos[s].v[num_samplers[s] - 1]) + --num_samplers[s]; + if((1 << s) & caps.stages_with_sampling) + { + void* samplers_to_bind[PIPE_MAX_SAMPLERS]; + unsigned num_samplers_to_bind = shaders[s] ? shaders[s]->slot_to_sampler.size() : 0; + for(unsigned i = 0; i < num_samplers_to_bind; ++i) + { + // index can be -1 to access sampler_csos[s].ld + samplers_to_bind[i] = *(sampler_csos[s].v + shaders[s]->slot_to_sampler[i]); + } + switch(s) + { + case PIPE_SHADER_VERTEX: + pipe->bind_vertex_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind); + break; + case PIPE_SHADER_FRAGMENT: + pipe->bind_fragment_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind); + break; + case PIPE_SHADER_GEOMETRY: + pipe->bind_geometry_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind); + break; + } + } + } + } + + void update_state() + { + update_stage<D3D11_STAGE_PS>(); + update_stage<D3D11_STAGE_VS>(); + update_stage<D3D11_STAGE_GS>(); +#if API >= 11 + update_stage<D3D11_STAGE_HS>(); + update_stage<D3D11_STAGE_DS>(); + update_stage<D3D11_STAGE_CS>(); +#endif + + if(update_flags & UPDATE_VERTEX_BUFFERS) + { + while(num_vertex_buffers && !vertex_buffers[num_vertex_buffers - 1].buffer) + --num_vertex_buffers; + pipe->set_vertex_buffers(pipe, num_vertex_buffers, vertex_buffers); + } + + update_flags = 0; + } + + virtual void STDMETHODCALLTYPE IASetInputLayout( + ID3D11InputLayout *new_input_layout) + { + SYNCHRONIZED; + if(new_input_layout != input_layout.p) + { + input_layout = new_input_layout; + pipe->bind_vertex_elements_state(pipe, new_input_layout ? ((GalliumD3D11InputLayout*)new_input_layout)->object : default_input_layout); + } + } + + virtual void STDMETHODCALLTYPE IAGetInputLayout( + ID3D11InputLayout **out_input_layout) + { + SYNCHRONIZED; + *out_input_layout = input_layout.ref(); + } + + virtual void STDMETHODCALLTYPE IASetVertexBuffers( + unsigned start, + unsigned count, + ID3D11Buffer *const *new_vertex_buffers, + const unsigned *new_strides, + const unsigned *new_offsets) + { + SYNCHRONIZED; + int last_different = -1; + for(unsigned i = 0; i < count; ++i) + { + ID3D11Buffer* buffer = new_vertex_buffers[i]; + if(buffer != input_buffers[start + i].p + || vertex_buffers[start + i].buffer_offset != new_offsets[i] + || vertex_buffers[start + i].stride != new_offsets[i] + ) + { + input_buffers[start + i] = buffer; + vertex_buffers[start + i].buffer = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0; + vertex_buffers[start + i].buffer_offset = new_offsets[i]; + vertex_buffers[start + i].stride = new_strides[i]; + vertex_buffers[start + i].max_index = ~0; + last_different = i; + } + } + if(last_different >= 0) + { + num_vertex_buffers = std::max(num_vertex_buffers, start + count); + update_flags |= UPDATE_VERTEX_BUFFERS; + } + } + + virtual void STDMETHODCALLTYPE IAGetVertexBuffers( + unsigned start, + unsigned count, + ID3D11Buffer **out_vertex_buffers, + unsigned *out_strides, + unsigned *out_offsets) + { + SYNCHRONIZED; + if(out_vertex_buffers) + { + for(unsigned i = 0; i < count; ++i) + out_vertex_buffers[i] = input_buffers[start + i].ref(); + } + + if(out_offsets) + { + for(unsigned i = 0; i < count; ++i) + out_offsets[i] = vertex_buffers[start + i].buffer_offset; + } + + if(out_strides) + { + for(unsigned i = 0; i < count; ++i) + out_strides[i] = vertex_buffers[start + i].stride; + } + } + + void set_index_buffer() + { + pipe_index_buffer ib; + if(!index_buffer) + { + memset(&ib, 0, sizeof(ib)); + } + else + { + if(index_format == DXGI_FORMAT_R32_UINT) + ib.index_size = 4; + else if(index_format == DXGI_FORMAT_R16_UINT) + ib.index_size = 2; + else + ib.index_size = 1; + ib.offset = index_offset; + ib.buffer = index_buffer ? ((GalliumD3D11Buffer*)index_buffer.p)->resource : 0; + } + pipe->set_index_buffer(pipe, &ib); + } + + virtual void STDMETHODCALLTYPE IASetIndexBuffer( + ID3D11Buffer *new_index_buffer, + DXGI_FORMAT new_index_format, + unsigned new_index_offset) + { + SYNCHRONIZED; + if(index_buffer.p != new_index_buffer || index_format != new_index_format || index_offset != new_index_offset) + { + index_buffer = new_index_buffer; + index_format = new_index_format; + index_offset = new_index_offset; + + set_index_buffer(); + } + } + + virtual void STDMETHODCALLTYPE IAGetIndexBuffer( + ID3D11Buffer **out_index_buffer, + DXGI_FORMAT *out_index_format, + unsigned *out_index_offset) + { + SYNCHRONIZED; + if(out_index_buffer) + *out_index_buffer = index_buffer.ref(); + if(out_index_format) + *out_index_format = index_format; + if(out_index_offset) + *out_index_offset = index_offset; + } + + virtual void STDMETHODCALLTYPE IASetPrimitiveTopology( + D3D11_PRIMITIVE_TOPOLOGY new_primitive_topology) + { + SYNCHRONIZED; + if(primitive_topology != new_primitive_topology) + { + if(new_primitive_topology < D3D_PRIMITIVE_TOPOLOGY_COUNT) + primitive_mode = d3d_to_pipe_prim[new_primitive_topology]; + else + primitive_mode = 0; + primitive_topology = new_primitive_topology; + } + } + + virtual void STDMETHODCALLTYPE IAGetPrimitiveTopology( + D3D11_PRIMITIVE_TOPOLOGY *out_primitive_topology) + { + SYNCHRONIZED; + *out_primitive_topology = primitive_topology; + } + + virtual void STDMETHODCALLTYPE DrawIndexed( + unsigned index_count, + unsigned start_index_location, + int base_vertex_location) + { + SYNCHRONIZED; + if(update_flags) + update_state(); + + pipe_draw_info info; + info.mode = primitive_mode; + info.indexed = TRUE; + info.count = index_count; + info.start = start_index_location; + info.index_bias = base_vertex_location; + info.min_index = 0; + info.max_index = ~0; + info.start_instance = 0; + info.instance_count = 1; + + pipe->draw_vbo(pipe, &info); + } + + virtual void STDMETHODCALLTYPE Draw( + unsigned vertex_count, + unsigned start_vertex_location) + { + SYNCHRONIZED; + if(update_flags) + update_state(); + + pipe_draw_info info; + info.mode = primitive_mode; + info.indexed = FALSE; + info.count = vertex_count; + info.start = start_vertex_location; + info.index_bias = 0; + info.min_index = 0; + info.max_index = ~0; + info.start_instance = 0; + info.instance_count = 1; + + pipe->draw_vbo(pipe, &info); + } + + virtual void STDMETHODCALLTYPE DrawIndexedInstanced( + unsigned index_countPerInstance, + unsigned instance_count, + unsigned start_index_location, + int base_vertex_location, + unsigned start_instance_location) + { + SYNCHRONIZED; + if(update_flags) + update_state(); + + pipe_draw_info info; + info.mode = primitive_mode; + info.indexed = TRUE; + info.count = index_countPerInstance; + info.start = start_index_location; + info.index_bias = base_vertex_location; + info.min_index = 0; + info.max_index = ~0; + info.start_instance = start_instance_location; + info.instance_count = instance_count; + + pipe->draw_vbo(pipe, &info); + } + + virtual void STDMETHODCALLTYPE DrawInstanced( + unsigned vertex_countPerInstance, + unsigned instance_count, + unsigned start_vertex_location, + unsigned start_instance_location) + { + SYNCHRONIZED; + if(update_flags) + update_state(); + + pipe_draw_info info; + info.mode = primitive_mode; + info.indexed = FALSE; + info.count = vertex_countPerInstance; + info.start = start_vertex_location; + info.index_bias = 0; + info.min_index = 0; + info.max_index = ~0; + info.start_instance = start_instance_location; + info.instance_count = instance_count; + + pipe->draw_vbo(pipe, &info); + } + + virtual void STDMETHODCALLTYPE DrawAuto(void) + { + if(!caps.so) + return; + + SYNCHRONIZED; + if(update_flags) + update_state(); + + pipe->draw_stream_output(pipe, primitive_mode); + } + + virtual void STDMETHODCALLTYPE DrawIndexedInstancedIndirect( + ID3D11Buffer *buffer, + unsigned aligned_byte_offset) + { + SYNCHRONIZED; + if(update_flags) + update_state(); + + struct { + unsigned count; + unsigned instance_count; + unsigned start; + unsigned index_bias; + } data; + + pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data); + + pipe_draw_info info; + info.mode = primitive_mode; + info.indexed = TRUE; + info.start = data.start; + info.count = data.count; + info.index_bias = data.index_bias; + info.min_index = 0; + info.max_index = ~0; + info.start_instance = 0; + info.instance_count = data.instance_count; + + pipe->draw_vbo(pipe, &info); + } + + virtual void STDMETHODCALLTYPE DrawInstancedIndirect( + ID3D11Buffer *buffer, + unsigned aligned_byte_offset) + { + SYNCHRONIZED; + if(update_flags) + update_state(); + + struct { + unsigned count; + unsigned instance_count; + unsigned start; + } data; + + pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data); + + pipe_draw_info info; + info.mode = primitive_mode; + info.indexed = FALSE; + info.start = data.start; + info.count = data.count; + info.index_bias = 0; + info.min_index = 0; + info.max_index = ~0; + info.start_instance = 0; + info.instance_count = data.instance_count; + + pipe->draw_vbo(pipe, &info); + } + +#if API >= 11 + virtual void STDMETHODCALLTYPE Dispatch( + unsigned thread_group_count_x, + unsigned thread_group_count_y, + unsigned thread_group_count_z) + { +// uncomment this when this is implemented +// SYNCHRONIZED; +// if(update_flags) +// update_state(); + } + + virtual void STDMETHODCALLTYPE DispatchIndirect( + ID3D11Buffer *buffer, + unsigned aligned_byte_offset) + { +// uncomment this when this is implemented +// SYNCHRONIZED; +// if(update_flags) +// update_state(); + } +#endif + + void set_clip() + { + pipe_clip_state clip; + clip.nr = 0; + clip.depth_clamp = depth_clamp; + pipe->set_clip_state(pipe, &clip); + } + + virtual void STDMETHODCALLTYPE RSSetState( + ID3D11RasterizerState *new_rasterizer_state) + { + SYNCHRONIZED; + if(new_rasterizer_state != rasterizer_state.p) + { + rasterizer_state = new_rasterizer_state; + pipe->bind_rasterizer_state(pipe, new_rasterizer_state ? ((GalliumD3D11RasterizerState*)new_rasterizer_state)->object : default_rasterizer); + bool new_depth_clamp = new_rasterizer_state ? ((GalliumD3D11RasterizerState*)new_rasterizer_state)->depth_clamp : false; + if(depth_clamp != new_depth_clamp) + { + depth_clamp = new_depth_clamp; + set_clip(); + } + } + } + + virtual void STDMETHODCALLTYPE RSGetState( + ID3D11RasterizerState **out_rasterizer_state) + { + SYNCHRONIZED; + *out_rasterizer_state = rasterizer_state.ref(); + } + + void set_viewport() + { + // TODO: is depth correct? it seems D3D10/11 uses a [-1,1]x[-1,1]x[0,1] cube + pipe_viewport_state viewport; + float half_width = viewports[0].Width * 0.5f; + float half_height = viewports[0].Height * 0.5f; + + viewport.scale[0] = half_width; + viewport.scale[1] = -half_height; + viewport.scale[2] = (viewports[0].MaxDepth - viewports[0].MinDepth); + viewport.scale[3] = 1.0f; + viewport.translate[0] = half_width + viewports[0].TopLeftX; + viewport.translate[1] = half_height + viewports[0].TopLeftY; + viewport.translate[2] = viewports[0].MinDepth; + viewport.translate[3] = 1.0f; + pipe->set_viewport_state(pipe, &viewport); + } + + virtual void STDMETHODCALLTYPE RSSetViewports( + unsigned count, + const D3D11_VIEWPORT *new_viewports) + { + SYNCHRONIZED; + if(count) + { + if(memcmp(&viewports[0], &new_viewports[0], sizeof(viewports[0]))) + { + viewports[0] = new_viewports[0]; + set_viewport(); + } + for(unsigned i = 1; i < count; ++i) + viewports[i] = new_viewports[i]; + } + else if(num_viewports) + { + // TODO: what should we do here? + memset(&viewports[0], 0, sizeof(viewports[0])); + set_viewport(); + } + num_viewports = count; + } + + virtual void STDMETHODCALLTYPE RSGetViewports( + unsigned *out_count, + D3D11_VIEWPORT *out_viewports) + { + SYNCHRONIZED; + if(out_viewports) + { + unsigned i; + for(i = 0; i < std::min(*out_count, num_viewports); ++i) + out_viewports[i] = viewports[i]; + + memset(out_viewports + i, 0, (*out_count - i) * sizeof(D3D11_VIEWPORT)); + } + + *out_count = num_viewports; + } + + void set_scissor() + { + pipe_scissor_state scissor; + scissor.minx = scissor_rects[0].left; + scissor.miny = scissor_rects[0].top; + scissor.maxx = scissor_rects[0].right; + scissor.maxy = scissor_rects[0].bottom; + pipe->set_scissor_state(pipe, &scissor); + } + + virtual void STDMETHODCALLTYPE RSSetScissorRects( + unsigned count, + const D3D11_RECT *new_rects) + { + SYNCHRONIZED; + if(count) + { + if(memcmp(&scissor_rects[0], &new_rects[0], sizeof(scissor_rects[0]))) + { + scissor_rects[0] = new_rects[0]; + set_scissor(); + } + for(unsigned i = 1; i < count; ++i) + scissor_rects[i] = new_rects[i]; + } + else if(num_scissor_rects) + { + // TODO: what should we do here? + memset(&scissor_rects[0], 0, sizeof(scissor_rects[0])); + set_scissor(); + } + + num_scissor_rects = count; + } + + virtual void STDMETHODCALLTYPE RSGetScissorRects( + unsigned *out_count, + D3D11_RECT *out_rects) + { + SYNCHRONIZED; + if(out_rects) + { + unsigned i; + for(i = 0; i < std::min(*out_count, num_scissor_rects); ++i) + out_rects[i] = scissor_rects[i]; + + memset(out_rects + i, 0, (*out_count - i) * sizeof(D3D11_RECT)); + } + + *out_count = num_scissor_rects; + } + + virtual void STDMETHODCALLTYPE OMSetBlendState( + ID3D11BlendState *new_blend_state, + const float new_blend_factor[4], + unsigned new_sample_mask) + { + SYNCHRONIZED; + float white[4] = {1.0f, 1.0f, 1.0f, 1.0f}; + + if(blend_state.p != new_blend_state) + { + pipe->bind_blend_state(pipe, new_blend_state ? ((GalliumD3D11BlendState*)new_blend_state)->object : default_blend); + blend_state = new_blend_state; + } + + // Windows D3D11 does this, even though it's apparently undocumented + if(!new_blend_factor) + new_blend_factor = white; + + if(memcmp(blend_color, new_blend_factor, sizeof(blend_color))) + { + pipe->set_blend_color(pipe, (struct pipe_blend_color*)new_blend_factor); + memcpy(blend_color, new_blend_factor, sizeof(blend_color)); + } + + if(sample_mask != new_sample_mask) + { + pipe->set_sample_mask(pipe, new_sample_mask); + sample_mask = new_sample_mask; + } + } + + virtual void STDMETHODCALLTYPE OMGetBlendState( + ID3D11BlendState **out_blend_state, + float out_blend_factor[4], + unsigned *out_sample_mask) + { + SYNCHRONIZED; + if(out_blend_state) + *out_blend_state = blend_state.ref(); + if(out_blend_factor) + memcpy(out_blend_factor, blend_color, sizeof(blend_color)); + if(out_sample_mask) + *out_sample_mask = sample_mask; + } + + void set_stencil_ref() + { + struct pipe_stencil_ref sref; + sref.ref_value[0] = stencil_ref; + sref.ref_value[1] = stencil_ref; + pipe->set_stencil_ref(pipe, &sref); + } + + virtual void STDMETHODCALLTYPE OMSetDepthStencilState( + ID3D11DepthStencilState *new_depth_stencil_state, + unsigned new_stencil_ref) + { + SYNCHRONIZED; + if(new_depth_stencil_state != depth_stencil_state.p) + { + pipe->bind_depth_stencil_alpha_state(pipe, new_depth_stencil_state ? ((GalliumD3D11DepthStencilState*)new_depth_stencil_state)->object : default_depth_stencil); + depth_stencil_state = new_depth_stencil_state; + } + + if(new_stencil_ref != stencil_ref) + { + stencil_ref = new_stencil_ref; + set_stencil_ref(); + } + } + + virtual void STDMETHODCALLTYPE OMGetDepthStencilState( + ID3D11DepthStencilState **out_depth_stencil_state, + unsigned *out_stencil_ref) + { + SYNCHRONIZED; + if(*out_depth_stencil_state) + *out_depth_stencil_state = depth_stencil_state.ref(); + if(out_stencil_ref) + *out_stencil_ref = stencil_ref; + } + + void set_framebuffer() + { + struct pipe_framebuffer_state fb; + memset(&fb, 0, sizeof(fb)); + if(depth_stencil_view) + { + struct pipe_surface* surf = ((GalliumD3D11DepthStencilView*)depth_stencil_view.p)->object; + fb.zsbuf = surf; + if(surf->width > fb.width) + fb.width = surf->width; + if(surf->height > fb.height) + fb.height = surf->height; + } + fb.nr_cbufs = num_render_target_views; + unsigned i; + for(i = 0; i < num_render_target_views; ++i) + { + if(render_target_views[i]) + { + struct pipe_surface* surf = ((GalliumD3D11RenderTargetView*)render_target_views[i].p)->object; + fb.cbufs[i] = surf; + if(surf->width > fb.width) + fb.width = surf->width; + if(surf->height > fb.height) + fb.height = surf->height; + } + } + + pipe->set_framebuffer_state(pipe, &fb); + } + + /* TODO: the docs say that we should unbind conflicting resources (e.g. those bound for read while we are binding them for write too), but we aren't. + * Hopefully nobody relies on this happening + */ + + virtual void STDMETHODCALLTYPE OMSetRenderTargets( + unsigned count, + ID3D11RenderTargetView *const *new_render_target_views, + ID3D11DepthStencilView *new_depth_stencil_view) + { + SYNCHRONIZED; + if(!new_render_target_views) + count = 0; + if(count == num_render_target_views) + { + for(unsigned i = 0; i < count; ++i) + { + if(new_render_target_views[i] != render_target_views[i].p) + goto changed; + } + return; + } +changed: + depth_stencil_view = new_depth_stencil_view; + unsigned i; + for(i = 0; i < count; ++i) + { + render_target_views[i] = new_render_target_views[i]; +#if API >= 11 + om_unordered_access_views[i] = (ID3D11UnorderedAccessView*)NULL; +#endif + } + for(; i < num_render_target_views; ++i) + render_target_views[i] = (ID3D11RenderTargetView*)NULL; + num_render_target_views = count; + set_framebuffer(); + } + + virtual void STDMETHODCALLTYPE OMGetRenderTargets( + unsigned count, + ID3D11RenderTargetView **out_render_target_views, + ID3D11DepthStencilView **out_depth_stencil_view) + { + SYNCHRONIZED; + if(out_render_target_views) + { + unsigned i; + for(i = 0; i < std::min(num_render_target_views, count); ++i) + out_render_target_views[i] = render_target_views[i].ref(); + + for(; i < count; ++i) + out_render_target_views[i] = 0; + } + + if(out_depth_stencil_view) + *out_depth_stencil_view = depth_stencil_view.ref(); + } + +#if API >= 11 + /* TODO: what is this supposed to do _exactly_? are we doing the right thing? */ + virtual void STDMETHODCALLTYPE OMSetRenderTargetsAndUnorderedAccessViews( + unsigned rtv_count, + ID3D11RenderTargetView *const *new_render_target_views, + ID3D11DepthStencilView *new_depth_stencil_view, + unsigned uav_start, + unsigned uav_count, + ID3D11UnorderedAccessView *const *new_unordered_access_views, + const unsigned *new_uav_initial_counts) + { + SYNCHRONIZED; + if(rtv_count != D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL) + OMSetRenderTargets(rtv_count, new_render_target_views, new_depth_stencil_view); + + if(uav_count != D3D11_KEEP_UNORDERED_ACCESS_VIEWS) + { + for(unsigned i = 0; i < uav_count; ++i) + { + om_unordered_access_views[uav_start + i] = new_unordered_access_views[i]; + render_target_views[uav_start + i] = (ID3D11RenderTargetView*)0; + } + } + } + + virtual void STDMETHODCALLTYPE OMGetRenderTargetsAndUnorderedAccessViews( + unsigned rtv_count, + ID3D11RenderTargetView **out_render_target_views, + ID3D11DepthStencilView **out_depth_stencil_view, + unsigned uav_start, + unsigned uav_count, + ID3D11UnorderedAccessView **out_unordered_access_views) + { + SYNCHRONIZED; + if(out_render_target_views) + OMGetRenderTargets(rtv_count, out_render_target_views, out_depth_stencil_view); + + if(out_unordered_access_views) + { + for(unsigned i = 0; i < uav_count; ++i) + out_unordered_access_views[i] = om_unordered_access_views[uav_start + i].ref(); + } + } +#endif + + virtual void STDMETHODCALLTYPE SOSetTargets( + unsigned count, + ID3D11Buffer *const *new_so_targets, + const unsigned *new_offsets) + { + SYNCHRONIZED; + unsigned i; + if(!new_so_targets) + count = 0; + bool changed = false; + for(i = 0; i < count; ++i) + { + ID3D11Buffer* buffer = new_so_targets[i]; + if(buffer != so_targets[i].p || new_offsets[i] != so_offsets[i]) + { + so_buffers[i] = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0; + so_targets[i] = buffer; + so_offsets[i] = new_offsets[i]; + changed = true; + } + } + for(; i < D3D11_SO_BUFFER_SLOT_COUNT; ++i) + { + if(so_targets[i].p || so_offsets[i]) + { + changed = true; + so_targets[i] = (ID3D11Buffer*)0; + so_offsets[i] = 0; + } + } + num_so_targets = count; + + if(changed && caps.so) + pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets); + } + + virtual void STDMETHODCALLTYPE SOGetTargets( + unsigned count, + ID3D11Buffer **out_so_targets +#if API < 11 + , UINT *out_offsets +#endif + ) + { + SYNCHRONIZED; + for(unsigned i = 0; i < count; ++i) + { + out_so_targets[i] = so_targets[i].ref(); +#if API < 11 + out_offsets[i] = so_offsets[i]; +#endif + } + } + + virtual void STDMETHODCALLTYPE Begin( + ID3D11Asynchronous *async) + { + SYNCHRONIZED; + if(caps.queries) + pipe->begin_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query); + } + + virtual void STDMETHODCALLTYPE End( + ID3D11Asynchronous *async) + { + SYNCHRONIZED; + if(caps.queries) + pipe->end_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query); + } + + virtual HRESULT STDMETHODCALLTYPE GetData( + ID3D11Asynchronous *iasync, + void *out_data, + unsigned data_size, + unsigned get_data_flags) + { + SYNCHRONIZED; + if(!caps.queries) + return E_NOTIMPL; + + GalliumD3D11Asynchronous<>* async = (GalliumD3D11Asynchronous<>*)iasync; + void* tmp_data = alloca(async->data_size); + boolean ret = pipe->get_query_result(pipe, async->query, !(get_data_flags & D3D11_ASYNC_GETDATA_DONOTFLUSH), tmp_data); + if(out_data) + memcpy(out_data, tmp_data, std::min(async->data_size, data_size)); + return ret ? S_OK : S_FALSE; + } + + void set_render_condition() + { + if(caps.render_condition) + { + if(!render_predicate) + pipe->render_condition(pipe, 0, 0); + else + { + GalliumD3D11Predicate* predicate = (GalliumD3D11Predicate*)render_predicate.p; + if(!render_predicate_value && predicate->desc.Query == D3D11_QUERY_OCCLUSION_PREDICATE) + { + unsigned mode = (predicate->desc.MiscFlags & D3D11_QUERY_MISC_PREDICATEHINT) ? PIPE_RENDER_COND_NO_WAIT : PIPE_RENDER_COND_WAIT; + pipe->render_condition(pipe, predicate->query, mode); + } + else + { + /* TODO: add inverted predication to Gallium*/ + pipe->render_condition(pipe, 0, 0); + } + } + } + } + + virtual void STDMETHODCALLTYPE SetPredication( + ID3D11Predicate *new_predicate, + BOOL new_predicate_value) + { + SYNCHRONIZED; + if(render_predicate.p != new_predicate || render_predicate_value != new_predicate_value) + { + render_predicate = new_predicate; + render_predicate_value = new_predicate_value; + set_render_condition(); + } + } + + virtual void STDMETHODCALLTYPE GetPredication( + ID3D11Predicate **out_predicate, + BOOL *out_predicate_value) + { + SYNCHRONIZED; + if(out_predicate) + *out_predicate = render_predicate.ref(); + if(out_predicate_value) + *out_predicate_value = render_predicate_value; + } + + static pipe_subresource d3d11_to_pipe_subresource(struct pipe_resource* resource, unsigned subresource) + { + pipe_subresource sr; + if(subresource <= resource->last_level) + { + sr.level = subresource; + sr.face = 0; + } + else + { + unsigned levels = resource->last_level + 1; + sr.level = subresource % levels; + sr.face = subresource / levels; + } + return sr; + } + + /* TODO: deferred contexts will need a different implementation of this, + * because we can't put the transfer info into the resource itself. + * Also, there are very different restrictions, for obvious reasons. + */ + virtual HRESULT STDMETHODCALLTYPE Map( + ID3D11Resource *iresource, + unsigned subresource, + D3D11_MAP map_type, + unsigned map_flags, + D3D11_MAPPED_SUBRESOURCE *mapped_resource) + { + SYNCHRONIZED; + GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource; + if(resource->transfers.count(subresource)) + return E_FAIL; + pipe_subresource sr = d3d11_to_pipe_subresource(resource->resource, subresource); + pipe_box box = d3d11_to_pipe_box(resource->resource, sr.level, 0); + unsigned usage = 0; + if(map_type == D3D11_MAP_READ) + usage = PIPE_TRANSFER_READ; + else if(map_type == D3D11_MAP_WRITE) + usage = PIPE_TRANSFER_WRITE; + else if(map_type == D3D11_MAP_READ_WRITE) + usage = PIPE_TRANSFER_READ_WRITE; + else if(map_type == D3D11_MAP_WRITE_DISCARD) + usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD; + else if(map_type == D3D11_MAP_WRITE_NO_OVERWRITE) + usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_NOOVERWRITE; + else + return E_INVALIDARG; + if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT) + usage |= PIPE_TRANSFER_DONTBLOCK; + struct pipe_transfer* transfer = pipe->get_transfer(pipe, resource->resource, sr, usage, &box); + if(!transfer) { + if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT) + return DXGI_ERROR_WAS_STILL_DRAWING; + else + return E_FAIL; + } + resource->transfers[subresource] = transfer; + mapped_resource->pData = pipe->transfer_map(pipe, transfer); + mapped_resource->RowPitch = transfer->stride; + mapped_resource->DepthPitch = transfer->slice_stride; + return S_OK; + } + + virtual void STDMETHODCALLTYPE Unmap( + ID3D11Resource *iresource, + unsigned subresource) + { + SYNCHRONIZED; + GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource; + std::unordered_map<unsigned, pipe_transfer*>::iterator i = resource->transfers.find(subresource); + if(i != resource->transfers.end()) + { + pipe->transfer_unmap(pipe, i->second); + pipe->transfer_destroy(pipe, i->second); + resource->transfers.erase(i); + } + } + + virtual void STDMETHODCALLTYPE CopySubresourceRegion( + ID3D11Resource *dst_resource, + unsigned dst_subresource, + unsigned dst_x, + unsigned dst_y, + unsigned dst_z, + ID3D11Resource *src_resource, + unsigned src_subresource, + const D3D11_BOX *src_box) + { + SYNCHRONIZED; + GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource; + GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource; + pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, dst_subresource); + pipe_subresource subsrc = d3d11_to_pipe_subresource(src->resource, src_subresource); + pipe_box box = d3d11_to_pipe_box(src->resource, subsrc.level, src_box); + for(unsigned i = 0; i < box.depth; ++i) + { + pipe->resource_copy_region(pipe, + dst->resource, subdst, dst_x, dst_y, dst_z + i, + src->resource, subsrc, box.x, box.y, box.z + i, + box.width, box.height); + } + } + + virtual void STDMETHODCALLTYPE CopyResource( + ID3D11Resource *dst_resource, + ID3D11Resource *src_resource) + { + SYNCHRONIZED; + GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource; + GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource; + pipe_subresource sr; + unsigned faces = dst->resource->target == PIPE_TEXTURE_CUBE ? 6 : 1; + + for(sr.face = 0; sr.face < faces; ++sr.face) + { + for(sr.level = 0; sr.level <= dst->resource->last_level; ++sr.level) + { + unsigned w = u_minify(dst->resource->width0, sr.level); + unsigned h = u_minify(dst->resource->height0, sr.level); + unsigned d = u_minify(dst->resource->depth0, sr.level); + for(unsigned i = 0; i < d; ++i) + { + pipe->resource_copy_region(pipe, + dst->resource, sr, 0, 0, i, + src->resource, sr, 0, 0, i, + w, h); + } + } + } + } + + virtual void STDMETHODCALLTYPE UpdateSubresource( + ID3D11Resource *dst_resource, + unsigned dst_subresource, + const D3D11_BOX *pDstBox, + const void *pSrcData, + unsigned src_row_pitch, + unsigned src_depth_pitch) + { + SYNCHRONIZED; + GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource; + pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, dst_subresource); + pipe_box box = d3d11_to_pipe_box(dst->resource, subdst.level, pDstBox); + pipe->transfer_inline_write(pipe, dst->resource, subdst, PIPE_TRANSFER_WRITE, &box, pSrcData, src_row_pitch, src_depth_pitch); + } + +#if API >= 11 + virtual void STDMETHODCALLTYPE CopyStructureCount( + ID3D11Buffer *dst_buffer, + unsigned dst_aligned_byte_offset, + ID3D11UnorderedAccessView *src_view) + { + SYNCHRONIZED; + } +#endif + + virtual void STDMETHODCALLTYPE ClearRenderTargetView( + ID3D11RenderTargetView *render_target_view, + const float color[4]) + { + SYNCHRONIZED; + GalliumD3D11RenderTargetView* view = ((GalliumD3D11RenderTargetView*)render_target_view); + pipe->clear_render_target(pipe, view->object, color, 0, 0, view->object->width, view->object->height); + } + + virtual void STDMETHODCALLTYPE ClearDepthStencilView( + ID3D11DepthStencilView *depth_stencil_view, + unsigned clear_flags, + float depth, + UINT8 stencil) + { + SYNCHRONIZED; + GalliumD3D11DepthStencilView* view = ((GalliumD3D11DepthStencilView*)depth_stencil_view); + unsigned flags = 0; + if(clear_flags & D3D11_CLEAR_DEPTH) + flags |= PIPE_CLEAR_DEPTH; + if(clear_flags & D3D11_CLEAR_STENCIL) + flags |= PIPE_CLEAR_STENCIL; + pipe->clear_depth_stencil(pipe, view->object, flags, depth, stencil, 0, 0, view->object->width, view->object->height); + } + +#if API >= 11 + virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewUint( + ID3D11UnorderedAccessView *unordered_access_view, + const unsigned values[4]) + { + SYNCHRONIZED; + } + + virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewFloat( + ID3D11UnorderedAccessView *unordered_access_view, + const float values[4]) + { + SYNCHRONIZED; + } +#endif + + void restore_gallium_state_blit_only() + { + pipe->bind_blend_state(pipe, blend_state.p ? blend_state.p->object : default_blend); + pipe->bind_depth_stencil_alpha_state(pipe, depth_stencil_state.p ? depth_stencil_state.p->object : default_depth_stencil); + pipe->bind_rasterizer_state(pipe, rasterizer_state.p ? rasterizer_state.p->object : default_rasterizer); + pipe->bind_vertex_elements_state(pipe, input_layout.p ? input_layout.p->object : default_input_layout); + pipe->bind_fs_state(pipe, shaders[D3D11_STAGE_PS].p ? shaders[D3D11_STAGE_PS].p->object : default_shaders[PIPE_SHADER_FRAGMENT]); + pipe->bind_vs_state(pipe, shaders[D3D11_STAGE_VS].p ? shaders[D3D11_STAGE_VS].p->object : default_shaders[PIPE_SHADER_VERTEX]); + if(caps.gs) + pipe->bind_gs_state(pipe, shaders[D3D11_STAGE_GS].p ? shaders[D3D11_STAGE_GS].p->object : default_shaders[PIPE_SHADER_GEOMETRY]); + set_framebuffer(); + set_viewport(); + set_clip(); + set_render_condition(); + // TODO: restore stream output + + update_flags |= UPDATE_VERTEX_BUFFERS | (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_PS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_PS)); + } + + virtual void STDMETHODCALLTYPE RestoreGalliumStateBlitOnly() + { + SYNCHRONIZED; + restore_gallium_state_blit_only(); + } + + virtual void STDMETHODCALLTYPE GenerateMips( + ID3D11ShaderResourceView *shader_resource_view) + { + SYNCHRONIZED; + + GalliumD3D11ShaderResourceView* view = (GalliumD3D11ShaderResourceView*)shader_resource_view; + if(caps.gs) + pipe->bind_gs_state(pipe, 0); + if(caps.so) + pipe->bind_stream_output_state(pipe, 0); + if(pipe->render_condition) + pipe->render_condition(pipe, 0, 0); + util_gen_mipmap(gen_mipmap, view->object, 0, 0, view->object->texture->last_level, PIPE_TEX_FILTER_LINEAR); + restore_gallium_state_blit_only(); + } + + virtual void STDMETHODCALLTYPE RestoreGalliumState() + { + SYNCHRONIZED; + restore_gallium_state_blit_only(); + + set_index_buffer(); + set_stencil_ref(); + pipe->set_blend_color(pipe, (struct pipe_blend_color*)blend_color); + pipe->set_sample_mask(pipe, sample_mask); + + for(unsigned s = 0; s < 3; ++s) + { + unsigned num = std::min(caps.constant_buffers[s], (unsigned)D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT); + for(unsigned i = 0; i < num; ++i) + pipe->set_constant_buffer(pipe, s, i, constant_buffers[s][i].p ? constant_buffers[s][i].p->resource : 0); + } + + if(caps.so) + pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets); + + update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_VS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_VS)); + update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_GS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_GS)); + + set_scissor(); + } + +#if API >= 11 + /* TODO: hack SRVs or sampler states to handle this, or add to Gallium */ + virtual void STDMETHODCALLTYPE SetResourceMinLOD( + ID3D11Resource *iresource, + float min_lod) + { + SYNCHRONIZED; + GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource; + if(resource->min_lod != min_lod) + { + // TODO: actually do anything? + resource->min_lod = min_lod; + } + } + + virtual float STDMETHODCALLTYPE GetResourceMinLOD( + ID3D11Resource *iresource) + { + SYNCHRONIZED; + GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource; + return resource->min_lod; + } +#endif + + virtual void STDMETHODCALLTYPE ResolveSubresource( + ID3D11Resource *dst_resource, + unsigned dst_subresource, + ID3D11Resource *src_resource, + unsigned src_subresource, + DXGI_FORMAT format) + { + SYNCHRONIZED; + GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource; + GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource; + pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, dst_subresource); + pipe_subresource subsrc = d3d11_to_pipe_subresource(src->resource, src_subresource); + pipe->resource_resolve(pipe, dst->resource, subdst, src->resource, subsrc); + } + +#if API >= 11 + virtual void STDMETHODCALLTYPE ExecuteCommandList( + ID3D11CommandList *command_list, + BOOL restore_context_state) + { + SYNCHRONIZED; + } + + virtual HRESULT STDMETHODCALLTYPE FinishCommandList( + BOOL restore_deferred_context_state, + ID3D11CommandList **out_command_list) + { + SYNCHRONIZED; + return E_NOTIMPL; + } +#endif + + virtual void STDMETHODCALLTYPE ClearState(void) + { + /* we don't take a lock here because we would deadlock otherwise + * TODO: this is probably incorrect, because ClearState should likely be atomic. + * However, I can't think of any correct usage that would be affected by this + * being non-atomic, and making this atomic is quite expensive and complicates + * the code + */ + + // we qualify all calls so that we avoid virtual dispatch and might get them inlined + // TODO: make sure all this gets inlined, which might require more compiler flags + // TODO: optimize this +#if API >= 11 + GalliumD3D11DeviceContext::PSSetShader(0, 0, 0); + GalliumD3D11DeviceContext::GSSetShader(0, 0, 0); + GalliumD3D11DeviceContext::VSSetShader(0, 0, 0); + GalliumD3D11DeviceContext::HSSetShader(0, 0, 0); + GalliumD3D11DeviceContext::DSSetShader(0, 0, 0); + GalliumD3D11DeviceContext::CSSetShader(0, 0, 0); +#else + GalliumD3D11DeviceContext::PSSetShader(0); + GalliumD3D11DeviceContext::GSSetShader(0); + GalliumD3D11DeviceContext::VSSetShader(0); +#endif + + GalliumD3D11DeviceContext::IASetInputLayout(0); + GalliumD3D11DeviceContext::IASetIndexBuffer(0, DXGI_FORMAT_UNKNOWN, 0); + GalliumD3D11DeviceContext::RSSetState(0); + GalliumD3D11DeviceContext::OMSetDepthStencilState(0, 0); + GalliumD3D11DeviceContext::OMSetBlendState(0, (float*)zero_data, ~0); + GalliumD3D11DeviceContext::SetPredication(0, 0); + GalliumD3D11DeviceContext::IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_UNDEFINED); + + GalliumD3D11DeviceContext::PSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data); + GalliumD3D11DeviceContext::GSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data); + GalliumD3D11DeviceContext::VSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data); +#if API >= 11 + GalliumD3D11DeviceContext::HSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data); + GalliumD3D11DeviceContext::DSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data); + GalliumD3D11DeviceContext::CSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data); +#endif + + GalliumD3D11DeviceContext::IASetVertexBuffers(0, num_vertex_buffers, (ID3D11Buffer**)zero_data, (unsigned*)zero_data, (unsigned*)zero_data); +#if API >= 11 + GalliumD3D11DeviceContext::OMSetRenderTargetsAndUnorderedAccessViews(0, 0, 0 , 0, 0, 0, 0); +#else + GalliumD3D11DeviceContext::OMSetRenderTargets(0, 0, 0 ); +#endif + GalliumD3D11DeviceContext::SOSetTargets(0, 0, 0); + + GalliumD3D11DeviceContext::PSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11ShaderResourceView**)zero_data); + GalliumD3D11DeviceContext::GSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11ShaderResourceView**)zero_data); + GalliumD3D11DeviceContext::VSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11ShaderResourceView**)zero_data); +#if API >= 11 + GalliumD3D11DeviceContext::HSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11ShaderResourceView**)zero_data); + GalliumD3D11DeviceContext::DSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11ShaderResourceView**)zero_data); + GalliumD3D11DeviceContext::CSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11ShaderResourceView**)zero_data); +#endif + + GalliumD3D11DeviceContext::PSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11SamplerState**)zero_data); + GalliumD3D11DeviceContext::GSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11SamplerState**)zero_data); + GalliumD3D11DeviceContext::VSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11SamplerState**)zero_data); +#if API >= 11 + GalliumD3D11DeviceContext::HSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11SamplerState**)zero_data); + GalliumD3D11DeviceContext::DSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11SamplerState**)zero_data); + GalliumD3D11DeviceContext::CSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11SamplerState**)zero_data); +#endif + + GalliumD3D11DeviceContext::RSSetViewports(0, 0); + GalliumD3D11DeviceContext::RSSetScissorRects(0, 0); + } + + virtual void STDMETHODCALLTYPE Flush(void) + { + SYNCHRONIZED; + pipe->flush(pipe, PIPE_FLUSH_FRAME, 0); + } + + /* In Direct3D 10, if the reference count of an object drops to 0, it is automatically + * cleanly unbound from the pipeline. + * In Direct3D 11, the pipeline holds a reference. + * + * Note that instead of always scanning the pipeline on destruction, we could + * maintain the internal reference count on DirectX 10 and use it to check if an + * object is still bound. + * Presumably, on average, scanning is faster if the application is well written. + */ +#if API < 11 +#define IMPLEMENT_SIMPLE_UNBIND(name, member, gallium, def) \ + void Unbind##name(ID3D11##name* state) \ + { \ + SYNCHRONIZED; \ + if((void*)state == (void*)member.p) \ + { \ + member.p = 0; \ + pipe->bind_##gallium##_state(pipe, default_##def); \ + } \ + } + IMPLEMENT_SIMPLE_UNBIND(BlendState, blend_state, blend, blend) + IMPLEMENT_SIMPLE_UNBIND(RasterizerState, rasterizer_state, rasterizer, rasterizer) + IMPLEMENT_SIMPLE_UNBIND(DepthStencilState, depth_stencil_state, depth_stencil_alpha, depth_stencil) + IMPLEMENT_SIMPLE_UNBIND(InputLayout, input_layout, vertex_elements, input_layout) + IMPLEMENT_SIMPLE_UNBIND(PixelShader, shaders[D3D11_STAGE_PS], fs, shaders[D3D11_STAGE_PS]) + IMPLEMENT_SIMPLE_UNBIND(VertexShader, shaders[D3D11_STAGE_VS], vs, shaders[D3D11_STAGE_VS]) + IMPLEMENT_SIMPLE_UNBIND(GeometryShader, shaders[D3D11_STAGE_GS], gs, shaders[D3D11_STAGE_GS]) + + void UnbindPredicate(ID3D11Predicate* predicate) + { + SYNCHRONIZED; + if(predicate == render_predicate) + { + render_predicate.p = NULL; + render_predicate_value = 0; + pipe->render_condition(pipe, 0, 0); + } + } + + void UnbindSamplerState(ID3D11SamplerState* state) + { + SYNCHRONIZED; + for(unsigned s = 0; s < D3D11_STAGES; ++s) + { + for(unsigned i = 0; i < num_samplers[s]; ++i) + { + if(samplers[s][i] == state) + { + samplers[s][i].p = NULL; + sampler_csos[s].v[i] = NULL; + update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s)); + } + } + } + } + + void UnbindBuffer(ID3D11Buffer* buffer) + { + SYNCHRONIZED; + if(buffer == index_buffer) + { + index_buffer.p = 0; + index_format = DXGI_FORMAT_UNKNOWN; + index_offset = 0; + struct pipe_index_buffer ib; + memset(&ib, 0, sizeof(ib)); + pipe->set_index_buffer(pipe, &ib); + } + + for(unsigned i = 0; i < num_vertex_buffers; ++i) + { + if(buffer == input_buffers[i]) + { + input_buffers[i].p = 0; + memset(&vertex_buffers[num_vertex_buffers], 0, sizeof(vertex_buffers[num_vertex_buffers])); + update_flags |= UPDATE_VERTEX_BUFFERS; + } + } + + for(unsigned s = 0; s < D3D11_STAGES; ++s) + { + for(unsigned i = 0; i < sizeof(constant_buffers) / sizeof(constant_buffers[0]); ++i) + { + if(constant_buffers[s][i] == buffer) + { + constant_buffers[s][i] = (ID3D10Buffer*)NULL; + pipe->set_constant_buffer(pipe, s, i, NULL); + } + } + } + } + + void UnbindDepthStencilView(ID3D11DepthStencilView * view) + { + SYNCHRONIZED; + if(view == depth_stencil_view) + { + depth_stencil_view.p = NULL; + set_framebuffer(); + } + } + + void UnbindRenderTargetView(ID3D11RenderTargetView* view) + { + SYNCHRONIZED; + bool any_bound = false; + for(unsigned i = 0; i < num_render_target_views; ++i) + { + if(render_target_views[i] == view) + { + render_target_views[i].p = NULL; + any_bound = true; + } + } + if(any_bound) + set_framebuffer(); + } + + void UnbindShaderResourceView(ID3D11ShaderResourceView* view) + { + SYNCHRONIZED; + for(unsigned s = 0; s < D3D11_STAGES; ++s) + { + for(unsigned i = 0; i < num_shader_resource_views[s]; ++i) + { + if(shader_resource_views[s][i] == view) + { + shader_resource_views[s][i].p = NULL; + sampler_views[s][i] = NULL; + update_flags |= (1 << (UPDATE_VIEWS_SHIFT + s)); + } + } + } + } +#endif + +#undef SYNCHRONIZED +}; + +#if API >= 11 +/* This approach serves two purposes. + * First, we don't want to do an atomic operation to manipulate the reference + * count every time something is bound/unbound to the pipeline, since they are + * expensive. + * Fortunately, the immediate context can only be used by a single thread, so + * we don't have to use them, as long as a separate reference count is used + * (see dual_refcnt_t). + * + * Second, we want to avoid the Device -> DeviceContext -> bound DeviceChild -> Device + * garbage cycle. + * To avoid it, DeviceChild doesn't hold a reference to Device as usual, but adds + * one for each external reference count, while internal nonatomic_add_ref doesn't + * add any. + * + * Note that ideally we would to eliminate the non-atomic op too, but this is more + * complicated, since we would either need to use garbage collection and give up + * deterministic destruction (especially bad for large textures), or scan the whole + * pipeline state every time the reference count of object drops to 0, which risks + * pathological slowdowns. + * + * Since this microoptimization should matter relatively little, let's avoid it for now. + * + * Note that deferred contexts don't use this, since as a whole, they must thread-safe. + * Eliminating the atomic ops for deferred contexts seems substantially harder. + * This might be a problem if they are used in a one-shot multithreaded rendering + * fashion, where SMP cacheline bouncing on the reference count may be visible. + * + * The idea would be to attach a structure of reference counts indexed by deferred + * context id to each object. Ideally, this should be organized like ext2 block pointers. + * + * Every deferred context would get a reference count in its own cacheline. + * The external count is protected by a lock bit, and there is also a "lock bit" in each + * internal count. + * + * When the external count has to be dropped to 0, the lock bit is taken and all internal + * reference counts are scanned, taking a count of them. A flag would also be set on them. + * Deferred context manipulation would notice the flag, and update the count. + * Once the count goes to zero, the object is freed. + * + * The problem of this is that if the external reference count ping-pongs between + * zero and non-zero, the scans will take a lot of time. + * + * The idea to solve this is to compute the scans in a binary-tree like fashion, where + * each binary tree node would have a "determined bit", which would be invalidated + * by manipulations. + * + * However, all this complexity might actually be a loss in most cases, so let's just + * stick to a single atomic refcnt for now. + * + * Also, we don't even support deferred contexts yet, so this can wait. + */ +struct nonatomic_device_child_ptr_traits +{ + static void add_ref(void* p) + { + if(p) + ((GalliumD3D11DeviceChild<>*)p)->nonatomic_add_ref(); + } + + static void release(void* p) + { + if(p) + ((GalliumD3D11DeviceChild<>*)p)->nonatomic_release(); + } +}; + +struct GalliumD3D11ImmediateDeviceContext + : public GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits> +{ + GalliumD3D11ImmediateDeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, unsigned context_flags = 0) + : GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>(device, pipe, context_flags) + { + // not necessary, but tests that the API at least basically works + ClearState(); + } + + /* we do this since otherwise we would have a garbage cycle between this and the device */ + virtual ULONG STDMETHODCALLTYPE AddRef() + { + return this->device->AddRef(); + } + + virtual ULONG STDMETHODCALLTYPE Release() + { + return this->device->Release(); + } + + virtual D3D11_DEVICE_CONTEXT_TYPE STDMETHODCALLTYPE GetType() + { + return D3D11_DEVICE_CONTEXT_IMMEDIATE; + } +}; + +static ID3D11DeviceContext* GalliumD3D11ImmediateDeviceContext_Create(GalliumD3D11Screen* device, struct pipe_context* pipe, bool owns_pipe) +{ + return new GalliumD3D11ImmediateDeviceContext(device, pipe, owns_pipe); +} + +static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumState(ID3D11DeviceContext* context) +{ + ((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumState(); +} + +static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumStateBlitOnly(ID3D11DeviceContext* context) +{ + ((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumStateBlitOnly(); +} + +static void GalliumD3D11ImmediateDeviceContext_Destroy(ID3D11DeviceContext* context) +{ + delete (GalliumD3D11ImmediateDeviceContext*)context; +} +#endif diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_misc.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_misc.h new file mode 100644 index 0000000000..357f51bcb9 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_misc.h @@ -0,0 +1,92 @@ +#if API < 11 +extern "C" HRESULT STDMETHODCALLTYPE D3D10CreateBlob( + SIZE_T num_bytes, + LPD3D10BLOB *out_buffer +); + +HRESULT STDMETHODCALLTYPE D3D10CreateBlob( + SIZE_T num_bytes, + LPD3D10BLOB *out_buffer +) +{ + void* data = malloc(num_bytes); + if(!data) + return E_OUTOFMEMORY; + *out_buffer = new GalliumD3DBlob(data, num_bytes); + return S_OK; +} + +LPCSTR STDMETHODCALLTYPE D3D10GetPixelShaderProfile( + ID3D10Device *device +) +{ + return "ps_4_0"; +} + +LPCSTR STDMETHODCALLTYPE D3D10GetVertexShaderProfile( + ID3D10Device *device +) +{ + return "vs_4_0"; +} + +LPCSTR STDMETHODCALLTYPE D3D10GetGeometryShaderProfile( + ID3D10Device *device +) +{ + return "gs_4_0"; +} + +static HRESULT dxbc_assemble_as_blob(struct dxbc_chunk_header** chunks, unsigned num_chunks, ID3D10Blob** blob) +{ + std::pair<void*, size_t> p = dxbc_assemble(chunks, num_chunks); + if(!p.first) + return E_OUTOFMEMORY; + *blob = new GalliumD3DBlob(p.first, p.second); + return S_OK; +} + +HRESULT D3D10GetInputSignatureBlob( + const void *shader_bytecode, + SIZE_T bytecode_length, + ID3D10Blob **signature_blob +) +{ + dxbc_chunk_signature* sig = dxbc_find_signature(shader_bytecode, bytecode_length, false); + if(!sig) + return E_FAIL; + + return dxbc_assemble_as_blob((dxbc_chunk_header**)&sig, 1, signature_blob); +} + +HRESULT D3D10GetOutputSignatureBlob( + const void *shader_bytecode, + SIZE_T bytecode_length, + ID3D10Blob **signature_blob +) +{ + dxbc_chunk_signature* sig = dxbc_find_signature(shader_bytecode, bytecode_length, true); + if(!sig) + return E_FAIL; + + return dxbc_assemble_as_blob((dxbc_chunk_header**)&sig, 1, signature_blob); +} + +HRESULT D3D10GetInputAndOutputSignatureBlob( + const void *shader_bytecode, + SIZE_T bytecode_length, + ID3D10Blob **signature_blob +) +{ + dxbc_chunk_signature* sigs[2]; + sigs[0] = dxbc_find_signature(shader_bytecode, bytecode_length, false); + if(!sigs[0]) + return E_FAIL; + sigs[1] = dxbc_find_signature(shader_bytecode, bytecode_length, true); + if(!sigs[1]) + return E_FAIL; + + return dxbc_assemble_as_blob((dxbc_chunk_header**)&sigs, 2, signature_blob); +} + +#endif diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_objects.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_objects.h new file mode 100644 index 0000000000..836603eccc --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_objects.h @@ -0,0 +1,717 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +template<typename Base = ID3D11DeviceChild> +struct GalliumD3D11DeviceChild : public GalliumPrivateDataComObject<Base, dual_refcnt_t> +{ + GalliumD3D11Screen* device; // must not be null + + + // if this is called, the subclass constructor must set device itself + GalliumD3D11DeviceChild() + : device(0) + {} + + GalliumD3D11DeviceChild(GalliumD3D11Screen* p_device) + { + // we store the reference count minus one in refcnt + device = p_device; + device->AddRef(); + } + + /* The purpose of this is to avoid cyclic garbage, since this won't hold + * a pointer to the device if it is only held by a pipeline binding in the immediate context + * + * TODO: we could only manipulate the device refcnt when atomic_refcnt == 0 changes, + * but this requires more complex atomic ops + */ + inline ULONG add_ref() + { + device->AddRef(); + return GalliumPrivateDataComObject<Base, dual_refcnt_t>::add_ref(); + } + + inline ULONG release() + { + device->Release(); + return GalliumPrivateDataComObject<Base, dual_refcnt_t>::release(); + } + + virtual ULONG STDMETHODCALLTYPE AddRef() + { + return add_ref(); + } + + virtual ULONG STDMETHODCALLTYPE Release() + { + return release(); + } + + virtual void STDMETHODCALLTYPE GetDevice( + ID3D11Device **out_device + ) + { + device->AddRef(); + *out_device = device; + } +}; + +template<typename Base = ID3D11DeviceChild, typename Object = void> +struct GalliumD3D11Object : public GalliumD3D11DeviceChild<Base> +{ + Object* object; + GalliumD3D11Object(GalliumD3D11Screen* device, Object* object) + : GalliumD3D11DeviceChild<Base>(device), object(object) + {} + + virtual ~GalliumD3D11Object(); +}; + +#define IMPLEMENT_OBJECT_DTOR(name, gallium) \ +template<> \ +GalliumD3D11Object<ID3D11##name, void>::~GalliumD3D11Object() \ +{ \ + DX10_ONLY(device->Unbind##name(this)); \ + device->immediate_pipe->delete_##gallium##_state(device->immediate_pipe, object); \ +} + +#define IMPLEMENT_VIEW_DTOR(name, gallium) \ +template<> \ +GalliumD3D11Object<ID3D11##name, struct pipe_##gallium>::~GalliumD3D11Object() \ +{ \ + DX10_ONLY(device->Unbind##name(this)); \ + pipe_##gallium##_reference(&object, 0); \ +} + +IMPLEMENT_OBJECT_DTOR(InputLayout, vertex_elements) +IMPLEMENT_OBJECT_DTOR(DepthStencilState, depth_stencil_alpha) +IMPLEMENT_OBJECT_DTOR(RasterizerState, rasterizer) +IMPLEMENT_OBJECT_DTOR(SamplerState, sampler) +IMPLEMENT_OBJECT_DTOR(BlendState, blend) +IMPLEMENT_OBJECT_DTOR(VertexShader, vs) +IMPLEMENT_OBJECT_DTOR(PixelShader, fs) +IMPLEMENT_OBJECT_DTOR(GeometryShader, gs) + +IMPLEMENT_VIEW_DTOR(ShaderResourceView, sampler_view) +IMPLEMENT_VIEW_DTOR(RenderTargetView, surface) +IMPLEMENT_VIEW_DTOR(DepthStencilView, surface) + +#if API >= 11 +// IMPLEMENT_VIEW_DTOR(UnorderedAccessView, surface); +// IMPLEMENT_OBJECT_DTOR(HullShader, tcs); +// IMPLEMENT_OBJECT_DTOR(DomainShader, tes); +// IMPLEMENT_OBJECT_DTOR(ComputeShader, cs); +#else +IMPLEMENT_OBJECT_DTOR(BlendState1, blend) +IMPLEMENT_VIEW_DTOR(ShaderResourceView1, sampler_view) +#endif + +template<typename Base, typename Desc, typename Object = void> +struct GalliumD3D11DescribedObject : public GalliumD3D11Object<Base, Object> +{ + Desc desc; + GalliumD3D11DescribedObject(GalliumD3D11Screen* device, Object* object, const Desc& desc) + : GalliumD3D11Object<Base, Object>(device, object), desc(desc) + {} + + virtual void STDMETHODCALLTYPE GetDesc(Desc *out_desc) + { + memcpy(out_desc, &desc, sizeof(desc)); + } +}; + +typedef GalliumD3D11Object<ID3D11InputLayout> GalliumD3D11InputLayout; +typedef GalliumD3D11DescribedObject<ID3D11DepthStencilState, D3D11_DEPTH_STENCIL_DESC> GalliumD3D11DepthStencilState; +typedef GalliumD3D11DescribedObject<ID3D11RasterizerState, D3D11_RASTERIZER_DESC> GalliumD3D11RasterizerStateBase; +typedef GalliumD3D11DescribedObject<ID3D11SamplerState, D3D11_SAMPLER_DESC> GalliumD3D11SamplerState; + +#if API >= 11 +typedef GalliumD3D11DescribedObject<ID3D11BlendState, D3D11_BLEND_DESC> GalliumD3D11BlendState; +#else +typedef GalliumD3D10DescribedObject<ID3D10BlendState1, D3D10_BLEND_DESC> GalliumD3D10BlendStateBase; + +struct GalliumD3D10BlendState : public GalliumD3D10BlendStateBase +{ + static D3D10_BLEND_DESC convert_to_d3d10(const D3D10_BLEND_DESC1& desc1) + { + D3D10_BLEND_DESC desc; + desc.AlphaToCoverageEnable = desc1.AlphaToCoverageEnable; + desc.SrcBlend = desc1.RenderTarget[0].SrcBlend; + desc.DestBlend = desc1.RenderTarget[0].DestBlend; + desc.BlendOp = desc1.RenderTarget[0].BlendOp; + desc.SrcBlendAlpha = desc1.RenderTarget[0].SrcBlendAlpha; + desc.DestBlendAlpha = desc1.RenderTarget[0].DestBlendAlpha; + desc.BlendOpAlpha = desc1.RenderTarget[0].BlendOpAlpha; + for(unsigned i = 0; i < 8; ++i) + { + desc.BlendEnable[i] = desc1.RenderTarget[i].BlendEnable; + desc.RenderTargetWriteMask[i] = desc1.RenderTarget[i].RenderTargetWriteMask; + } + return desc; + } + + D3D10_BLEND_DESC1 desc1; + + GalliumD3D10BlendState(GalliumD3D10Screen* device, void* object, const D3D10_BLEND_DESC& desc) + : GalliumD3D10BlendStateBase(device, object, desc) + { + memset(&desc1, 0, sizeof(desc1)); + desc1.AlphaToCoverageEnable = desc.AlphaToCoverageEnable; + desc1.RenderTarget[0].SrcBlend = desc.SrcBlend; + desc1.RenderTarget[0].DestBlend = desc.DestBlend; + desc1.RenderTarget[0].BlendOp = desc.BlendOp; + desc1.RenderTarget[0].SrcBlendAlpha = desc.SrcBlendAlpha; + desc1.RenderTarget[0].DestBlendAlpha = desc.DestBlendAlpha; + desc1.RenderTarget[0].BlendOpAlpha = desc.BlendOpAlpha; + for(unsigned i = 0; i < 8; ++i) + { + desc1.RenderTarget[i].BlendEnable = desc.BlendEnable[i]; + desc1.RenderTarget[i].RenderTargetWriteMask = desc.RenderTargetWriteMask[i]; + } + } + + GalliumD3D10BlendState(GalliumD3D10Screen* device, void* object, const D3D10_BLEND_DESC1& desc) + : GalliumD3D10BlendStateBase(device, object, convert_to_d3d10(desc)), desc1(desc1) + {} + + virtual void STDMETHODCALLTYPE GetDesc1(D3D10_BLEND_DESC1 *out_desc) + { + memcpy(out_desc, &desc1, sizeof(desc1)); + } +}; +#endif + +struct GalliumD3D11RasterizerState : public GalliumD3D11RasterizerStateBase +{ + bool depth_clamp; + + GalliumD3D11RasterizerState(GalliumD3D11Screen* device, void* object, const D3D11_RASTERIZER_DESC& desc, bool depth_clamp) + : GalliumD3D11RasterizerStateBase(device, object, desc), depth_clamp(depth_clamp) + {} +}; + +template<typename Base = ID3D11DeviceChild> +struct GalliumD3D11Shader : public GalliumD3D11Object<Base> +{ + std::vector<int> slot_to_resource; + std::vector<int> slot_to_sampler; + + GalliumD3D11Shader(GalliumD3D11Screen* device, void* object) + : GalliumD3D11Object<Base>(device, object) + {} +}; + +typedef GalliumD3D11Shader<ID3D11VertexShader> GalliumD3D11VertexShader; +typedef GalliumD3D11Shader<ID3D11GeometryShader> GalliumD3D11GeometryShader; +typedef GalliumD3D11Shader<ID3D11PixelShader> GalliumD3D11PixelShader; + +#if API >= 11 +/* +typedef GalliumD3D11Shader<ID3D11HullShader> GalliumD3D11HullShader; +typedef GalliumD3D11Shader<ID3D11DomainShader> GalliumD3D11DomainShader; +typedef GalliumD3D11Shader<ID3D11ComputeShader> GalliumD3D11ComputeShader; +*/ +#endif + +template<typename Base = ID3D11Resource> +struct GalliumD3D11ResourceBase : public GalliumD3D11DeviceChild<Base> +{ + unsigned eviction_priority; + + virtual void STDMETHODCALLTYPE SetEvictionPriority( + unsigned new_eviction_priority + ) + { + eviction_priority = new_eviction_priority; + } + + virtual unsigned STDMETHODCALLTYPE GetEvictionPriority() + { + return eviction_priority; + } +}; + +template<typename Real> +struct GalliumDXGIResource : public IDXGIResource +{ + virtual HRESULT STDMETHODCALLTYPE SetEvictionPriority( + unsigned new_eviction_priority + ) + { + static_cast<Real*>(this)->eviction_priority = new_eviction_priority; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetEvictionPriority(unsigned* out_eviction_priority) + { + *out_eviction_priority = static_cast<Real*>(this)->eviction_priority; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetDevice( + REFIID riid, + void **out_parent) + { + if(!static_cast<Real*>(this)->device) + return E_NOINTERFACE; + return static_cast<Real*>(this)->device->QueryInterface(riid, out_parent); + } + + virtual HRESULT STDMETHODCALLTYPE GetParent( + REFIID riid, + void **out_parent) + { + if(!static_cast<Real*>(this)->device) + return E_NOINTERFACE; + return static_cast<Real*>(this)->device->QueryInterface(riid, out_parent); + } +}; + +template<typename T> +struct com_traits<GalliumDXGIResource<T> > : public com_traits<IDXGIResource> +{}; + +template<typename Base = ID3D11Resource> +struct GalliumD3D11Resource + : public GalliumMultiComObject< + GalliumMultiPrivateDataComObject< + GalliumD3D11ResourceBase<Base>, + GalliumDXGIResource<GalliumD3D11Resource<Base> > + >, + IGalliumResource + > +{ + struct pipe_resource* resource; + std::unordered_map<unsigned, pipe_transfer*> transfers; + float min_lod; + DXGI_USAGE dxgi_usage; + + GalliumD3D11Resource(GalliumD3D11Screen* device = 0, struct pipe_resource* resource = 0, unsigned dxgi_usage = 0) + : resource(resource), min_lod(0), dxgi_usage(dxgi_usage) + { + this->device = device; + if(device) + device->AddRef(); + this->eviction_priority = 0; + } + + ~GalliumD3D11Resource() + { + pipe_resource_reference(&resource, 0); + } + + virtual HRESULT STDMETHODCALLTYPE GetUsage( + DXGI_USAGE *out_usage + ) + { + *out_usage = this->dxgi_usage; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetSharedHandle(HANDLE *out_shared_handle) + { + return E_NOTIMPL; + } + + virtual struct pipe_resource* STDMETHODCALLTYPE GetGalliumResource() + { + return resource; + } +}; + +template<typename Base, typename Desc, D3D11_RESOURCE_DIMENSION Dim> +struct GalliumD3D11TypedResource : public GalliumD3D11Resource<Base> +{ + Desc desc; + GalliumD3D11TypedResource() {} + GalliumD3D11TypedResource(GalliumD3D11Screen* device, struct pipe_resource* resource, const Desc& desc, unsigned dxgi_usage) + : GalliumD3D11Resource<Base>(device, resource, dxgi_usage), desc(desc) + {} + virtual void STDMETHODCALLTYPE GetType( + D3D11_RESOURCE_DIMENSION *out_resource_dimension) + { + *out_resource_dimension = Dim; + } + virtual void STDMETHODCALLTYPE GetDesc(Desc *out_desc) + { + memcpy(out_desc, &desc, sizeof(desc)); + } +}; + +typedef GalliumD3D11TypedResource<ID3D11Texture1D, D3D11_TEXTURE1D_DESC, D3D11_RESOURCE_DIMENSION_TEXTURE1D> GalliumD3D11Texture1DBase; +typedef GalliumD3D11TypedResource<ID3D11Texture2D, D3D11_TEXTURE2D_DESC, D3D11_RESOURCE_DIMENSION_TEXTURE2D> GalliumD3D11Texture2DBase; +typedef GalliumD3D11TypedResource<ID3D11Texture3D, D3D11_TEXTURE3D_DESC, D3D11_RESOURCE_DIMENSION_TEXTURE3D> GalliumD3D11Texture3DBase; +typedef GalliumD3D11TypedResource<ID3D11Buffer, D3D11_BUFFER_DESC, D3D11_RESOURCE_DIMENSION_BUFFER> GalliumD3D11BufferBase; + +#if API >= 11 +typedef GalliumD3D11BufferBase GalliumD3D11Buffer; +typedef GalliumD3D11Texture1DBase GalliumD3D11Texture1D; +typedef GalliumD3D11Texture2DBase GalliumD3D11Texture2D; +typedef GalliumD3D11Texture3DBase GalliumD3D11Texture3D; +#else +struct GalliumD3D10Buffer : public GalliumD3D10BufferBase +{ + GalliumD3D10Buffer(GalliumD3D10Screen* device, struct pipe_resource* resource, const D3D10_BUFFER_DESC& desc, unsigned dxgi_usage) + : GalliumD3D10BufferBase(device, resource, desc, dxgi_usage) + {} + + ~GalliumD3D10Buffer() + { + device->UnbindBuffer(this); + } + + virtual HRESULT STDMETHODCALLTYPE Map( + D3D10_MAP map_type, + unsigned map_flags, + void **out_data) + { + D3D10_MAPPED_SUBRESOURCE msr; + HRESULT hr = device->Map(this, 0, map_type, map_flags, &msr); + if(!SUCCEEDED(hr)) + return hr; + *out_data = msr.pData; + return S_OK; + } + + virtual void STDMETHODCALLTYPE Unmap() + { + device->Unmap(this, 0); + } +}; + +struct GalliumD3D10Texture1D : public GalliumD3D10Texture1DBase +{ + GalliumD3D10Texture1D(GalliumD3D10Screen* device, struct pipe_resource* resource, const D3D10_TEXTURE1D_DESC& desc, unsigned dxgi_usage) + : GalliumD3D10Texture1DBase(device, resource, desc, dxgi_usage) + {} + + virtual HRESULT STDMETHODCALLTYPE Map( + unsigned subresource, + D3D10_MAP map_type, + unsigned map_flags, + void **out_data) + { + D3D10_MAPPED_SUBRESOURCE msr; + HRESULT hr = device->Map(this, subresource, map_type, map_flags, &msr); + if(!SUCCEEDED(hr)) + return hr; + *out_data = msr.pData; + return S_OK; + } + + virtual void STDMETHODCALLTYPE Unmap( + unsigned subresource + ) + { + device->Unmap(this, subresource); + } +}; + +struct GalliumD3D10Texture2D : public GalliumD3D10Texture2DBase +{ + GalliumD3D10Texture2D() {} + GalliumD3D10Texture2D(GalliumD3D10Screen* device, struct pipe_resource* resource, const D3D10_TEXTURE2D_DESC& desc, unsigned dxgi_usage) + : GalliumD3D10Texture2DBase(device, resource, desc, dxgi_usage) + {} + + virtual HRESULT STDMETHODCALLTYPE Map( + unsigned subresource, + D3D10_MAP map_type, + unsigned map_flags, + D3D10_MAPPED_TEXTURE2D *out_mapped_subresource) + { + D3D10_MAPPED_SUBRESOURCE msr; + HRESULT hr = device->Map(this, subresource, map_type, map_flags, &msr); + if(!SUCCEEDED(hr)) + return hr; + out_mapped_subresource->pData = msr.pData; + out_mapped_subresource->RowPitch = msr.RowPitch; + return S_OK; + } + + virtual void STDMETHODCALLTYPE Unmap( + unsigned subresource + ) + { + device->Unmap(this, subresource); + } +}; + + +struct GalliumD3D10Texture3D : public GalliumD3D10Texture3DBase +{ + GalliumD3D10Texture3D(GalliumD3D10Screen* device, struct pipe_resource* resource, const D3D10_TEXTURE3D_DESC& desc, unsigned dxgi_usage) + : GalliumD3D10Texture3DBase(device, resource, desc, dxgi_usage) + {} + + virtual HRESULT STDMETHODCALLTYPE Map( + unsigned subresource, + D3D10_MAP map_type, + unsigned map_flags, + D3D10_MAPPED_TEXTURE3D *out_mapped_subresource) + { + D3D10_MAPPED_SUBRESOURCE msr; + HRESULT hr = device->Map(this, subresource, map_type, map_flags, &msr); + if(!SUCCEEDED(hr)) + return hr; + out_mapped_subresource->pData = msr.pData; + out_mapped_subresource->RowPitch = msr.RowPitch; + out_mapped_subresource->DepthPitch = msr.DepthPitch; + return S_OK; + } + + virtual void STDMETHODCALLTYPE Unmap( + unsigned subresource + ) + { + device->Unmap(this, subresource); + } +}; +#endif + +struct GalliumD3D11Surface : public GalliumMultiPrivateDataComObject<GalliumD3D11Texture2D, IDXGISurface1> +{ + GalliumD3D11Surface(GalliumD3D11Screen* device, struct pipe_resource* resource, const D3D11_TEXTURE2D_DESC& desc, unsigned dxgi_usage) + { + this->device = device; + this->device->AddRef(); + this->resource = resource; + this->desc = desc; + this->dxgi_usage = dxgi_usage; + } + + virtual HRESULT STDMETHODCALLTYPE GetDesc( + DXGI_SURFACE_DESC *out_desc) + { + out_desc->Format = this->desc.Format; + out_desc->Width = this->desc.Width; + out_desc->Height = this->desc.Height; + out_desc->SampleDesc = this->desc.SampleDesc; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetParent( + REFIID riid, + void **out_parent) + { + if(!device) + return E_NOINTERFACE; + return device->QueryInterface(riid, out_parent); + } + + /* TODO: somehow implement these */ + virtual HRESULT STDMETHODCALLTYPE GetDC( + BOOL discard, + HDC *out_hdc) + { + *out_hdc = 0; + return E_NOTIMPL; + } + + virtual HRESULT STDMETHODCALLTYPE ReleaseDC( + RECT *out_dirty_rect) + { + return E_NOTIMPL; + } + + virtual HRESULT STDMETHODCALLTYPE Map( + DXGI_MAPPED_RECT *out_locked_rect, + unsigned map_flags) + { + D3D11_MAP d3d_map; + if(map_flags & DXGI_MAP_DISCARD) + d3d_map = D3D11_MAP_WRITE_DISCARD; + else + { + if(map_flags & DXGI_MAP_READ) + { + if(map_flags & DXGI_MAP_WRITE) + d3d_map = D3D11_MAP_READ_WRITE; + else + d3d_map = D3D11_MAP_READ; + } + else + d3d_map = D3D11_MAP_WRITE; + } + D3D11_MAPPED_SUBRESOURCE d3d_mapped; + HRESULT hres = this->device->get_immediate_context()->Map(this, 0, d3d_map, 0, &d3d_mapped); + out_locked_rect->pBits = (uint8_t*)d3d_mapped.pData; + out_locked_rect->Pitch = d3d_mapped.RowPitch; + return hres; + } + + virtual HRESULT STDMETHODCALLTYPE Unmap(void) + { + this->device->get_immediate_context()->Unmap(this, 0); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE GetDevice( + REFIID riid, + void **out_parent) + { + if(!device) + return E_NOINTERFACE; + return device->QueryInterface(riid, out_parent); + } +}; + +template<typename Base, typename Desc, typename Object> +struct GalliumD3D11View : public GalliumD3D11DescribedObject<Base, Desc, Object> +{ + GalliumD3D11Resource<>* resource; + GalliumD3D11View(GalliumD3D11Screen* device, GalliumD3D11Resource<>* resource, Object* object, const Desc& desc) + : GalliumD3D11DescribedObject<Base, Desc, Object>(device, object, desc), resource(resource) + { + resource->AddRef(); + } + + ~GalliumD3D11View() + { + resource->Release(); + } + + virtual void STDMETHODCALLTYPE GetResource(ID3D11Resource** out_resource) + { + resource->AddRef(); + *out_resource = resource; + } +}; + +typedef GalliumD3D11View<ID3D11DepthStencilView, D3D11_DEPTH_STENCIL_VIEW_DESC, struct pipe_surface> GalliumD3D11DepthStencilView; +typedef GalliumD3D11View<ID3D11RenderTargetView, D3D11_RENDER_TARGET_VIEW_DESC, struct pipe_surface> GalliumD3D11RenderTargetView; + +#if API >= 11 +typedef GalliumD3D11View<ID3D11ShaderResourceView, D3D11_SHADER_RESOURCE_VIEW_DESC, struct pipe_sampler_view> GalliumD3D11ShaderResourceView; +#else +typedef GalliumD3D10View<ID3D10ShaderResourceView1, D3D10_SHADER_RESOURCE_VIEW_DESC1, struct pipe_sampler_view> GalliumD3D10ShaderResourceViewBase; + +struct GalliumD3D10ShaderResourceView : public GalliumD3D10ShaderResourceViewBase +{ + GalliumD3D10ShaderResourceView(GalliumD3D10Screen* device, GalliumD3D10Resource<>* resource, struct pipe_sampler_view* view, const D3D10_SHADER_RESOURCE_VIEW_DESC1& desc) + : GalliumD3D10ShaderResourceViewBase(device, resource, view, desc) + {} + + virtual void STDMETHODCALLTYPE GetDesc1(D3D10_SHADER_RESOURCE_VIEW_DESC1 *out_desc) + { + memcpy(out_desc, &desc, sizeof(*out_desc)); + } + + virtual void STDMETHODCALLTYPE GetDesc(D3D10_SHADER_RESOURCE_VIEW_DESC *out_desc) + { + memcpy(out_desc, &desc, sizeof(*out_desc)); + } +}; +#endif + +template<typename Base = ID3D11Asynchronous> +struct GalliumD3D11Asynchronous : public GalliumD3D11DeviceChild<Base> +{ + struct pipe_query* query; + unsigned data_size; + + GalliumD3D11Asynchronous(GalliumD3D11Screen* device, struct pipe_query* query, unsigned data_size) + : GalliumD3D11DeviceChild<Base>(device), query(query), data_size(data_size) + {} + + ~GalliumD3D11Asynchronous() + { + this->device->immediate_pipe->destroy_query(this->device->immediate_pipe, query); + } + + virtual unsigned STDMETHODCALLTYPE GetDataSize() + { + return data_size; + } + +#if API < 11 + virtual void STDMETHODCALLTYPE Begin() + { + this->device->Begin(this); + } + + virtual void STDMETHODCALLTYPE End() + { + this->device->End(this); + } + + virtual HRESULT STDMETHODCALLTYPE GetData( + void * out_data, + unsigned data_size, + unsigned get_data_flags) + { + return this->device->GetData(this, out_data, data_size, get_data_flags); + } +#endif +}; + +template<typename Base = ID3D11Asynchronous> +struct GalliumD3D11QueryOrPredicate : public GalliumD3D11Asynchronous<Base> +{ + D3D11_QUERY_DESC desc; + GalliumD3D11QueryOrPredicate(GalliumD3D11Screen* device, struct pipe_query* query, unsigned data_size, const D3D11_QUERY_DESC& desc) + : GalliumD3D11Asynchronous<Base>(device, query, data_size), desc(desc) + {} + + virtual void STDMETHODCALLTYPE GetDesc( + D3D11_QUERY_DESC *out_desc) + { + *out_desc = desc; + } +}; + +struct GalliumD3D11Query : public GalliumD3D11QueryOrPredicate<ID3D11Query> +{ + GalliumD3D11Query(GalliumD3D11Screen* device, struct pipe_query* query, unsigned data_size, const D3D11_QUERY_DESC& desc) + : GalliumD3D11QueryOrPredicate<ID3D11Query>(device, query, data_size, desc) + {} +}; + +struct GalliumD3D11Predicate : public GalliumD3D11QueryOrPredicate<ID3D11Predicate> +{ + GalliumD3D11Predicate(GalliumD3D11Screen* device, struct pipe_query* query, unsigned data_size, const D3D11_QUERY_DESC& desc) + : GalliumD3D11QueryOrPredicate<ID3D11Predicate>(device, query, data_size, desc) + {} + + ~GalliumD3D11Predicate() + { + DX10_ONLY(device->UnbindPredicate(this)); + } +}; + +struct GalliumD3D11Counter : public GalliumD3D11Asynchronous<ID3D11Counter> +{ + D3D11_COUNTER_DESC desc; + GalliumD3D11Counter(GalliumD3D11Screen* device, struct pipe_query* query, unsigned data_size, const D3D11_COUNTER_DESC& desc) + : GalliumD3D11Asynchronous<ID3D11Counter>(device, query, data_size), desc(desc) + {} + + virtual void STDMETHODCALLTYPE GetDesc( + D3D11_COUNTER_DESC *out_desc) + { + *out_desc = desc; + } +}; diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_screen.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_screen.h new file mode 100644 index 0000000000..95ea4e00fc --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_screen.h @@ -0,0 +1,1459 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +DEBUG_GET_ONCE_BOOL_OPTION(dump_shaders, "D3D1X_DUMP_SHADERS", FALSE); + +/* These cap sets are much more correct than the ones in u_caps.c */ +/* TODO: it seems cube levels should be the same as 2D levels */ + +/* DX 9_1 */ +static unsigned caps_dx_9_1[] = { + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 1), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 12), /* 2048 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 8), /* 256 */ + UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */ + UTIL_CHECK_TERMINATE +}; + +/* DX 9_2 */ +static unsigned caps_dx_9_2[] = { + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_CAP(TWO_SIDED_STENCIL), + UTIL_CHECK_CAP(TEXTURE_MIRROR_CLAMP), + UTIL_CHECK_CAP(BLEND_EQUATION_SEPARATE), + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 1), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 12), /* 2048 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */ + UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */ + UTIL_CHECK_TERMINATE +}; + +/* DX 9_3 */ +static unsigned caps_dx_9_3[] = { + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_CAP(TWO_SIDED_STENCIL), + UTIL_CHECK_CAP(TEXTURE_MIRROR_CLAMP), + UTIL_CHECK_CAP(BLEND_EQUATION_SEPARATE), + UTIL_CHECK_CAP(SM3), + //UTIL_CHECK_CAP(INSTANCING), + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 4), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 13), /* 4096 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */ + UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */ + UTIL_CHECK_TERMINATE +}; + + +// this is called "screen" because in the D3D10 case it's only part of the device +template<bool threadsafe> +struct GalliumD3D11ScreenImpl : public GalliumD3D11Screen +{ + D3D_FEATURE_LEVEL feature_level; + int format_support[PIPE_FORMAT_COUNT]; + unsigned creation_flags; + unsigned exception_mode; + maybe_mutex_t<threadsafe> mutex; + +/* TODO: Direct3D 11 specifies that fine-grained locking should be used if the driver supports it. + * Right now, I don't trust Gallium drivers to get this right. + */ +#define SYNCHRONIZED lock_t<maybe_mutex_t<threadsafe> > lock_(mutex) + + GalliumD3D11ScreenImpl(struct pipe_screen* screen, struct pipe_context* immediate_pipe, BOOL owns_immediate_pipe,unsigned creation_flags, IDXGIAdapter* adapter) + : GalliumD3D11Screen(screen, immediate_pipe, adapter), creation_flags(creation_flags) + { + memset(&screen_caps, 0, sizeof(screen_caps)); + screen_caps.gs = screen->get_shader_param(screen, PIPE_SHADER_GEOMETRY, PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0; + screen_caps.so = !!screen->get_param(screen, PIPE_CAP_STREAM_OUTPUT); + screen_caps.queries = screen->get_param(screen, PIPE_CAP_OCCLUSION_QUERY); + screen_caps.render_condition = screen_caps.queries; + for(unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) + screen_caps.constant_buffers[i] = screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_CONST_BUFFERS); + screen_caps.stages = 0; + for(unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) + { + if(!screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_INSTRUCTIONS)) + break; + screen_caps.stages = i + 1; + } + + screen_caps.stages_with_sampling = (1 << screen_caps.stages) - 1; + if(!screen->get_param(screen, PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS)) + screen_caps.stages_with_sampling &=~ (1 << PIPE_SHADER_VERTEX); + + memset(format_support, 0xff, sizeof(format_support)); + + float default_level; + /* don't even attempt to autodetect D3D10 level support, since it's just not fully implemented yet */ + if(util_check_caps(screen, caps_dx_9_3)) + default_level = 9.3; + else if(util_check_caps(screen, caps_dx_9_2)) + default_level = 9.2; + else if(util_check_caps(screen, caps_dx_9_1)) + default_level = 9.1; + else + { + _debug_printf("Warning: driver does not even meet D3D_FEATURE_LEVEL_9_1 features, advertising it anyway!\n"); + default_level = 9.1; + } + + char default_level_name[64]; + sprintf(default_level_name, "%.1f", default_level); + float feature_level_number = atof(debug_get_option("D3D11_FEATURE_LEVEL", default_level_name)); + if(!feature_level_number) + feature_level_number = default_level; + +#if API >= 11 + if(feature_level_number >= 11.0f) + feature_level = D3D_FEATURE_LEVEL_11_0; + else +#endif + if(feature_level_number >= 10.1f) + feature_level = D3D_FEATURE_LEVEL_10_1; + else if(feature_level_number >= 10.0f) + feature_level = D3D_FEATURE_LEVEL_10_0; + else if(feature_level_number >= 9.3f) + feature_level = D3D_FEATURE_LEVEL_9_3; + else if(feature_level_number >= 9.2f) + feature_level = D3D_FEATURE_LEVEL_9_2; + else + feature_level = D3D_FEATURE_LEVEL_9_1; + +#if API >= 11 + immediate_context = GalliumD3D11ImmediateDeviceContext_Create(this, immediate_pipe, owns_immediate_pipe); + // release to the reference to ourselves that the immediate context took, to avoid a garbage cycle + immediate_context->Release(); +#endif + } + + ~GalliumD3D11ScreenImpl() + { +#if API >= 11 + GalliumD3D11ImmediateDeviceContext_Destroy(immediate_context); +#endif + } + + virtual D3D_FEATURE_LEVEL STDMETHODCALLTYPE GetFeatureLevel(void) + { + return feature_level; + } + + virtual unsigned STDMETHODCALLTYPE GetCreationFlags(void) + { + return creation_flags; + } + + virtual HRESULT STDMETHODCALLTYPE GetDeviceRemovedReason(void) + { + return S_OK; + } + +#if API >= 11 + virtual void STDMETHODCALLTYPE GetImmediateContext( + ID3D11DeviceContext **out_immediate_context) + { + immediate_context->AddRef(); + *out_immediate_context = immediate_context; + } +#endif + + virtual HRESULT STDMETHODCALLTYPE SetExceptionMode(unsigned RaiseFlags) + { + exception_mode = RaiseFlags; + return S_OK; + } + + virtual unsigned STDMETHODCALLTYPE GetExceptionMode(void) + { + return exception_mode; + } + + virtual HRESULT STDMETHODCALLTYPE CheckCounter( + const D3D11_COUNTER_DESC *desc, + D3D11_COUNTER_TYPE *type, + unsigned *active_counters, + LPSTR sz_name, + unsigned *name_length, + LPSTR sz_units, + unsigned *units_length, + LPSTR sz_description, + unsigned *description_length) + { + return E_NOTIMPL; + } + + virtual void STDMETHODCALLTYPE CheckCounterInfo( + D3D11_COUNTER_INFO *counter_info) + { + /* none supported at the moment */ + counter_info->LastDeviceDependentCounter = (D3D11_COUNTER)0; + counter_info->NumDetectableParallelUnits = 1; + counter_info->NumSimultaneousCounters = 0; + } + +#if API >= 11 + virtual HRESULT STDMETHODCALLTYPE CheckFeatureSupport( + D3D11_FEATURE feature, + void *out_feature_support_data, + unsigned feature_support_data_size) + { + SYNCHRONIZED; + + switch(feature) + { + case D3D11_FEATURE_THREADING: + { + D3D11_FEATURE_DATA_THREADING* data = (D3D11_FEATURE_DATA_THREADING*)out_feature_support_data; + if(feature_support_data_size != sizeof(*data)) + return E_INVALIDARG; + + data->DriverCommandLists = FALSE; + data->DriverConcurrentCreates = FALSE; + return S_OK; + } + case D3D11_FEATURE_DOUBLES: + { + D3D11_FEATURE_DATA_DOUBLES* data = (D3D11_FEATURE_DATA_DOUBLES*)out_feature_support_data; + if(feature_support_data_size != sizeof(*data)) + return E_INVALIDARG; + + data->DoublePrecisionFloatShaderOps = FALSE; + return S_OK; + } + case D3D11_FEATURE_FORMAT_SUPPORT: + { + D3D11_FEATURE_DATA_FORMAT_SUPPORT* data = (D3D11_FEATURE_DATA_FORMAT_SUPPORT*)out_feature_support_data; + if(feature_support_data_size != sizeof(*data)) + return E_INVALIDARG; + + return this->CheckFormatSupport(data->InFormat, &data->OutFormatSupport); + } + case D3D11_FEATURE_FORMAT_SUPPORT2: + { + D3D11_FEATURE_DATA_FORMAT_SUPPORT* data = (D3D11_FEATURE_DATA_FORMAT_SUPPORT*)out_feature_support_data; + if(feature_support_data_size != sizeof(*data)) + return E_INVALIDARG; + + data->OutFormatSupport = 0; + /* TODO: should this be S_OK? */ + return E_INVALIDARG; + } + case D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS: + { + D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS* data = (D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS*)out_feature_support_data; + if(feature_support_data_size != sizeof(*data)) + return E_INVALIDARG; + + data->ComputeShaders_Plus_RawAndStructuredBuffers_Via_Shader_4_x = FALSE; + return S_OK; + } + default: + return E_INVALIDARG; + } + } +#endif + + virtual HRESULT STDMETHODCALLTYPE CheckFormatSupport( + DXGI_FORMAT dxgi_format, + unsigned *out_format_support + ) + { + SYNCHRONIZED; + + /* TODO: MSAA, advanced features */ + pipe_format format = dxgi_to_pipe_format[dxgi_format]; + if(!format) + return E_INVALIDARG; + + int support = format_support[format]; + if(support < 0) + { + support = 0; + unsigned buffer = D3D11_FORMAT_SUPPORT_BUFFER | D3D11_FORMAT_SUPPORT_IA_VERTEX_BUFFER | D3D11_FORMAT_SUPPORT_IA_INDEX_BUFFER; + unsigned sampler_view = D3D11_FORMAT_SUPPORT_SHADER_SAMPLE | D3D11_FORMAT_SUPPORT_MIP | D3D11_FORMAT_SUPPORT_MIP_AUTOGEN; + if(util_format_is_depth_or_stencil(format)) + sampler_view |= D3D11_FORMAT_SUPPORT_SHADER_SAMPLE_COMPARISON; + + /* TODO: do this properly when Gallium drivers actually support index/vertex format queries */ + if(screen->is_format_supported(screen, format, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER, 0) + || (screen->is_format_supported(screen, format, PIPE_BUFFER, 0, PIPE_BIND_INDEX_BUFFER, 0) + || format == PIPE_FORMAT_R8_UNORM)) + support |= buffer; + if(screen->is_format_supported(screen, format, PIPE_BUFFER, 0, PIPE_BIND_STREAM_OUTPUT, 0)) + support |= buffer | D3D11_FORMAT_SUPPORT_SO_BUFFER; + if(screen->is_format_supported(screen, format, PIPE_TEXTURE_1D, 0, PIPE_BIND_SAMPLER_VIEW, 0)) + support |= D3D11_FORMAT_SUPPORT_TEXTURE1D | sampler_view; + if(screen->is_format_supported(screen, format, PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW, 0)) + support |= D3D11_FORMAT_SUPPORT_TEXTURE2D | sampler_view; + if(screen->is_format_supported(screen, format, PIPE_TEXTURE_CUBE, 0, PIPE_BIND_SAMPLER_VIEW, 0)) + support |= D3D11_FORMAT_SUPPORT_TEXTURE2D | sampler_view; + if(screen->is_format_supported(screen, format, PIPE_TEXTURE_3D, 0, PIPE_BIND_SAMPLER_VIEW, 0)) + support |= D3D11_FORMAT_SUPPORT_TEXTURE3D | sampler_view; + if(screen->is_format_supported(screen, format, PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET, 0)) + support |= D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_RENDER_TARGET | D3D11_FORMAT_SUPPORT_BLENDABLE; + if(screen->is_format_supported(screen, format, PIPE_TEXTURE_2D, 0, PIPE_BIND_DEPTH_STENCIL, 0)) + support |= D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_DEPTH_STENCIL; + if(screen->is_format_supported(screen, format, PIPE_TEXTURE_2D, 0, PIPE_BIND_DISPLAY_TARGET, 0)) + support |= D3D11_FORMAT_SUPPORT_DISPLAY; + format_support[format] = support; + } + *out_format_support = support; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE CheckMultisampleQualityLevels( + DXGI_FORMAT format, + unsigned sample_count, + unsigned *pcount + ) + { + SYNCHRONIZED; + + if(sample_count == 1) + *pcount = 1; + else + *pcount = 0; + return S_OK; + } + + template<typename T, typename U> + bool convert_blend_state(T& to, const U& from, unsigned BlendEnable, unsigned RenderTargetWriteMask) + { + if(invalid(0 + || from.SrcBlend >= D3D11_BLEND_COUNT + || from.SrcBlendAlpha >= D3D11_BLEND_COUNT + || from.DestBlend >= D3D11_BLEND_COUNT + || from.DestBlendAlpha >= D3D11_BLEND_COUNT + || from.BlendOp >= 6 + || from.BlendOpAlpha >= 6 + || !from.BlendOp + || !from.BlendOpAlpha + )) + return false; + + to.blend_enable = BlendEnable; + + to.rgb_func = from.BlendOp - 1; + to.alpha_func = from.BlendOpAlpha - 1; + + to.rgb_src_factor = d3d11_to_pipe_blend[from.SrcBlend]; + to.alpha_src_factor = d3d11_to_pipe_blend[from.SrcBlendAlpha]; + to.rgb_dst_factor = d3d11_to_pipe_blend[from.DestBlend]; + to.alpha_dst_factor = d3d11_to_pipe_blend[from.DestBlendAlpha]; + + to.colormask = RenderTargetWriteMask & 0xf; + return true; + } + +#if API >= 11 + virtual HRESULT STDMETHODCALLTYPE CreateBlendState( + const D3D11_BLEND_DESC *blend_state_desc, + ID3D11BlendState **out_blend_state + ) +#else + virtual HRESULT STDMETHODCALLTYPE CreateBlendState1( + const D3D10_BLEND_DESC1 *blend_state_desc, + ID3D10BlendState1 **out_blend_state + ) +#endif + { + SYNCHRONIZED; + + pipe_blend_state state; + memset(&state, 0, sizeof(state)); + state.alpha_to_coverage = !!blend_state_desc->AlphaToCoverageEnable; + state.independent_blend_enable = !!blend_state_desc->IndependentBlendEnable; + assert(PIPE_MAX_COLOR_BUFS >= 8); + for(unsigned i = 0; i < 8; ++i) + { + if(!convert_blend_state( + state.rt[i], + blend_state_desc->RenderTarget[i], + blend_state_desc->RenderTarget[i].BlendEnable, + blend_state_desc->RenderTarget[i].RenderTargetWriteMask)) + return E_INVALIDARG; + } + + if(!out_blend_state) + return S_FALSE; + + void* object = immediate_pipe->create_blend_state(immediate_pipe, &state); + if(!object) + return E_FAIL; + + *out_blend_state = new GalliumD3D11BlendState(this, object, *blend_state_desc); + return S_OK; + } + +#if API < 11 + virtual HRESULT STDMETHODCALLTYPE CreateBlendState( + const D3D10_BLEND_DESC *blend_state_desc, + ID3D10BlendState **out_blend_state + ) + { + SYNCHRONIZED; + + pipe_blend_state state; + memset(&state, 0, sizeof(state)); + state.alpha_to_coverage = !!blend_state_desc->AlphaToCoverageEnable; + assert(PIPE_MAX_COLOR_BUFS >= 8); + for(unsigned i = 0; i < 8; ++i) + { + if(!convert_blend_state( + state.rt[i], + *blend_state_desc, + blend_state_desc->BlendEnable[i], + blend_state_desc->RenderTargetWriteMask[i])) + return E_INVALIDARG; + } + + for(unsigned i = 1; i < 8; ++i) + { + if(memcmp(&state.rt[0], &state.rt[i], sizeof(state.rt[0]))) + { + state.independent_blend_enable = TRUE; + break; + } + } + + void* object = immediate_pipe->create_blend_state(immediate_pipe, &state); + if(!object) + return E_FAIL; + + *out_blend_state = new GalliumD3D11BlendState(this, object, *blend_state_desc); + return S_OK; + } +#endif + + virtual HRESULT STDMETHODCALLTYPE CreateDepthStencilState( + const D3D11_DEPTH_STENCIL_DESC *depth_stencil_state_desc, + ID3D11DepthStencilState **depth_stencil_state + ) + { + SYNCHRONIZED; + + pipe_depth_stencil_alpha_state state; + memset(&state, 0, sizeof(state)); + state.depth.enabled = !!depth_stencil_state_desc->DepthEnable; + state.depth.writemask = depth_stencil_state_desc->DepthWriteMask; + state.depth.func = depth_stencil_state_desc->DepthFunc - 1; + state.stencil[0].enabled = !!depth_stencil_state_desc->StencilEnable; + state.stencil[0].writemask = depth_stencil_state_desc->StencilWriteMask; + state.stencil[0].valuemask = depth_stencil_state_desc->StencilReadMask; + state.stencil[0].zpass_op = d3d11_to_pipe_stencil_op[depth_stencil_state_desc->FrontFace.StencilPassOp]; + state.stencil[0].fail_op = d3d11_to_pipe_stencil_op[depth_stencil_state_desc->FrontFace.StencilFailOp]; + state.stencil[0].zfail_op = d3d11_to_pipe_stencil_op[depth_stencil_state_desc->FrontFace.StencilDepthFailOp]; + state.stencil[0].func = depth_stencil_state_desc->FrontFace.StencilFunc - 1; + state.stencil[1].enabled = !!depth_stencil_state_desc->StencilEnable; + state.stencil[1].writemask = depth_stencil_state_desc->StencilWriteMask; + state.stencil[1].valuemask = depth_stencil_state_desc->StencilReadMask; + state.stencil[1].zpass_op = d3d11_to_pipe_stencil_op[depth_stencil_state_desc->BackFace.StencilPassOp]; + state.stencil[1].fail_op = d3d11_to_pipe_stencil_op[depth_stencil_state_desc->BackFace.StencilFailOp]; + state.stencil[1].zfail_op = d3d11_to_pipe_stencil_op[depth_stencil_state_desc->BackFace.StencilDepthFailOp]; + state.stencil[1].func = depth_stencil_state_desc->BackFace.StencilFunc - 1; + + if(!depth_stencil_state) + return S_FALSE; + + void* object = immediate_pipe->create_depth_stencil_alpha_state(immediate_pipe, &state); + if(!object) + return E_FAIL; + + *depth_stencil_state = new GalliumD3D11DepthStencilState(this, object, *depth_stencil_state_desc); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE CreateRasterizerState( + const D3D11_RASTERIZER_DESC *rasterizer_desc, + ID3D11RasterizerState **out_rasterizer_state) + { + SYNCHRONIZED; + + pipe_rasterizer_state state; + memset(&state, 0, sizeof(state)); + state.gl_rasterization_rules = 1; /* D3D10/11 use GL rules */ + state.fill_front = state.fill_back = (rasterizer_desc->FillMode == D3D11_FILL_WIREFRAME) ? PIPE_POLYGON_MODE_LINE : PIPE_POLYGON_MODE_FILL; + if(rasterizer_desc->CullMode == D3D11_CULL_FRONT) + state.cull_face = PIPE_FACE_FRONT; + else if(rasterizer_desc->CullMode == D3D11_CULL_BACK) + state.cull_face = PIPE_FACE_BACK; + else + state.cull_face = PIPE_FACE_NONE; + state.front_ccw = !!rasterizer_desc->FrontCounterClockwise; + /* TODO: is this correct? */ + /* TODO: we are ignoring depthBiasClamp! */ + state.offset_tri = state.offset_line = state.offset_point = rasterizer_desc->SlopeScaledDepthBias || rasterizer_desc->DepthBias; + state.offset_scale = rasterizer_desc->SlopeScaledDepthBias; + state.offset_units = rasterizer_desc->DepthBias; + state.scissor = !!rasterizer_desc->ScissorEnable; + state.multisample = !!rasterizer_desc->MultisampleEnable; + state.line_smooth = !!rasterizer_desc->AntialiasedLineEnable; + + /* TODO: is this correct? */ + state.point_quad_rasterization = 1; + + if(!out_rasterizer_state) + return S_FALSE; + + void* object = immediate_pipe->create_rasterizer_state(immediate_pipe, &state); + if(!object) + return E_FAIL; + + *out_rasterizer_state = new GalliumD3D11RasterizerState(this, object, *rasterizer_desc, !rasterizer_desc->DepthClipEnable); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE CreateSamplerState( + const D3D11_SAMPLER_DESC *sampler_desc, + ID3D11SamplerState **out_sampler_state) + { + SYNCHRONIZED; + + pipe_sampler_state state; + memset(&state, 0, sizeof(state)); + state.normalized_coords = 1; + state.min_mip_filter = (sampler_desc->Filter & 1); + state.mag_img_filter = ((sampler_desc->Filter >> 2) & 1); + state.min_img_filter = ((sampler_desc->Filter >> 4) & 1); + if(sampler_desc->Filter & 0x40) + state.max_anisotropy = sampler_desc->MaxAnisotropy; + if(sampler_desc->Filter & 0x80) + { + state.compare_mode = PIPE_TEX_COMPARE_R_TO_TEXTURE; + state.compare_func = sampler_desc->ComparisonFunc; + } + state.wrap_s = d3d11_to_pipe_wrap[sampler_desc->AddressU]; + state.wrap_t = d3d11_to_pipe_wrap[sampler_desc->AddressV]; + state.wrap_r = d3d11_to_pipe_wrap[sampler_desc->AddressW]; + state.lod_bias = sampler_desc->MipLODBias; + memcpy(state.border_color, sampler_desc->BorderColor, sizeof(state.border_color)); + state.min_lod = sampler_desc->MinLOD; + state.max_lod = sampler_desc->MaxLOD; + + if(!out_sampler_state) + return S_FALSE; + + void* object = immediate_pipe->create_sampler_state(immediate_pipe, &state); + if(!object) + return E_FAIL; + + *out_sampler_state = new GalliumD3D11SamplerState(this, object, *sampler_desc); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE CreateInputLayout( + const D3D11_INPUT_ELEMENT_DESC *input_element_descs, + unsigned count, + const void *shader_bytecode_with_input_signature, + SIZE_T bytecode_length, + ID3D11InputLayout **out_input_layout) + { + SYNCHRONIZED; + + if(count > D3D11_IA_VERTEX_INPUT_STRUCTURE_ELEMENT_COUNT) + return E_INVALIDARG; + assert(D3D11_IA_VERTEX_INPUT_STRUCTURE_ELEMENT_COUNT <= PIPE_MAX_ATTRIBS); + + // putting semantics matching in the core API seems to be a (minor) design mistake + + struct dxbc_chunk_signature* sig = dxbc_find_signature(shader_bytecode_with_input_signature, bytecode_length, false); + D3D11_SIGNATURE_PARAMETER_DESC* params; + unsigned num_params = dxbc_parse_signature(sig, ¶ms); + + typedef std::unordered_map<std::pair<c_string, unsigned>, unsigned> semantic_to_idx_map_t; + semantic_to_idx_map_t semantic_to_idx_map; + for(unsigned i = 0; i < count; ++i) + semantic_to_idx_map[std::make_pair(c_string(input_element_descs[i].SemanticName), input_element_descs[i].SemanticIndex)] = i; + + struct pipe_vertex_element elements[D3D11_IA_VERTEX_INPUT_STRUCTURE_ELEMENT_COUNT]; + + unsigned num_params_to_use = std::min(num_params, (unsigned)D3D11_IA_VERTEX_INPUT_STRUCTURE_ELEMENT_COUNT); + for(unsigned i = 0; i < num_params_to_use; ++i) + { + int idx = -1; + semantic_to_idx_map_t::iterator iter = semantic_to_idx_map.find(std::make_pair(c_string(params[i].SemanticName), params[i].SemanticIndex)); + if(iter != semantic_to_idx_map.end()) + idx = iter->second; + + // TODO: I kind of doubt Gallium drivers will like null elements; should we do something about it, either here, in the interface, or in the drivers? + // TODO: also, in which cases should we return errors? (i.e. duplicate semantics in vs, duplicate semantics in layout, unmatched semantic in vs, unmatched semantic in layout) + memset(&elements[i], 0, sizeof(elements[i])); + if(idx >= 0) + { + elements[i].src_format = dxgi_to_pipe_format[input_element_descs[idx].Format]; + elements[i].src_offset = input_element_descs[idx].AlignedByteOffset; + elements[i].vertex_buffer_index = input_element_descs[idx].InputSlot; + elements[i].instance_divisor = input_element_descs[idx].InstanceDataStepRate; + } + } + + free(params); + + if(!out_input_layout) + return S_FALSE; + + void* object = immediate_pipe->create_vertex_elements_state(immediate_pipe, num_params_to_use, elements); + if(!object) + return E_FAIL; + + *out_input_layout = new GalliumD3D11InputLayout(this, object); + return S_OK; + } + + static unsigned d3d11_to_pipe_bind_flags(unsigned bind_flags) + { + unsigned bind = 0; + if(bind_flags & D3D11_BIND_VERTEX_BUFFER) + bind |= PIPE_BIND_VERTEX_BUFFER; + if(bind_flags & D3D11_BIND_INDEX_BUFFER) + bind |= PIPE_BIND_INDEX_BUFFER; + if(bind_flags & D3D11_BIND_CONSTANT_BUFFER) + bind |= PIPE_BIND_CONSTANT_BUFFER; + if(bind_flags & D3D11_BIND_SHADER_RESOURCE) + bind |= PIPE_BIND_SAMPLER_VIEW; + if(bind_flags & D3D11_BIND_STREAM_OUTPUT) + bind |= PIPE_BIND_STREAM_OUTPUT; + if(bind_flags & D3D11_BIND_RENDER_TARGET) + bind |= PIPE_BIND_RENDER_TARGET; + if(bind_flags & D3D11_BIND_DEPTH_STENCIL) + bind |= PIPE_BIND_DEPTH_STENCIL; + return bind; + } + + inline HRESULT create_resource( + pipe_texture_target target, + unsigned width, + unsigned height, + unsigned depth, + unsigned mip_levels, + unsigned array_size, + DXGI_FORMAT format, + const DXGI_SAMPLE_DESC* SampleDesc, + D3D11_USAGE usage, + unsigned bind_flags, + unsigned c_p_u_access_flags, + unsigned misc_flags, + const D3D11_SUBRESOURCE_DATA *initial_data, + DXGI_USAGE dxgi_usage, + struct pipe_resource** ppresource + ) + { + if(invalid(format >= DXGI_FORMAT_COUNT)) + return E_INVALIDARG; + if(misc_flags & D3D11_RESOURCE_MISC_TEXTURECUBE) + { + if(target != PIPE_TEXTURE_2D) + return E_INVALIDARG; + target = PIPE_TEXTURE_CUBE; + + if(array_size != 6) + return E_NOTIMPL; + } + else + { + if(array_size > 1) + return E_NOTIMPL; + array_size = 1; + } + /* TODO: msaa */ + struct pipe_resource templat; + memset(&templat, 0, sizeof(templat)); + templat.target = target; + templat.width0 = width; + templat.height0 = height; + templat.depth0 = depth; + if(mip_levels) + templat.last_level = mip_levels - 1; + else + templat.last_level = MAX2(MAX2(util_logbase2(templat.width0), util_logbase2(templat.height0)), util_logbase2(templat.depth0)); + templat.format = dxgi_to_pipe_format[format]; + templat.bind = d3d11_to_pipe_bind_flags(bind_flags); + if(c_p_u_access_flags & D3D11_CPU_ACCESS_READ) + templat.bind |= PIPE_BIND_TRANSFER_READ; + if(c_p_u_access_flags & D3D11_CPU_ACCESS_WRITE) + templat.bind |= PIPE_BIND_TRANSFER_WRITE; + if(misc_flags & D3D11_RESOURCE_MISC_SHARED) + templat.bind |= PIPE_BIND_SHARED; + if(misc_flags & D3D11_RESOURCE_MISC_GDI_COMPATIBLE) + templat.bind |= PIPE_BIND_TRANSFER_READ | PIPE_BIND_TRANSFER_WRITE; + if(dxgi_usage & DXGI_USAGE_BACK_BUFFER) + templat.bind |= PIPE_BIND_DISPLAY_TARGET; + templat.usage = d3d11_to_pipe_usage[usage]; + if(invalid(!templat.format)) + return E_NOTIMPL; + + if(!ppresource) + return S_FALSE; + + struct pipe_resource* resource = screen->resource_create(screen, &templat); + if(!resource) + return E_FAIL; + if(initial_data) + { + for(unsigned slice = 0; slice < array_size; ++slice) + { + for(unsigned level = 0; level <= templat.last_level; ++level) + { + struct pipe_subresource sr; + sr.level = level; + sr.face = slice; + struct pipe_box box; + box.x = box.y = box.z = 0; + box.width = u_minify(width, level); + box.height = u_minify(height, level); + box.depth = u_minify(depth, level); + immediate_pipe->transfer_inline_write(immediate_pipe, resource, sr, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | PIPE_TRANSFER_UNSYNCHRONIZED, &box, initial_data->pSysMem, initial_data->SysMemPitch, initial_data->SysMemSlicePitch); + ++initial_data; + } + } + } + *ppresource = resource; + return S_OK; + } + + static unsigned d3d_to_dxgi_usage(unsigned bind, unsigned misc) + { + unsigned dxgi_usage = 0; + if(bind |= D3D11_BIND_RENDER_TARGET) + dxgi_usage |= DXGI_USAGE_RENDER_TARGET_OUTPUT; + if(bind & D3D11_BIND_SHADER_RESOURCE) + dxgi_usage |= DXGI_USAGE_SHADER_INPUT; +#if API >= 11 + if(bind & D3D11_BIND_UNORDERED_ACCESS) + dxgi_usage |= DXGI_USAGE_UNORDERED_ACCESS; +#endif + if(misc & D3D11_RESOURCE_MISC_SHARED) + dxgi_usage |= DXGI_USAGE_SHARED; + return dxgi_usage; + } + + virtual HRESULT STDMETHODCALLTYPE CreateTexture1D( + const D3D11_TEXTURE1D_DESC *desc, + const D3D11_SUBRESOURCE_DATA *initial_data, + ID3D11Texture1D **out_texture1d) + { + SYNCHRONIZED; + + struct pipe_resource* resource; + DXGI_USAGE dxgi_usage = d3d_to_dxgi_usage(desc->BindFlags, desc->MiscFlags); + HRESULT hr = create_resource(PIPE_TEXTURE_1D, desc->Width, 1, 1, desc->MipLevels, desc->ArraySize, desc->Format, 0, desc->Usage, desc->BindFlags, desc->CPUAccessFlags, desc->MiscFlags, initial_data, dxgi_usage, out_texture1d ? &resource : 0); + if(hr != S_OK) + return hr; + D3D11_TEXTURE1D_DESC cdesc = *desc; + cdesc.MipLevels = resource->last_level + 1; + *out_texture1d = new GalliumD3D11Texture1D(this, resource, cdesc, dxgi_usage); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE CreateTexture2D( + const D3D11_TEXTURE2D_DESC *desc, + const D3D11_SUBRESOURCE_DATA *initial_data, + ID3D11Texture2D **out_texture2d) + { + SYNCHRONIZED; + + struct pipe_resource* resource; + DXGI_USAGE dxgi_usage = d3d_to_dxgi_usage(desc->BindFlags, desc->MiscFlags); + HRESULT hr = create_resource(PIPE_TEXTURE_2D, desc->Width, desc->Height, 1, desc->MipLevels, desc->ArraySize, desc->Format, &desc->SampleDesc, desc->Usage, desc->BindFlags, desc->CPUAccessFlags, desc->MiscFlags, initial_data, dxgi_usage, out_texture2d ? &resource : 0); + if(hr != S_OK) + return hr; + D3D11_TEXTURE2D_DESC cdesc = *desc; + cdesc.MipLevels = resource->last_level + 1; + if(cdesc.MipLevels == 1 && cdesc.ArraySize == 1) + *out_texture2d = new GalliumD3D11Surface(this, resource, cdesc, dxgi_usage); + else + *out_texture2d = new GalliumD3D11Texture2D(this, resource, cdesc, dxgi_usage); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE CreateTexture3D( + const D3D11_TEXTURE3D_DESC *desc, + const D3D11_SUBRESOURCE_DATA *initial_data, + ID3D11Texture3D **out_texture3d) + { + SYNCHRONIZED; + + struct pipe_resource* resource; + DXGI_USAGE dxgi_usage = d3d_to_dxgi_usage(desc->BindFlags, desc->MiscFlags); + HRESULT hr = create_resource(PIPE_TEXTURE_3D, desc->Width, desc->Height, desc->Depth, desc->MipLevels, 1, desc->Format, 0, desc->Usage, desc->BindFlags, desc->CPUAccessFlags, desc->MiscFlags, initial_data, dxgi_usage, out_texture3d ? &resource : 0); + if(hr != S_OK) + return hr; + D3D11_TEXTURE3D_DESC cdesc = *desc; + cdesc.MipLevels = resource->last_level + 1; + *out_texture3d = new GalliumD3D11Texture3D(this, resource, cdesc, dxgi_usage); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE CreateBuffer( + const D3D11_BUFFER_DESC *desc, + const D3D11_SUBRESOURCE_DATA *initial_data, + ID3D11Buffer **out_buffer) + { + SYNCHRONIZED; + + struct pipe_resource* resource; + DXGI_USAGE dxgi_usage = d3d_to_dxgi_usage(desc->BindFlags, desc->MiscFlags); + HRESULT hr = create_resource(PIPE_BUFFER, desc->ByteWidth, 1, 1, 1, 1, DXGI_FORMAT_R8_UNORM, 0, desc->Usage, desc->BindFlags, desc->CPUAccessFlags, desc->MiscFlags, initial_data, dxgi_usage, out_buffer ? &resource : 0); + if(hr != S_OK) + return hr; + *out_buffer = new GalliumD3D11Buffer(this, resource, *desc, dxgi_usage); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE OpenGalliumResource( + struct pipe_resource* resource, + IUnknown** dxgi_resource) + { + SYNCHRONIZED; + + /* TODO: maybe support others */ + assert(resource->target == PIPE_TEXTURE_2D); + *dxgi_resource = 0; + D3D11_TEXTURE2D_DESC desc; + memset(&desc, 0, sizeof(desc)); + desc.Width = resource->width0; + desc.Height = resource->height0; + init_pipe_to_dxgi_format(); + desc.Format = pipe_to_dxgi_format[resource->format]; + desc.SampleDesc.Count = resource->nr_samples; + desc.SampleDesc.Quality = 0; + desc.ArraySize = 1; + desc.MipLevels = resource->last_level + 1; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + if(resource->bind & PIPE_BIND_RENDER_TARGET) + desc.BindFlags |= D3D11_BIND_RENDER_TARGET; + if(resource->bind & PIPE_BIND_DEPTH_STENCIL) + desc.BindFlags |= D3D11_BIND_DEPTH_STENCIL; + if(resource->bind & PIPE_BIND_SAMPLER_VIEW) + desc.BindFlags |= D3D11_BIND_SHADER_RESOURCE; + if(resource->bind & PIPE_BIND_SHARED) + desc.MiscFlags |= D3D11_RESOURCE_MISC_SHARED; + DXGI_USAGE dxgi_usage = d3d_to_dxgi_usage(desc.BindFlags, desc.MiscFlags); + if(desc.MipLevels == 1 && desc.ArraySize == 1) + *dxgi_resource = (ID3D11Texture2D*)new GalliumD3D11Surface(this, resource, desc, dxgi_usage); + else + *dxgi_resource = (ID3D11Texture2D*)new GalliumD3D11Texture2D(this, resource, desc, dxgi_usage); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE CreateSurface( + const DXGI_SURFACE_DESC *dxgi_desc, + unsigned count, + DXGI_USAGE usage, + const DXGI_SHARED_RESOURCE *shared_resource, + IDXGISurface **out_surface) + { + SYNCHRONIZED; + + D3D11_TEXTURE2D_DESC desc; + memset(&desc, 0, sizeof(desc)); + + struct pipe_resource* resource; + desc.Width = dxgi_desc->Width; + desc.Height = dxgi_desc->Height; + desc.Format = dxgi_desc->Format; + desc.SampleDesc = dxgi_desc->SampleDesc; + desc.ArraySize = count; + desc.MipLevels = 1; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + if(usage & DXGI_USAGE_RENDER_TARGET_OUTPUT) + desc.BindFlags |= D3D11_BIND_RENDER_TARGET; + if(usage & DXGI_USAGE_SHADER_INPUT) + desc.BindFlags |= D3D11_BIND_SHADER_RESOURCE; +#if API >= 11 + if(usage & DXGI_USAGE_UNORDERED_ACCESS) + desc.BindFlags |= D3D11_BIND_UNORDERED_ACCESS; +#endif + if(usage & DXGI_USAGE_SHARED) + desc.MiscFlags |= D3D11_RESOURCE_MISC_SHARED; + HRESULT hr = create_resource(PIPE_TEXTURE_2D, dxgi_desc->Width, dxgi_desc->Height, 1, 1, count, dxgi_desc->Format, &dxgi_desc->SampleDesc, D3D11_USAGE_DEFAULT, desc.BindFlags, D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE, desc.MiscFlags, 0, usage, &resource); + if(hr != S_OK) + return hr; + *out_surface = new GalliumD3D11Surface(this, resource, desc, usage); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE CreateShaderResourceView( + ID3D11Resource *iresource, + const D3D11_SHADER_RESOURCE_VIEW_DESC *desc, + ID3D11ShaderResourceView **out_srv) + { +#if API >= 11 + D3D11_SHADER_RESOURCE_VIEW_DESC def_desc; +#else + if(desc->ViewDimension == D3D10_1_SRV_DIMENSION_TEXTURECUBEARRAY) + return E_INVALIDARG; + D3D10_SHADER_RESOURCE_VIEW_DESC1 desc1; + memset(&desc1, 0, sizeof(desc1)); + memcpy(&desc1, desc, sizeof(*desc)); + return CreateShaderResourceView1(iresource, &desc1, (ID3D10ShaderResourceView1**)out_srv); + } + + virtual HRESULT STDMETHODCALLTYPE CreateShaderResourceView1( + ID3D11Resource *iresource, + const D3D10_SHADER_RESOURCE_VIEW_DESC1 *desc, + ID3D10ShaderResourceView1 **out_srv) + { + D3D10_SHADER_RESOURCE_VIEW_DESC1 def_desc; +#endif + SYNCHRONIZED; + + if(!desc) + { + struct pipe_resource* resource = ((GalliumD3D11Resource<>*)iresource)->resource; + init_pipe_to_dxgi_format(); + memset(&def_desc, 0, sizeof(def_desc)); + def_desc.Format = pipe_to_dxgi_format[resource->format]; + switch(resource->target) + { + case PIPE_BUFFER: + def_desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; + def_desc.Buffer.ElementWidth = resource->width0; + break; + case PIPE_TEXTURE_1D: + def_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; + def_desc.Texture1D.MipLevels = resource->last_level + 1; + break; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + def_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + def_desc.Texture2D.MipLevels = resource->last_level + 1; + break; + case PIPE_TEXTURE_3D: + def_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; + def_desc.Texture3D.MipLevels = resource->last_level + 1; + break; + case PIPE_TEXTURE_CUBE: + def_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBE; + def_desc.TextureCube.MipLevels = resource->last_level + 1; + break; + default: + return E_INVALIDARG; + } + desc = &def_desc; + } + + struct pipe_sampler_view templat; + memset(&templat, 0, sizeof(templat)); + if(invalid(format >= DXGI_FORMAT_COUNT)) + return E_INVALIDARG; + templat.format = dxgi_to_pipe_format[desc->Format]; + if(!templat.format) + return E_NOTIMPL; + templat.swizzle_r = PIPE_SWIZZLE_RED; + templat.swizzle_g = PIPE_SWIZZLE_GREEN; + templat.swizzle_b = PIPE_SWIZZLE_BLUE; + templat.swizzle_a = PIPE_SWIZZLE_ALPHA; + + templat.texture = ((GalliumD3D11Resource<>*)iresource)->resource; + switch(desc->ViewDimension) + { + case D3D11_SRV_DIMENSION_TEXTURE1D: + case D3D11_SRV_DIMENSION_TEXTURE2D: + case D3D11_SRV_DIMENSION_TEXTURE3D: + case D3D11_SRV_DIMENSION_TEXTURE1DARRAY: + case D3D11_SRV_DIMENSION_TEXTURE2DARRAY: + /* yes, this works for all of these types (but TODO: texture arrays) */ + templat.first_level = desc->Texture1D.MostDetailedMip; + templat.last_level = templat.first_level + desc->Texture1D.MipLevels - 1; + break; + case D3D11_SRV_DIMENSION_BUFFER: + case D3D11_SRV_DIMENSION_TEXTURE2DMS: + case D3D11_SRV_DIMENSION_TEXTURE2DMSARRAY: + return E_NOTIMPL; + default: + return E_INVALIDARG; + } + + if(!out_srv) + return S_FALSE; + + struct pipe_sampler_view* view = immediate_pipe->create_sampler_view(immediate_pipe, templat.texture, &templat); + if(!view) + return E_FAIL; + *out_srv = new GalliumD3D11ShaderResourceView(this, (GalliumD3D11Resource<>*)iresource, view, *desc); + return S_OK; + } + +#if API >= 11 + virtual HRESULT STDMETHODCALLTYPE CreateUnorderedAccessView( + ID3D11Resource *resource, + const D3D11_UNORDERED_ACCESS_VIEW_DESC *desc, + ID3D11UnorderedAccessView **out_uav) + { + SYNCHRONIZED; + + return E_NOTIMPL; + + // remember to return S_FALSE and not crash if out_u_a_view == 0 and parameters are valid + } +#endif + + virtual HRESULT STDMETHODCALLTYPE CreateRenderTargetView( + ID3D11Resource *iresource, + const D3D11_RENDER_TARGET_VIEW_DESC *desc, + ID3D11RenderTargetView **out_rtv) + { + SYNCHRONIZED; + + D3D11_RENDER_TARGET_VIEW_DESC def_desc; + if(!desc) + { + struct pipe_resource* resource = ((GalliumD3D11Resource<>*)iresource)->resource; + init_pipe_to_dxgi_format(); + memset(&def_desc, 0, sizeof(def_desc)); + def_desc.Format = pipe_to_dxgi_format[resource->format]; + switch(resource->target) + { + case PIPE_BUFFER: + def_desc.ViewDimension = D3D11_RTV_DIMENSION_BUFFER; + def_desc.Buffer.ElementWidth = resource->width0; + break; + case PIPE_TEXTURE_1D: + def_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE1D; + break; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + def_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D; + break; + case PIPE_TEXTURE_3D: + def_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE3D; + def_desc.Texture3D.WSize = resource->depth0; + break; + case PIPE_TEXTURE_CUBE: + def_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2DARRAY; + def_desc.Texture2DArray.ArraySize = 6; + break; + default: + return E_INVALIDARG; + } + desc = &def_desc; + } + + unsigned zslice = 0; + unsigned face = 0; + unsigned level; + enum pipe_format format; + if(invalid(desc->format >= DXGI_FORMAT_COUNT)) + return E_INVALIDARG; + format = dxgi_to_pipe_format[desc->Format]; + if(!format) + return E_NOTIMPL; + + switch(desc->ViewDimension) + { + case D3D11_RTV_DIMENSION_TEXTURE1D: + case D3D11_RTV_DIMENSION_TEXTURE2D: + level = desc->Texture1D.MipSlice; + break; + case D3D11_RTV_DIMENSION_TEXTURE3D: + level = desc->Texture3D.MipSlice; + zslice = desc->Texture3D.FirstWSlice; + break; + case D3D11_RTV_DIMENSION_TEXTURE1DARRAY: + case D3D11_RTV_DIMENSION_TEXTURE2DARRAY: + level = desc->Texture1DArray.MipSlice; + face = desc->Texture1DArray.FirstArraySlice; + break; + case D3D11_RTV_DIMENSION_BUFFER: + case D3D11_RTV_DIMENSION_TEXTURE2DMS: + case D3D11_RTV_DIMENSION_TEXTURE2DMSARRAY: + return E_NOTIMPL; + default: + return E_INVALIDARG; + } + + if(!out_rtv) + return S_FALSE; + + struct pipe_surface* surface = screen->get_tex_surface(screen, + ((GalliumD3D11Resource<>*)iresource)->resource, + face, level, zslice, PIPE_BIND_RENDER_TARGET); + if(!surface) + return E_FAIL; + /* muhahahahaha, let's hope this actually works */ + surface->format = format; + *out_rtv = new GalliumD3D11RenderTargetView(this, (GalliumD3D11Resource<>*)iresource, surface, *desc); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE CreateDepthStencilView( + ID3D11Resource *iresource, + const D3D11_DEPTH_STENCIL_VIEW_DESC *desc, + ID3D11DepthStencilView **out_depth_stencil_view) + { + SYNCHRONIZED; + + D3D11_DEPTH_STENCIL_VIEW_DESC def_desc; + if(!desc) + { + struct pipe_resource* resource = ((GalliumD3D11Resource<>*)iresource)->resource; + init_pipe_to_dxgi_format(); + memset(&def_desc, 0, sizeof(def_desc)); + def_desc.Format = pipe_to_dxgi_format[resource->format]; + switch(resource->target) + { + case PIPE_TEXTURE_1D: + def_desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE1D; + break; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + def_desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2D; + break; + case PIPE_TEXTURE_CUBE: + def_desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2DARRAY; + def_desc.Texture2DArray.ArraySize = 6; + break; + default: + return E_INVALIDARG; + } + desc = &def_desc; + } + + unsigned zslice = 0; + unsigned face = 0; + unsigned level; + enum pipe_format format; + if(invalid(desc->format >= DXGI_FORMAT_COUNT)) + return E_INVALIDARG; + format = dxgi_to_pipe_format[desc->Format]; + if(!format) + return E_NOTIMPL; + + switch(desc->ViewDimension) + { + case D3D11_DSV_DIMENSION_TEXTURE1D: + case D3D11_DSV_DIMENSION_TEXTURE2D: + level = desc->Texture1D.MipSlice; + break; + case D3D11_DSV_DIMENSION_TEXTURE1DARRAY: + case D3D11_DSV_DIMENSION_TEXTURE2DARRAY: + level = desc->Texture1DArray.MipSlice; + face = desc->Texture1DArray.FirstArraySlice; + break; + case D3D11_DSV_DIMENSION_TEXTURE2DMS: + case D3D11_DSV_DIMENSION_TEXTURE2DMSARRAY: + return E_NOTIMPL; + default: + return E_INVALIDARG; + } + + if(!out_depth_stencil_view) + return S_FALSE; + + struct pipe_surface* surface = screen->get_tex_surface(screen, + ((GalliumD3D11Resource<>*)iresource)->resource, + face, level, zslice, PIPE_BIND_DEPTH_STENCIL); + if(!surface) + return E_FAIL; + /* muhahahahaha, let's hope this actually works */ + surface->format = format; + *out_depth_stencil_view = new GalliumD3D11DepthStencilView(this, (GalliumD3D11Resource<>*)iresource, surface, *desc); + return S_OK; + } + + GalliumD3D11Shader<>* create_stage_shader(unsigned type, const void* shader_bytecode, SIZE_T bytecode_length +#if API >= 11 + , ID3D11ClassLinkage *class_linkage +#endif + ) + { + bool dump = debug_get_option_dump_shaders(); + + dxbc_chunk_header* sm4_chunk = dxbc_find_shader_bytecode(shader_bytecode, bytecode_length); + if(!sm4_chunk) + return 0; + + std::auto_ptr<sm4_program> sm4(sm4_parse(sm4_chunk + 1, bswap_le32(sm4_chunk->size))); + if(!sm4.get()) + return 0; + + if(dump) + sm4->dump(); + + struct pipe_shader_state tgsi_shader; + memset(&tgsi_shader, 0, sizeof(tgsi_shader)); + tgsi_shader.tokens = (const tgsi_token*)sm4_to_tgsi(*sm4); + if(!tgsi_shader.tokens) + return 0; + + if(dump) + tgsi_dump(tgsi_shader.tokens, 0); + + void* shader_cso; + GalliumD3D11Shader<>* shader; + + switch(type) + { + case PIPE_SHADER_VERTEX: + shader_cso = immediate_pipe->create_vs_state(immediate_pipe, &tgsi_shader); + shader = (GalliumD3D11Shader<>*)new GalliumD3D11VertexShader(this, shader_cso); + break; + case PIPE_SHADER_FRAGMENT: + shader_cso = immediate_pipe->create_fs_state(immediate_pipe, &tgsi_shader); + shader = (GalliumD3D11Shader<>*)new GalliumD3D11PixelShader(this, shader_cso); + break; + case PIPE_SHADER_GEOMETRY: + shader_cso = immediate_pipe->create_gs_state(immediate_pipe, &tgsi_shader); + shader = (GalliumD3D11Shader<>*)new GalliumD3D11GeometryShader(this, shader_cso); + break; + default: + shader_cso = 0; + shader = 0; + break; + } + + if(shader) + { + shader->slot_to_resource = sm4->slot_to_resource; + shader->slot_to_sampler = sm4->slot_to_sampler; + } + + free((void*)tgsi_shader.tokens); + return shader; + } + +#if API >= 11 +#define CREATE_SHADER_ARGS \ + const void *shader_bytecode, \ + SIZE_T bytecode_length, \ + ID3D11ClassLinkage *class_linkage +#define PASS_SHADER_ARGS shader_bytecode, bytecode_length, class_linkage +#else +#define CREATE_SHADER_ARGS \ + const void *shader_bytecode, \ + SIZE_T bytecode_length +#define PASS_SHADER_ARGS shader_bytecode, bytecode_length +#endif + +#define IMPLEMENT_CREATE_SHADER(Stage, GALLIUM) \ + virtual HRESULT STDMETHODCALLTYPE Create##Stage##Shader( \ + CREATE_SHADER_ARGS, \ + ID3D11##Stage##Shader **out_shader) \ + { \ + SYNCHRONIZED; \ + GalliumD3D11##Stage##Shader* shader = (GalliumD3D11##Stage##Shader*)create_stage_shader(PIPE_SHADER_##GALLIUM, PASS_SHADER_ARGS); \ + if(!shader) \ + return E_FAIL; \ + if(out_shader) \ + { \ + *out_shader = shader; \ + return S_OK; \ + } \ + else \ + { \ + shader->Release(); \ + return S_FALSE; \ + } \ + } + +#define IMPLEMENT_NOTIMPL_CREATE_SHADER(Stage) \ + virtual HRESULT STDMETHODCALLTYPE Create##Stage##Shader( \ + CREATE_SHADER_ARGS, \ + ID3D11##Stage##Shader **out_shader) \ + { \ + return E_NOTIMPL; \ + } + + IMPLEMENT_CREATE_SHADER(Vertex, VERTEX) + IMPLEMENT_CREATE_SHADER(Pixel, FRAGMENT) + IMPLEMENT_CREATE_SHADER(Geometry, GEOMETRY) +#if API >= 11 + IMPLEMENT_NOTIMPL_CREATE_SHADER(Hull) + IMPLEMENT_NOTIMPL_CREATE_SHADER(Domain) + IMPLEMENT_NOTIMPL_CREATE_SHADER(Compute) +#endif + + virtual HRESULT STDMETHODCALLTYPE CreateGeometryShaderWithStreamOutput( + const void *shader_bytecode, + SIZE_T bytecode_length, + const D3D11_SO_DECLARATION_ENTRY *so_declaration, + unsigned num_entries, +#if API >= 11 + const unsigned *buffer_strides, + unsigned num_strides, + unsigned rasterized_stream, + ID3D11ClassLinkage *class_linkage, +#else + UINT output_stream_stride, +#endif + ID3D11GeometryShader **out_geometry_shader) + { + SYNCHRONIZED; + + return E_NOTIMPL; + + // remember to return S_FALSE if ppGeometyShader == NULL and the shader is OK + } + +#if API >= 11 + virtual HRESULT STDMETHODCALLTYPE CreateClassLinkage( + ID3D11ClassLinkage **out_linkage) + { + SYNCHRONIZED; + + return E_NOTIMPL; + } +#endif + + virtual HRESULT STDMETHODCALLTYPE CreateQuery( + const D3D11_QUERY_DESC *query_desc, + ID3D11Query **out_query) + { + SYNCHRONIZED; + + if(invalid(query_desc->Query >= D3D11_QUERY_COUNT)) + return E_INVALIDARG; + unsigned query_type = d3d11_to_pipe_query[query_desc->Query]; + if(!query_type) + return E_NOTIMPL; + + if(out_query) + return S_FALSE; + + struct pipe_query* query = immediate_pipe->create_query(immediate_pipe, query_type); + if(!query) + return E_FAIL; + + *out_query = new GalliumD3D11Query(this, query, d3d11_query_size[query_desc->Query], *query_desc); + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE CreatePredicate( + const D3D11_QUERY_DESC *predicate_desc, + ID3D11Predicate **out_predicate) + { + SYNCHRONIZED; + + unsigned query_type; + switch(predicate_desc->Query) + { + case D3D11_QUERY_SO_OVERFLOW_PREDICATE: + return E_NOTIMPL; + case D3D11_QUERY_OCCLUSION_PREDICATE: + query_type = PIPE_QUERY_OCCLUSION_COUNTER; + break; + default: + return E_INVALIDARG; + } + + if(out_predicate) + return S_FALSE; + + struct pipe_query* query = immediate_pipe->create_query(immediate_pipe, query_type); + if(!query) + return E_FAIL; + + *out_predicate = new GalliumD3D11Predicate(this, query, sizeof(BOOL), *predicate_desc); + return S_OK; + } + + + virtual HRESULT STDMETHODCALLTYPE CreateCounter( + const D3D11_COUNTER_DESC *counter_desc, + ID3D11Counter **out_counter) + { + SYNCHRONIZED; + + return E_NOTIMPL; + + // remember to return S_FALSE if out_counter == NULL and everything is OK + } + +#if API >= 11 + virtual HRESULT STDMETHODCALLTYPE CreateDeferredContext( + unsigned context_flags, + ID3D11DeviceContext **out_deferred_context) + { + SYNCHRONIZED; + + // TODO: this will have to be implemented using a new Gallium util module + return E_NOTIMPL; + + // remember to return S_FALSE if out_counter == NULL and everything is OK + } +#endif + + virtual HRESULT STDMETHODCALLTYPE OpenSharedResource( + HANDLE resource, + REFIID iid, + void **out_resource) + { + SYNCHRONIZED; + + // TODO: the problem here is that we need to communicate dimensions somehow + return E_NOTIMPL; + + // remember to return S_FALSE if out_counter == NULL and everything is OK +#if 0 + struct pipe_resou rce templat; + struct winsys_handle handle; + handle.stride = 0; + handle.handle = resource; + handle.type = DRM_API_HANDLE_TYPE_SHARED; + screen->resource_from_handle(screen, &templat, &handle); +#endif + } + +#if API < 11 + /* these are documented as "Not implemented". + * According to the UMDDI documentation, they apparently turn on a + * (width + 1) x (height + 1) convolution filter for 1-bit textures. + * Probably nothing uses these, assuming it has ever been implemented anywhere. + */ + void STDMETHODCALLTYPE SetTextFilterSize( + UINT width, + UINT height + ) + {} + + virtual void STDMETHODCALLTYPE GetTextFilterSize( + UINT *width, + UINT *height + ) + {} +#endif + +#if API >= 11 + virtual void STDMETHODCALLTYPE RestoreGalliumState() + { + GalliumD3D11ImmediateDeviceContext_RestoreGalliumState(immediate_context); + } + + virtual void STDMETHODCALLTYPE RestoreGalliumStateBlitOnly() + { + GalliumD3D11ImmediateDeviceContext_RestoreGalliumStateBlitOnly(immediate_context); + } +#endif + + virtual struct pipe_context* STDMETHODCALLTYPE GetGalliumContext(void) + { + return immediate_pipe; + } + +#undef SYNCHRONIZED +}; diff --git a/src/gallium/state_trackers/d3d1x/gd3d1x/Makefile b/src/gallium/state_trackers/d3d1x/gd3d1x/Makefile new file mode 100644 index 0000000000..32d29563ec --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3d1x/Makefile @@ -0,0 +1,7 @@ +LIBNAME=gd3d1x +CPP_SOURCES=$(wildcard *.cpp) +LIBRARY_INCLUDES=-Iinclude -I../gd3dapi -I../d3dapi -I../w32api -I../d3d1xstutil/include -I../d3d1xshader/include -I../../../include -I../../../auxiliary -I../../../state_trackers/egl/common +PROGS=tools/dxbc2tgsi +PROGS_DEPS=libgd3d1x.a ../d3d1xshader/libd3d1xshader.a ../d3d1xstutil/libd3d1xstutil.a ../../../auxiliary/libgallium.a +LIBS=$(PROGS_DEPS) -ldl +include ../Makefile.inc diff --git a/src/gallium/state_trackers/d3d1x/gd3d1x/d3d1x_private.h b/src/gallium/state_trackers/d3d1x/gd3d1x/d3d1x_private.h new file mode 100644 index 0000000000..977f0cd2ce --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3d1x/d3d1x_private.h @@ -0,0 +1,95 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef D3D1X_PRIVATE_H_ +#define D3D1X_PRIVATE_H_ + +#include <algorithm> +#include <vector> +#include <string> +#include <float.h> + +#include "dxbc.h" +#include "sm4.h" +#include "sm4_to_tgsi.h" + +#include "d3d1xstutil.h" + +#include <d3d11.h> +#include <d3d11shader.h> + +extern "C" +{ +#include <pipe/p_defines.h> +#include <pipe/p_screen.h> +#include <pipe/p_context.h> +#include <util/u_inlines.h> +#include <util/u_format.h> +#include <util/u_caps.h> +#include <util/u_debug.h> +#include <os/os_thread.h> +} + +#include "galliumdxgi.h" +#include "galliumd3d10_1.h" +#include "galliumd3d11.h" + +#ifdef CHECK +#define invalid(x) unlikely(x) +#else +#define invalid(x) (0) +#endif + +#define D3D10_STAGE_VS 0 +#define D3D10_STAGE_PS 1 +#define D3D10_STAGE_GS 2 +#define D3D10_STAGES 3 + +#define D3D11_STAGE_VS 0 +#define D3D11_STAGE_PS 1 +#define D3D11_STAGE_GS 2 +#define D3D11_STAGE_HS 3 +#define D3D11_STAGE_DS 4 +#define D3D11_STAGE_CS 5 +#define D3D11_STAGES 6 + +#define D3D11_BLEND_COUNT 20 +extern unsigned d3d11_to_pipe_blend[D3D11_BLEND_COUNT]; + +#define D3D11_USAGE_COUNT 4 +extern unsigned d3d11_to_pipe_usage[D3D11_USAGE_COUNT]; + +#define D3D11_STENCIL_OP_COUNT 9 +extern unsigned d3d11_to_pipe_stencil_op[D3D11_STENCIL_OP_COUNT]; + +#define D3D11_TEXTURE_ADDRESS_COUNT 6 +extern unsigned d3d11_to_pipe_wrap[D3D11_TEXTURE_ADDRESS_COUNT]; + +#define D3D11_QUERY_COUNT 16 +extern unsigned d3d11_to_pipe_query[D3D11_QUERY_COUNT]; +extern unsigned d3d11_query_size[D3D11_QUERY_COUNT]; + +#endif /* D3D1X_H_ */ diff --git a/src/gallium/state_trackers/d3d1x/gd3d1x/d3d_enums.cpp b/src/gallium/state_trackers/d3d1x/gd3d1x/d3d_enums.cpp new file mode 100644 index 0000000000..853d11410d --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3d1x/d3d_enums.cpp @@ -0,0 +1,123 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "d3d1x_private.h" + +unsigned d3d11_to_pipe_blend[D3D11_BLEND_COUNT] = +{ + PIPE_BLENDFACTOR_ONE, /* absent in D3D11, but apparently accepted */ + PIPE_BLENDFACTOR_ZERO, + PIPE_BLENDFACTOR_ONE, + PIPE_BLENDFACTOR_SRC_COLOR, + PIPE_BLENDFACTOR_INV_SRC_COLOR, + PIPE_BLENDFACTOR_SRC_ALPHA, + PIPE_BLENDFACTOR_INV_SRC_ALPHA, + PIPE_BLENDFACTOR_DST_ALPHA, + PIPE_BLENDFACTOR_INV_DST_ALPHA, + PIPE_BLENDFACTOR_DST_COLOR, + PIPE_BLENDFACTOR_INV_DST_COLOR, + PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE, + 0, /* absent in D3D11 */ + 0, /* absent in D3D11 */ + PIPE_BLENDFACTOR_CONST_COLOR, + PIPE_BLENDFACTOR_INV_CONST_COLOR, + PIPE_BLENDFACTOR_SRC1_COLOR, + PIPE_BLENDFACTOR_INV_SRC1_COLOR, + PIPE_BLENDFACTOR_SRC1_ALPHA, + PIPE_BLENDFACTOR_INV_SRC1_ALPHA +}; + +unsigned d3d11_to_pipe_usage[D3D11_USAGE_COUNT] = +{ + PIPE_USAGE_DEFAULT, + PIPE_USAGE_IMMUTABLE, + PIPE_USAGE_DYNAMIC, + PIPE_USAGE_STAGING +}; + +unsigned d3d11_to_pipe_stencil_op[D3D11_STENCIL_OP_COUNT] = +{ + PIPE_STENCIL_OP_KEEP, + PIPE_STENCIL_OP_KEEP, + PIPE_STENCIL_OP_ZERO, + PIPE_STENCIL_OP_REPLACE, + PIPE_STENCIL_OP_INCR, + PIPE_STENCIL_OP_DECR, + PIPE_STENCIL_OP_INVERT, + PIPE_STENCIL_OP_INCR_WRAP, + PIPE_STENCIL_OP_DECR_WRAP, +}; + +unsigned d3d11_to_pipe_wrap[D3D11_TEXTURE_ADDRESS_COUNT] = +{ + PIPE_TEX_WRAP_REPEAT, + PIPE_TEX_WRAP_REPEAT, + PIPE_TEX_WRAP_MIRROR_REPEAT, + PIPE_TEX_WRAP_CLAMP_TO_EDGE, + PIPE_TEX_WRAP_CLAMP_TO_BORDER, + PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE, +}; + +unsigned d3d11_to_pipe_query[D3D11_QUERY_COUNT] = +{ + PIPE_QUERY_GPU_FINISHED, + PIPE_QUERY_OCCLUSION_COUNTER, + PIPE_QUERY_TIME_ELAPSED, + PIPE_QUERY_TIMESTAMP_DISJOINT, + 0, /* D3D11_QUERY_PIPELINE_STATISTICS */ + PIPE_QUERY_OCCLUSION_COUNTER, + PIPE_QUERY_SO_STATISTICS, + 0, /* D3D11_QUERY_SO_OVERFLOW_PREDICATE */ + /* per-stream SO queries */ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +}; + +unsigned d3d11_query_size[D3D11_QUERY_COUNT] = +{ + sizeof(BOOL), + sizeof(UINT64), + sizeof(UINT64), + sizeof(UINT64), + 0, + sizeof(BOOL), + sizeof(D3D11_QUERY_DATA_SO_STATISTICS), + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 +}; diff --git a/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp b/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp new file mode 100644 index 0000000000..615ce8c255 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp @@ -0,0 +1,869 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "d3d1xstutil.h" +#include "sm4.h" +#include "tgsi/tgsi_ureg.h" +#include <vector> + +#if 1 +#define check(x) assert(x) +#define fail(x) assert(0 && (x)) +#else +#define check(x) do {if(!(x)) throw(#x);} while(0) +#define fail(x) throw(x) +#endif + +struct tgsi_interpolation +{ + unsigned interpolation; + bool centroid; +}; + +static tgsi_interpolation sm4_to_pipe_interpolation[] = +{ + {TGSI_INTERPOLATE_PERSPECTIVE, false}, /* UNDEFINED */ + {TGSI_INTERPOLATE_CONSTANT, false}, + {TGSI_INTERPOLATE_PERSPECTIVE, false}, /* LINEAR */ + {TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_CENTROID */ + {TGSI_INTERPOLATE_LINEAR, false}, /* LINEAR_NOPERSPECTIVE */ + {TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_CENTROID */ + + // Added in D3D10.1 + {TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_SAMPLE */ + {TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_SAMPLE */ +}; + +static int sm4_to_pipe_sv[] = +{ + -1, + TGSI_SEMANTIC_POSITION, + -1, /*TGSI_SEMANTIC_CLIP_DISTANCE */ + -1, /*TGSI_SEMANTIC_CULL_DISTANCE */ + -1, /*TGSI_SEMANTIC_RENDER_TARGET_ARRAY_INDEX */ + -1, /*TGSI_SEMANTIC_VIEWPORT_ARRAY_INDEX */ + -1, /*TGSI_SEMANTIC_VERTEXID,*/ + TGSI_SEMANTIC_PRIMID, + TGSI_SEMANTIC_INSTANCEID, + TGSI_SEMANTIC_FACE, + -1, /*TGSI_SEMANTIC_SAMPLE_INDEX*/ +}; + +struct sm4_to_tgsi_converter +{ + struct ureg_program* ureg; + std::vector<struct ureg_dst> temps; + std::vector<struct ureg_dst> outputs; + std::vector<struct ureg_src> inputs; + std::vector<struct ureg_src> samplers; + std::vector<std::pair<unsigned, unsigned> > targets; // first is normal, second shadow/comparison + std::vector<unsigned> sampler_modes; // 0 = normal, 1 = shadow/comparison + std::vector<std::pair<unsigned, unsigned> > loops; + sm4_insn* insn; + struct sm4_program& program; + std::vector<unsigned> sm4_to_tgsi_insn_num; + std::vector<std::pair<unsigned, unsigned> > label_to_sm4_insn_num; + bool in_sub; + bool avoid_txf; + bool avoid_int; + + sm4_to_tgsi_converter(struct sm4_program& program) + : program(program) + { + avoid_txf = true; + avoid_int = false; + } + + struct ureg_dst _reg(sm4_op& op) + { + switch(op.file) + { + case SM4_FILE_NULL: + { + struct ureg_dst d; + memset(&d, 0, sizeof(d)); + d.File = TGSI_FILE_NULL; + return d; + } + case SM4_FILE_TEMP: + check(op.has_simple_index()); + check(op.indices[0].disp < temps.size()); + return temps[op.indices[0].disp]; + case SM4_FILE_OUTPUT: + check(op.has_simple_index()); + check(op.indices[0].disp < outputs.size()); + return outputs[op.indices[0].disp]; + default: + check(0); + return ureg_dst_undef(); + } + } + + struct ureg_dst _dst(unsigned i = 0) + { + check(i < insn->num_ops); + sm4_op& op = *insn->ops[i]; + check(op.mode == SM4_OPERAND_MODE_MASK || op.mode == SM4_OPERAND_MODE_SCALAR); + struct ureg_dst d = ureg_writemask(_reg(op), op.mask); + if(insn->insn.sat) + d = ureg_saturate(d); + return d; + } + + struct ureg_src _src(unsigned i) + { + check(i < insn->num_ops); + sm4_op& op = *insn->ops[i]; + struct ureg_src s; + switch(op.file) + { + case SM4_FILE_IMMEDIATE32: + s = ureg_imm4f(ureg, op.imm_values[0].f32, op.imm_values[1].f32, op.imm_values[2].f32, op.imm_values[3].f32); + break; + case SM4_FILE_INPUT: + check(op.is_index_simple(0)); + check(op.num_indices == 1 || op.num_indices == 2); + // TODO: is this correct, or are incorrectly swapping the two indices in the GS case? + check(op.indices[op.num_indices - 1].disp < inputs.size()); + s = inputs[op.indices[op.num_indices - 1].disp]; + if(op.num_indices == 2) + { + s.Dimension = 1; + s.DimensionIndex = op.indices[0].disp; + } + break; + case SM4_FILE_CONSTANT_BUFFER: + // TODO: indirect addressing + check(op.num_indices == 2); + check(op.is_index_simple(0)); + check(op.is_index_simple(1)); + s = ureg_src_register(TGSI_FILE_CONSTANT, (unsigned)op.indices[1].disp); + s.Dimension = 1; + s.DimensionIndex = op.indices[0].disp; + break; + default: + s = ureg_src(_reg(op)); + break; + } + if(op.mode == SM4_OPERAND_MODE_SWIZZLE || op.mode == SM4_OPERAND_MODE_SCALAR) + s = ureg_swizzle(s, op.swizzle[0], op.swizzle[1], op.swizzle[2], op.swizzle[3]); + else + { + /* immediates are masked to show needed values */ + check(op.file == SM4_FILE_IMMEDIATE32 || op.file == SM4_FILE_IMMEDIATE64); + } + if(op.abs) + s = ureg_abs(s); + if(op.neg) + s = ureg_negate(s); + return s; + }; + + int _idx(sm4_file file, unsigned i = 0) + { + check(i < insn->num_ops); + sm4_op& op = *insn->ops[i]; + check(op.file == file); + check(op.has_simple_index()); + return (int)op.indices[0].disp; + } + + int _texslot(bool have_sampler = true) + { + std::map<std::pair<int, int>, int>::iterator i; + i = program.resource_sampler_to_slot.find(std::make_pair(_idx(SM4_FILE_RESOURCE, 2), have_sampler ? _idx(SM4_FILE_SAMPLER, 3) : -1)); + check(i != program.resource_sampler_to_slot.end()); + return i->second; + } + + unsigned tex_target(unsigned texslot) + { + unsigned mode = sampler_modes[program.slot_to_sampler[texslot]]; + unsigned target; + if(mode) + target = targets[program.slot_to_resource[texslot]].second; + else + target = targets[program.slot_to_resource[texslot]].first; + check(target); + return target; + } + + std::vector<struct ureg_dst> insn_tmps; + + struct ureg_dst _tmp() + { + struct ureg_dst t = ureg_DECL_temporary(ureg); + insn_tmps.push_back(t); + return t; + } + + struct ureg_dst _tmp(struct ureg_dst d) + { + if(d.File == TGSI_FILE_TEMPORARY) + return d; + else + return ureg_writemask(_tmp(), d.WriteMask); + } + +#define OP1_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1)); break +#define OP2_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2)); break +#define OP3_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2), _src(3)); break +#define OP1(n) OP1_(n, n) +#define OP2(n) OP2_(n, n) +#define OP3(n) OP3_(n, n) +#define OP_CF(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, &label); label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num])); break; + + void translate_insns(unsigned begin, unsigned end) + { + for(unsigned insn_num = begin; insn_num < end; ++insn_num) + { + sm4_to_tgsi_insn_num[insn_num] = ureg_get_instruction_number(ureg); + unsigned label; + insn = program.insns[insn_num]; + bool ok; + ok = true; + switch(insn->opcode) + { + // trivial instructions + case SM4_OPCODE_NOP: + break; + OP1(MOV); + + // float + OP2(ADD); + OP2(MUL); + OP3(MAD); + OP2(DIV); + OP1(FRC); + OP1(RCP); + OP2(MIN); + OP2(MAX); + OP2_(LT, SLT); + OP2_(GE, SGE); + OP2_(EQ, SEQ); + OP2_(NE, SNE); + + // bitwise + OP1(NOT); + OP2(AND); + OP2(OR); + OP2(XOR); + + // special mathematical + OP2(DP2); + OP2(DP3); + OP2(DP4); + OP1(RSQ); + OP1_(LOG, LG2); + OP1_(EXP, EX2); + + // rounding + OP1_(ROUND_NE, ROUND); + OP1_(ROUND_Z, TRUNC); + OP1_(ROUND_PI, CEIL); + OP1_(ROUND_NI, FLR); + + // cross-thread + OP1_(DERIV_RTX, DDX); + OP1_(DERIV_RTX_COARSE, DDX); + OP1_(DERIV_RTX_FINE, DDX); + OP1_(DERIV_RTY, DDY); + OP1_(DERIV_RTY_COARSE, DDY); + OP1_(DERIV_RTY_FINE, DDY); + case SM4_OPCODE_EMIT: + ureg_EMIT(ureg); + break; + case SM4_OPCODE_CUT: + ureg_ENDPRIM(ureg); + break; + case SM4_OPCODE_EMITTHENCUT: + ureg_EMIT(ureg); + ureg_ENDPRIM(ureg); + break; + + // non-trivial instructions + case SM4_OPCODE_MOVC: + /* CMP checks for < 0, but MOVC checks for != 0 + * but fortunately, x != 0 is equivalent to -abs(x) < 0 + * XXX: can test_nz apply to this?! + */ + ureg_CMP(ureg, _dst(), ureg_negate(ureg_abs(_src(1))), _src(2), _src(3)); + break; + case SM4_OPCODE_SQRT: + { + struct ureg_dst d = _dst(); + struct ureg_dst t = _tmp(d); + ureg_RSQ(ureg, t, _src(1)); + ureg_RCP(ureg, d, ureg_src(t)); + break; + } + case SM4_OPCODE_SINCOS: + { + struct ureg_dst s = _dst(0); + struct ureg_dst c = _dst(1); + struct ureg_src v = _src(2); + if(s.File != TGSI_FILE_NULL) + ureg_SIN(ureg, s, v); + if(c.File != TGSI_FILE_NULL) + ureg_COS(ureg, c, v); + break; + } + + // control flow + case SM4_OPCODE_DISCARD: + ureg_KIL(ureg, _src(0)); + break; + OP_CF(LOOP, BGNLOOP); + OP_CF(ENDLOOP, ENDLOOP); + case SM4_OPCODE_BREAK: + ureg_BRK(ureg); + break; + case SM4_OPCODE_BREAKC: + // XXX: can test_nz apply to this?! + ureg_BREAKC(ureg, _src(0)); + break; + case SM4_OPCODE_CONTINUE: + ureg_CONT(ureg); + break; + case SM4_OPCODE_CONTINUEC: + // XXX: can test_nz apply to this?! + ureg_IF(ureg, _src(0), &label); + ureg_CONT(ureg); + ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); + ureg_ENDIF(ureg); + break; + case SM4_OPCODE_SWITCH: + ureg_SWITCH(ureg, _src(0)); + break; + case SM4_OPCODE_CASE: + ureg_CASE(ureg, _src(0)); + break; + case SM4_OPCODE_DEFAULT: + ureg_DEFAULT(ureg); + break; + case SM4_OPCODE_ENDSWITCH: + ureg_ENDSWITCH(ureg); + break; + case SM4_OPCODE_CALL: + ureg_CAL(ureg, &label); + label_to_sm4_insn_num.push_back(std::make_pair(label, program.label_to_insn_num[_idx(SM4_FILE_LABEL)])); + break; + case SM4_OPCODE_LABEL: + if(in_sub) + ureg_ENDSUB(ureg); + else + ureg_END(ureg); + ureg_BGNSUB(ureg); + in_sub = true; + break; + case SM4_OPCODE_RET: + if(in_sub || insn_num != (program.insns.size() - 1)) + ureg_RET(ureg); + break; + case SM4_OPCODE_RETC: + ureg_IF(ureg, _src(0), &label); + if(insn->insn.test_nz) + ureg_RET(ureg); + ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); + if(!insn->insn.test_nz) + { + ureg_ELSE(ureg, &label); + ureg_RET(ureg); + ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); + } + ureg_ENDIF(ureg); + break; + OP_CF(ELSE, ELSE); + case SM4_OPCODE_ENDIF: + ureg_ENDIF(ureg); + break; + case SM4_OPCODE_IF: + if(insn->insn.test_nz) + { + ureg_IF(ureg, _src(0), &label); + label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num])); + } + else + { + unsigned linked = program.cf_insn_linked[insn_num]; + if(program.insns[linked]->opcode == SM4_OPCODE_ENDIF) + { + ureg_IF(ureg, _src(0), &label); + ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); + ureg_ELSE(ureg, &label); + label_to_sm4_insn_num.push_back(std::make_pair(label, linked)); + } + else + { + /* we have to swap the branches in this case (fun!) + * TODO: maybe just emit a SEQ 0? + * */ + unsigned endif = program.cf_insn_linked[linked]; + + ureg_IF(ureg, _src(0), &label); + label_to_sm4_insn_num.push_back(std::make_pair(label, linked)); + + translate_insns(linked + 1, endif); + + sm4_to_tgsi_insn_num[linked] = ureg_get_instruction_number(ureg); + ureg_ELSE(ureg, &label); + label_to_sm4_insn_num.push_back(std::make_pair(label, endif)); + + translate_insns(insn_num + 1, linked); + + insn_num = endif - 1; + goto next; + } + } + break; + case SM4_OPCODE_RESINFO: + { + std::map<int, int>::iterator i; + i = program.resource_to_slot.find(_idx(SM4_FILE_RESOURCE, 2)); + check(i != program.resource_to_slot.end()); + unsigned texslot = i->second; + + // no driver actually provides this, unfortunately + ureg_TXQ(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot]); + break; + }; + // TODO: sample offset, sample index + case SM4_OPCODE_LD: // dst, coord_int, res; mipmap level in last coord_int arg (ouch) + case SM4_OPCODE_LD_MS: + { + unsigned texslot = _texslot(false); + unsigned dim = 0; + switch(targets[texslot].first) + { + case TGSI_TEXTURE_1D: + dim = 1; + break; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + dim = 2; + break; + case TGSI_TEXTURE_3D: + dim = 3; + break; + default: + check(0); + } + struct ureg_dst tmp = _tmp(); + if(avoid_txf) + { + struct ureg_src texcoord; + if(!avoid_int) + { + ureg_I2F(ureg, tmp, _src(1)); + texcoord = ureg_src(tmp); + } + else + texcoord = _src(1); + + ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_swizzle(texcoord, 0, 1, 2, dim), samplers[texslot]); + } + else + ureg_TXF(ureg, _dst(), tex_target(texslot), ureg_swizzle(_src(1), 0, 1, 2, dim), samplers[texslot]); + break; + } + case SM4_OPCODE_SAMPLE: // dst, coord, res, samp + { + unsigned texslot = _texslot(); + ureg_TEX(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot]); + break; + } + case SM4_OPCODE_SAMPLE_B: // dst, coord, res, samp, bias.x + { + unsigned texslot = _texslot(); + struct ureg_dst tmp = _tmp(); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1)); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0)); + ureg_TXB(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); + break; + } + case SM4_OPCODE_SAMPLE_C: // dst, coord, res, samp, comp.x + { + unsigned texslot = _texslot(); + struct ureg_dst tmp = _tmp(); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), _src(1)); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_swizzle(_src(4), 0, 0, 0, 0)); + ureg_TEX(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); + break; + } + case SM4_OPCODE_SAMPLE_C_LZ: // dst, coord, res, samp, comp.x + { + unsigned texslot = _texslot(); + struct ureg_dst tmp = _tmp(); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), _src(1)); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_swizzle(_src(4), 0, 0, 0, 0)); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 0.0)); + ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); + break; + } + case SM4_OPCODE_SAMPLE_D: // dst, coord, res, samp, ddx, ddy + { + unsigned texslot = _texslot(); + ureg_TXD(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot], _src(4), _src(5)); + break; + } + case SM4_OPCODE_SAMPLE_L: // dst, coord, res, samp, bias.x + { + unsigned texslot = _texslot(); + struct ureg_dst tmp = _tmp(); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1)); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0)); + ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); + break; + } + default: + ok = false; + break; + } + + if(!ok && !avoid_int) + { + ok = true; + switch(insn->opcode) + { + // integer + OP1_(ITOF, I2F); + OP1_(FTOI, F2I); + OP2_(IADD, UADD); + OP1(INEG); + OP2_(IMUL, UMUL); + OP3_(IMAD, UMAD); + OP2_(ISHL, SHL); + OP2_(ISHR, ISHR); + OP2(IMIN); + OP2(IMAX); + OP2_(ILT, ISLT); + OP2_(IGE, ISGE); + OP2_(IEQ, USEQ); + OP2_(INE, USNE); + + // unsigned + OP1_(UTOF, U2F); + OP1_(FTOU, F2U); + OP2(UMUL); + OP3(UMAD); + OP2(UMIN); + OP2(UMAX); + OP2_(ULT, USLT); + OP2_(UGE, USGE); + OP2(USHR); + + case SM4_OPCODE_UDIV: + { + struct ureg_dst q = _dst(0); + struct ureg_dst r = _dst(1); + struct ureg_src a = _src(2); + struct ureg_src b = _src(3); + if(q.File != TGSI_FILE_NULL) + ureg_UDIV(ureg, q, a, b); + if(r.File != TGSI_FILE_NULL) + ureg_UMOD(ureg, r, a, b); + break; + } + default: + ok = false; + } + } + + if(!ok && avoid_int) + { + ok = true; + switch(insn->opcode) + { + case SM4_OPCODE_ITOF: + case SM4_OPCODE_UTOF: + break; + OP1_(FTOI, TRUNC); + OP1_(FTOU, FLR); + // integer + OP2_(IADD, ADD); + OP2_(IMUL, MUL); + OP3_(IMAD, MAD); + OP2_(MIN, MIN); + OP2_(MAX, MAX); + OP2_(ILT, SLT); + OP2_(IGE, SGE); + OP2_(IEQ, SEQ); + OP2_(INE, SNE); + + // unsigned + OP2_(UMUL, MUL); + OP3_(UMAD, MAD); + OP2_(UMIN, MIN); + OP2_(UMAX, MAX); + OP2_(ULT, SLT); + OP2_(UGE, SGE); + + case SM4_OPCODE_INEG: + ureg_MOV(ureg, _dst(), ureg_negate(_src(1))); + break; + case SM4_OPCODE_ISHL: + { + struct ureg_dst d = _dst(); + struct ureg_dst t = _tmp(d); + ureg_EX2(ureg, t, _src(2)); + ureg_MUL(ureg, d, ureg_src(t), _src(1)); + break; + } + case SM4_OPCODE_ISHR: + case SM4_OPCODE_USHR: + { + struct ureg_dst d = _dst(); + struct ureg_dst t = _tmp(d); + ureg_EX2(ureg, t, ureg_negate(_src(2))); + ureg_MUL(ureg, t, ureg_src(t), _src(1)); + ureg_FLR(ureg, d, ureg_src(t)); + break; + } + case SM4_OPCODE_UDIV: + { + struct ureg_dst q = _dst(0); + struct ureg_dst r = _dst(1); + struct ureg_src a = _src(2); + struct ureg_src b = _src(3); + struct ureg_dst f = _tmp(); + ureg_DIV(ureg, f, a, b); + if(q.File != TGSI_FILE_NULL) + ureg_FLR(ureg, q, ureg_src(f)); + if(r.File != TGSI_FILE_NULL) + { + ureg_FRC(ureg, f, ureg_src(f)); + ureg_MUL(ureg, r, ureg_src(f), b); + } + break; + } + default: + ok = false; + } + } + + check(ok); + + if(!insn_tmps.empty()) + { + for(unsigned i = 0; i < insn_tmps.size(); ++i) + ureg_release_temporary(ureg, insn_tmps[i]); + insn_tmps.clear(); + } +next:; + } + } + + void* do_translate() + { + unsigned processor; + switch(program.version.type) + { + case 0: + processor = TGSI_PROCESSOR_FRAGMENT; + break; + case 1: + processor = TGSI_PROCESSOR_VERTEX; + break; + case 2: + processor = TGSI_PROCESSOR_GEOMETRY; + break; + default: + fail("Tessellation and compute shaders not yet supported"); + return 0; + } + + if(!sm4_link_cf_insns(program)) + fail("Malformed control flow"); + if(!sm4_find_labels(program)) + fail("Failed to locate labels"); + if(!sm4_allocate_resource_sampler_pairs(program)) + fail("Unsupported (indirect?) accesses to resources and/or samplers"); + + ureg = ureg_create(processor); + + in_sub = false; + + for(unsigned i = 0; i < program.slot_to_resource.size(); ++i) + samplers.push_back(ureg_DECL_sampler(ureg, i)); + + sm4_to_tgsi_insn_num.resize(program.insns.size()); + for(unsigned insn_num = 0; insn_num < program.dcls.size(); ++insn_num) + { + sm4_dcl& dcl = *program.dcls[insn_num]; + int idx = -1; + if(dcl.op.get() && dcl.op->is_index_simple(0)) + idx = dcl.op->indices[0].disp; + switch(dcl.opcode) + { + case SM4_OPCODE_DCL_GLOBAL_FLAGS: + break; + case SM4_OPCODE_DCL_TEMPS: + for(unsigned i = 0; i < dcl.num; ++i) + temps.push_back(ureg_DECL_temporary(ureg)); + break; + case SM4_OPCODE_DCL_INPUT: + check(idx >= 0); + if(processor == TGSI_PROCESSOR_VERTEX) + { + if(inputs.size() <= (unsigned)idx) + inputs.resize(idx + 1); + inputs[idx] = ureg_DECL_vs_input(ureg, idx); + } + else if(processor == TGSI_PROCESSOR_GEOMETRY) + { + // TODO: is this correct? + unsigned gsidx = dcl.op->indices[1].disp; + if(inputs.size() <= (unsigned)gsidx) + inputs.resize(gsidx + 1); + inputs[gsidx] = ureg_DECL_gs_input(ureg, gsidx, TGSI_SEMANTIC_GENERIC, gsidx); + } + else + check(0); + break; + case SM4_OPCODE_DCL_INPUT_PS: + check(idx >= 0); + if(inputs.size() <= (unsigned)idx) + inputs.resize(idx + 1); + inputs[idx] = ureg_DECL_fs_input_cyl_centroid(ureg, TGSI_SEMANTIC_GENERIC, idx, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].interpolation, 0, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].centroid); + break; + case SM4_OPCODE_DCL_OUTPUT: + check(idx >= 0); + if(outputs.size() <= (unsigned)idx) + outputs.resize(idx + 1); + if(processor == TGSI_PROCESSOR_FRAGMENT) + outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, idx); + else + outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, idx); + break; + case SM4_OPCODE_DCL_INPUT_SIV: + case SM4_OPCODE_DCL_INPUT_SGV: + case SM4_OPCODE_DCL_INPUT_PS_SIV: + case SM4_OPCODE_DCL_INPUT_PS_SGV: + check(idx >= 0); + if(inputs.size() <= (unsigned)idx) + inputs.resize(idx + 1); + // TODO: is this correct? + inputs[idx] = ureg_DECL_system_value(ureg, idx, sm4_to_pipe_sv[dcl.sv], 0); + break; + case SM4_OPCODE_DCL_OUTPUT_SIV: + case SM4_OPCODE_DCL_OUTPUT_SGV: + check(idx >= 0); + if(outputs.size() <= (unsigned)idx) + outputs.resize(idx + 1); + check(sm4_to_pipe_sv[dcl.sv] >= 0); + outputs[idx] = ureg_DECL_output(ureg, sm4_to_pipe_sv[dcl.sv], 0); + break; + case SM4_OPCODE_DCL_RESOURCE: + check(idx >= 0); + if(targets.size() <= (unsigned)idx) + targets.resize(idx + 1); + switch(dcl.dcl_resource.target) + { + case SM4_TARGET_TEXTURE1D: + targets[idx].first = TGSI_TEXTURE_1D; + targets[idx].second = TGSI_TEXTURE_SHADOW1D; + break; + case SM4_TARGET_TEXTURE2D: + targets[idx].first = TGSI_TEXTURE_2D; + targets[idx].second = TGSI_TEXTURE_SHADOW2D; + break; + case SM4_TARGET_TEXTURE3D: + targets[idx].first = TGSI_TEXTURE_3D; + targets[idx].second = 0; + break; + case SM4_TARGET_TEXTURECUBE: + targets[idx].first = TGSI_TEXTURE_CUBE; + targets[idx].second = 0; + break; + default: + // HACK to make SimpleSample10 work + //check(0); + targets[idx].first = TGSI_TEXTURE_2D; + targets[idx].second = TGSI_TEXTURE_SHADOW2D; + break; + } + break; + case SM4_OPCODE_DCL_SAMPLER: + check(idx >= 0); + if(sampler_modes.size() <= (unsigned)idx) + sampler_modes.resize(idx + 1); + check(!dcl.dcl_sampler.mono); + sampler_modes[idx] = dcl.dcl_sampler.shadow; + break; + case SM4_OPCODE_DCL_CONSTANT_BUFFER: + check(dcl.op->num_indices == 2); + check(dcl.op->is_index_simple(0)); + check(dcl.op->is_index_simple(1)); + idx = dcl.op->indices[0].disp; + ureg_DECL_constant2D(ureg, 0, (unsigned)dcl.op->indices[1].disp - 1, idx); + break; + case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE: + ureg_property_gs_input_prim(ureg, d3d_to_pipe_prim_type[dcl.dcl_gs_input_primitive.primitive]); + break; + case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: + ureg_property_gs_output_prim(ureg, d3d_to_pipe_prim[dcl.dcl_gs_output_primitive_topology.primitive_topology]); + break; + case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: + ureg_property_gs_max_vertices(ureg, dcl.num); + break; + default: + check(0); + } + } + + translate_insns(0, program.insns.size()); + sm4_to_tgsi_insn_num.push_back(ureg_get_instruction_number(ureg)); + if(in_sub) + ureg_ENDSUB(ureg); + else + ureg_END(ureg); + + for(unsigned i = 0; i < label_to_sm4_insn_num.size(); ++i) + ureg_fixup_label(ureg, label_to_sm4_insn_num[i].first, sm4_to_tgsi_insn_num[label_to_sm4_insn_num[i].second]); + + const struct tgsi_token * tokens = ureg_get_tokens(ureg, 0); + ureg_destroy(ureg); + return (void*)tokens; + } + + void* translate() + { + try + { + return do_translate(); + } + catch(const char*) + { + return 0; + } + } +}; + +void* sm4_to_tgsi(struct sm4_program& program) +{ + sm4_to_tgsi_converter conv(program); + return conv.translate(); +} diff --git a/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.h b/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.h new file mode 100644 index 0000000000..5722b277fb --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.h @@ -0,0 +1,34 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SM4_TO_TGSI_H_ +#define SM4_TO_TGSI_H_ + +#include "sm4.h" + +void* sm4_to_tgsi(struct sm4_program& program); + +#endif /* SM4_TO_TGSI_H_ */ diff --git a/src/gallium/state_trackers/d3d1x/gd3d1x/tools/dxbc2tgsi.cpp b/src/gallium/state_trackers/d3d1x/gd3d1x/tools/dxbc2tgsi.cpp new file mode 100644 index 0000000000..d210f8acad --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3d1x/tools/dxbc2tgsi.cpp @@ -0,0 +1,82 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "dxbc.h" +#include "sm4.h" +#include "../sm4_to_tgsi.h" +#include "tgsi/tgsi_dump.h" +#include <iostream> +#include <fstream> + +void usage() +{ + std::cerr << "Gallium Direct3D10/11 Shader to TGSI converter\n"; + std::cerr << "This program is free software, released under a MIT-like license\n"; + std::cerr << "Not affiliated with or endorsed by Microsoft in any way\n"; + std::cerr << "Latest version available from http://cgit.freedesktop.org/mesa/mesa/\n"; + std::cerr << "\n"; + std::cerr << "Usage: dxbc2tgsi FILE\n"; + std::cerr << std::endl; +} + +int main(int argc, char** argv) +{ + if(argc < 2) + { + usage(); + return 1; + } + + std::vector<char> data; + std::ifstream in(argv[1]); + char c; + in >> std::noskipws; + while(in >> c) + data.push_back(c); + in.close(); + + dxbc_container* dxbc = dxbc_parse(&data[0], data.size()); + if(dxbc) + { + std::cout << *dxbc; + dxbc_chunk_header* sm4_chunk = dxbc_find_shader_bytecode(&data[0], data.size()); + if(sm4_chunk) + { + sm4_program* sm4 = sm4_parse(sm4_chunk + 1, bswap_le32(sm4_chunk->size)); + if(sm4) + { + const struct tgsi_token* tokens = (const struct tgsi_token*)sm4_to_tgsi(*sm4); + if(tokens) + { + std::cout << *sm4; + std::cout << "\n# TGSI program: " << std::endl; + tgsi_dump(tokens, 0); + } + } + } + delete dxbc; + } +} diff --git a/src/gallium/state_trackers/d3d1x/gd3dapi/Makefile b/src/gallium/state_trackers/d3d1x/gd3dapi/Makefile new file mode 100644 index 0000000000..8b16b1bcbc --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3dapi/Makefile @@ -0,0 +1,4 @@ +all: idl + +include ../Makefile.inc + diff --git a/src/gallium/state_trackers/d3d1x/gd3dapi/galliumcom.idl b/src/gallium/state_trackers/d3d1x/gd3dapi/galliumcom.idl new file mode 100644 index 0000000000..96faf1c070 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3dapi/galliumcom.idl @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Header for all COM-based Gallium APIs and state trackers */ + +import "oaidl.idl"; +import "ocidl.idl"; + +[object, local, uuid("481c9372-795f-4630-bd5b-1f46d33cc28b")] +interface IGalliumAdapter : IUnknown +{ + struct pipe_screen* GetGalliumScreen(); + struct pipe_screen* GetGalliumReferenceSoftwareScreen(); + struct pipe_screen* GetGalliumFastSoftwareScreen(); +} + +[object, local, uuid("2c0f7e72-d9fe-4e7b-9fee-d476695ad5d9")] +interface IGalliumDevice : IUnknown +{ + // turn Gallium resource into API resource + HRESULT OpenGalliumResource( + [in] struct pipe_resource* resource, + [out] IUnknown** api_resource + ); + + /* returns the Gallium context used by the device + * can return NULL if the device uses multiple contexts or doesn't want to implement GetGalliumContext() + * we have this function because often using one context is faster than using more (or it's the only working option) + */ + struct pipe_context* GetGalliumContext(); + + // restore the context state after using the Gallium context for something else + // does nothing if GetGalliumContext returns null + void RestoreGalliumState(); + + /* like RestoreGalliumState, but ignores: + * - constant buffers + * - non-PS samplers and shader resource views + * - blend color, sample mask + * - scissor + * - index buffer + * + * This is intended to restore state after a blit-like operation. + */ + void RestoreGalliumStateBlitOnly(); +}; + +[object, local, uuid("61934787-7aea-412c-8c72-8afe6a33d622")] +interface IGalliumResource : IUnknown +{ + struct pipe_resource* GetGalliumResource(); +}; + diff --git a/src/gallium/state_trackers/d3d1x/gd3dapi/galliumd3d10_1.idl b/src/gallium/state_trackers/d3d1x/gd3dapi/galliumd3d10_1.idl new file mode 100644 index 0000000000..dddb3431b6 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3dapi/galliumd3d10_1.idl @@ -0,0 +1,35 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +import "ocidl.idl"; +import "d3d10_1.idl"; + +// just adding __stdcall to the function makes at least one version of widl crash +[object, local] +interface IDummyInterfaceToPutWidlInComModeForGalliumD3D10 +{} + +HRESULT GalliumD3D10DeviceCreate1(struct pipe_screen* screen, struct pipe_context* context, BOOL owns_context, unsigned creation_flags, IDXGIAdapter* adapter, ID3D10Device1** ppDevice); diff --git a/src/gallium/state_trackers/d3d1x/gd3dapi/galliumd3d11.idl b/src/gallium/state_trackers/d3d1x/gd3dapi/galliumd3d11.idl new file mode 100644 index 0000000000..76f8a7f7f6 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3dapi/galliumd3d11.idl @@ -0,0 +1,36 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +import "ocidl.idl"; +import "d3d11.idl"; + +// just adding __stdcall to the function makes at least one version of widl crash +[object, local] +interface IDummyInterfaceToPutWidlInComModeForGalliumD3D11 +{} + +HRESULT GalliumD3D11DeviceCreate(struct pipe_screen* screen, struct pipe_context* context, BOOL owns_context, unsigned creation_flags, IDXGIAdapter* adapter, ID3D11Device** ppDevice); + diff --git a/src/gallium/state_trackers/d3d1x/gd3dapi/galliumdxgi.idl b/src/gallium/state_trackers/d3d1x/gd3dapi/galliumdxgi.idl new file mode 100644 index 0000000000..c6233c85b9 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3dapi/galliumdxgi.idl @@ -0,0 +1,134 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Header for the Gallium extensions to DXGI */ + +import "galliumcom.idl"; +import "../d3dapi/dxgi.idl"; + +/* These calls set the display system that will be associated + * to new DXGI factories created with CreateDXGIFactory and + * CreateDXGIFactory1 by the current thread. + * + * Existing factories and DXGI objects created from them are + * not affected. + * + * Gallium DXGI has both per-thread and per-process settings. + * If the per-thread display system has been set (i.e. a function + * of these was called, and the last one called was not UseNothing), + * it will be used. + * Otherwise, the per-process display system will be used if set, or + * and other the factory creation call may either fail, or use an + * user-specified default.. + * + * The per-process setting can be altered by calling + * GalliumDXGIMakeDefault, which will set the per-process setting + * according to the current per-thread setting. + * + * GalliumDXGIUseNothing() is the initial state, which means that + * the per-process default should be used, and if that is "use nothing" + * too, the call will either fail or use a user-specified default. + * + * NOTE that setting the per-process default is NOT atomic and must + * not be done concurrently with other calls to GalliumDXGIMakeDefault, + * CreateDXGIFactory or CreateDXGIFactory1. + * + * The PFNHWNDRESOLVER function is passed HWNDs coming from + * the API user and must return window-system-specific values: + * - X11: Window* + * - GDI: HWND + */ + +[object, local, uuid("c22d2f85-f7dd-40b0-a50b-5d308f973c5e")] +interface IGalliumDXGIBackend : IUnknown +{ + /* *present_cookie is set to a cookie that is passed to EndPresent + * + * *window and *rect are the window and subrectangle + * to present in. + * + * For X11, *window is a Window. + * For other systems, it will be the equivalent way to reference a window. + * + * The rectangle is clipped against the window size, so you can + * specify (0, 0, INT_MAX, INT_MAX) to use the whole window. + * + * rgndata is set to either NULL, or the region, in coordinates relative + * to the subrectangle, to clip presentation to. + * *rgndata is valid until EndPresent is called, at which point EndPresent + * may free the data. + * + * However, the rect field should still be set as normal if possible (especially + * the dimension).. + * + * If preserve_aspect_ratio is set, *rgndata will be ignored. This + * limitation may be lifted in future versions. + * + * If the window is fully obscured, return DXGI_STATUS_OCCLUDED. + * Everything else is ignored in that case. + * + * EndPresent is only called when S_OK is returned. + */ + HRESULT BeginPresent( + [in] HWND hwnd, + [out] void** present_cookie, + [out] void** window, + [out] RECT* rect, + [out] struct _RGNDATA** rgndata, + [out] BOOL* preserve_aspect_ratio + ); + + void EndPresent( + [in] HWND hwnd, + [out] void* present_cookie + ); + + /* If the window is fully obscured, return DXGI_STATUS_OCCLUDED, else S_OK */ + HRESULT TestPresent( + [in] HWND hwnd + ); + + /* Get size of rectangle that would be returned by BeginPresent */ + HRESULT GetPresentSize( + [in] HWND hwnd, + [out] unsigned* width, + [out] unsigned* height + ); +} + +void GalliumDXGIUseNothing(); + +/* only a subset of these may be available, depending on platform and compilation options */ +void GalliumDXGIUseX11Display(struct _XDisplay* dpy, IGalliumDXGIBackend* backend); + +/* these don't really work for now +void GalliumDXGIUseDRMCard(int fd); +void GalliumDXGIUseFBDev(int fd); +void GalliumDXGIUseHDC(HDC hdc, IGalliumDXGIGDIBackend* backend); +*/ + +void GalliumDXGIMakeDefault(); + diff --git a/src/gallium/state_trackers/d3d1x/mstools/download-mstools b/src/gallium/state_trackers/d3d1x/mstools/download-mstools new file mode 100755 index 0000000000..15a6317180 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/mstools/download-mstools @@ -0,0 +1,73 @@ +#!/bin/bash +ok=1 +for i in fxc.exe D3DCompiler_43.dll d3dx9_43.dll d3dx10_43.dll d3dx11_43.dll; do + if ! test -e "$i"; then + ok= + fi +done + +if test -n "$ok"; then + exit 0 +fi + +echo "To compile HLSL shaders, the Microsoft HLSL compiler needs to be downloaded." +echo +echo "Downloading Microsoft DirectX June 2010 SDK and extracting files..." +echo "Please wait, this will need to download and unpack a 600 MB file..." +echo +echo "The contribution of a free HLSL compiler would be greatly appreciated!" +echo + +ok=1 +if ! which wget >/dev/null; then + echo "Error: wget is required to download the files" + echo "On Debian or Ubuntu, run the following command to install it:" + echo "sudo apt-get install wget" + echo + ok= +fi + +if ! which cabextract >/dev/null; then + echo "Error: cabextract is required to unpack the files" + echo "On Debian or Ubuntu, run the following command to install it:" + echo "sudo apt-get install cabextract" + echo + ok= +fi + +if test -z "$ok"; then + exit 1 +fi + +dxsdk_file="DXSDK_Jun10.exe" +dxsdk_url="http://download.microsoft.com/download/A/E/7/AE743F1F-632B-4809-87A9-AA1BB3458E31/DXSDK_Jun10.exe" +dxsdk_size=599452800 + +fxc_path="DXSDK/Utilities/bin/x86/fxc.exe" +d3dcompiler_cab_path="DXSDK/Redist/Jun2010_D3DCompiler_43_x86.cab" +d3dx9_cab_path="DXSDK/Redist/Jun2010_d3dx9_43_x86.cab" +d3dx10_cab_path="DXSDK/Redist/Jun2010_d3dx10_43_x86.cab" +d3dx11_cab_path="DXSDK/Redist/Jun2010_d3dx11_43_x86.cab" + +if test "$(stat -c '%s' "$dxsdk_file" 2>/dev/null)" != $dxsdk_size; then + wget --continue "$dxsdk_url" + if test "$(stat -c '%s' "$dxsdk_file" 2>/dev/null)" != $dxsdk_size; then + echo "Failed to download DirectX SDK: expected $dxsdk_file with size $dxsdk_size" + echo "Download manually from $dxsdk_url" + exit 1 + fi +fi + +for i in "$fxc_path" "$d3dcompiler_cab_path" "$d3dx9_cab_path" "$d3dx10_cab_path" "$d3dx11_cab_path"; do + if ! test -e "$i"; then + echo "Please wait, this may take several minutes because a 600 MB archive may need to be fully decompressed..." + cabextract -F "$i" "$dxsdk_file" + fi +done + +for i in "$d3dcompiler_cab_path" "$d3dx9_cab_path" "$d3dx10_cab_path" "$d3dx11_cab_path"; do + cabextract -F "*.dll" "$i" +done + +/bin/cp -dpf "$fxc_path" . + diff --git a/src/gallium/state_trackers/d3d1x/progs/Makefile b/src/gallium/state_trackers/d3d1x/progs/Makefile new file mode 100644 index 0000000000..143e531662 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/Makefile @@ -0,0 +1,46 @@ +LIBRARY_INCLUDES = -Id3d10app -Id3d11app -I../gd3dapi -I../d3dapi -I../w32api +LIBS= \ + ../dxgi/libdxgi.a \ + ../gd3d1x/libgd3d1x.a \ + ../d3d1xshader/libd3d1xshader.a \ + ../d3d1xstutil/libd3d1xstutil.a \ + ../../egl/libegl.a \ + ../../../auxiliary/libgallium.a \ + ../../../winsys/sw/wrapper/libwsw.a \ + ../../../winsys/sw/xlib/libws_xlib.a \ + ../../../winsys/sw/dri/libswdri.a \ + ../../../../../lib/libEGL.so + +LDADD=-ldl + +all: bin/d3d10tri bin/d3d11tri bin/d3d11tex bin/d3d11gears +include ../Makefile.inc + +ifneq ($(findstring fbdev, $(EGL_PLATFORMS)),) +LIBS += ../../../winsys/sw/fbdev/libfbdev.a +endif + +ifneq ($(findstring x11, $(EGL_PLATFORMS)),) +LDADD += -ldrm -lX11 -lXext -lXfixes +LIBS += ../../../winsys/sw/xlib/libws_xlib.a +endif + +ifneq ($(findstring kms, $(EGL_PLATFORMS)),) +LDADD += -ldrm +endif + +LIBS_D3D10 = ../dxgid3d10/libdxgid3d10.a ../gd3d10/libgd3d10.a $(LIBS) +LIBS_D3D11 = ../dxgid3d11/libdxgid3d11.a ../gd3d11/libgd3d11.a $(LIBS) + +bin/d3d10tri: d3d10app/d3d10x11main.o d3d10tri/d3d10tri.o $(LIBS_D3D10) + $(CXX) $(CXXFLAGS) $(LDFLAGS) $(filter %.o,$^) -o $@ -Wl,--start-group $(LIBS_D3D10) -Wl,--end-group $(LDADD) + +bin/d3d11tri: d3d11app/d3d11x11main.o d3d11tri/d3d11tri.o $(LIBS_D3D11) + $(CXX) $(CXXFLAGS) $(LDFLAGS) $(filter %.o,$^) -o $@ -Wl,--start-group $(LIBS_D3D11) -Wl,--end-group $(LDADD) + +bin/d3d11tex: d3d11app/d3d11x11main.o d3d11tex/d3d11tex.o $(LIBS_D3D11) + $(CXX) $(CXXFLAGS) $(LDFLAGS) $(filter %.o,$^) -o $@ -Wl,--start-group $(LIBS_D3D11) -Wl,--end-group $(LDADD) + +bin/d3d11gears: d3d11app/d3d11x11main.o d3d11gears/d3d11gears.o $(LIBS_D3D11) + $(CXX) $(CXXFLAGS) $(LDFLAGS) $(filter %.o,$^) -o $@ -Wl,--start-group $(LIBS_D3D11) -Wl,--end-group $(LDADD) + diff --git a/src/gallium/state_trackers/d3d1x/progs/bin/d3d10tri.exe b/src/gallium/state_trackers/d3d1x/progs/bin/d3d10tri.exe Binary files differnew file mode 100755 index 0000000000..77ab03fcce --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/bin/d3d10tri.exe diff --git a/src/gallium/state_trackers/d3d1x/progs/bin/d3d11gears.exe b/src/gallium/state_trackers/d3d1x/progs/bin/d3d11gears.exe Binary files differnew file mode 100755 index 0000000000..c2cd296a40 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/bin/d3d11gears.exe diff --git a/src/gallium/state_trackers/d3d1x/progs/bin/d3d11spikysphere.exe b/src/gallium/state_trackers/d3d1x/progs/bin/d3d11spikysphere.exe Binary files differnew file mode 100755 index 0000000000..c3bc667fb3 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/bin/d3d11spikysphere.exe diff --git a/src/gallium/state_trackers/d3d1x/progs/bin/d3d11tex.exe b/src/gallium/state_trackers/d3d1x/progs/bin/d3d11tex.exe Binary files differnew file mode 100755 index 0000000000..0be5cb8dd7 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/bin/d3d11tex.exe diff --git a/src/gallium/state_trackers/d3d1x/progs/bin/d3d11tri.exe b/src/gallium/state_trackers/d3d1x/progs/bin/d3d11tri.exe Binary files differnew file mode 100755 index 0000000000..abada5484b --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/bin/d3d11tri.exe diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d10app/d3d10app.h b/src/gallium/state_trackers/d3d1x/progs/d3d10app/d3d10app.h new file mode 100755 index 0000000000..59fe338f56 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d10app/d3d10app.h @@ -0,0 +1,51 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef D3D10APP_H +#define D3D10APP_H + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#include <objbase.h> +#include <d3d10_1.h> +#include <assert.h> +#include <stdio.h> +#include <float.h> + +#define ensure(x) do {HRESULT __hr = (x); if(!SUCCEEDED(__hr)) {fprintf(stderr, "COM error %08x\n", __hr); abort();}} while(0) + +struct d3d10_application +{ + virtual ~d3d10_application() {} + + virtual void draw(ID3D10Device* ctx, ID3D10RenderTargetView* rtv, unsigned width, unsigned height, double time) = 0; + virtual bool init(ID3D10Device* dev, int argc, char** argv) = 0; +}; + +/* this is the entry point you must provide */ +extern "C" d3d10_application* d3d10_application_create(); + +#endif diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d10app/d3d10winmain.cpp b/src/gallium/state_trackers/d3d1x/progs/d3d10app/d3d10winmain.cpp new file mode 100755 index 0000000000..94680977eb --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d10app/d3d10winmain.cpp @@ -0,0 +1,188 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#define INITGUID +#include "d3d10app.h" +#include "stdio.h" + +static d3d10_application* app; +static IDXGISwapChain* swap_chain; +static unsigned width, height; +static DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM; +static ID3D10Device* dev; +static ID3D10Device* ctx; +static int frames = 0; +static int buffer_count = 1; + +LRESULT CALLBACK WndProc(HWND hwnd, UINT message, WPARAM wParam, LPARAM lParam) +{ + switch (message) + { + case WM_SIZE: + width = lParam & 0xffff; + height = lParam >> 16; + + swap_chain->ResizeBuffers(buffer_count, width, height, format, 0); + frames = 0; + break; + case WM_DESTROY: + PostQuitMessage(0); + break; + default: + return DefWindowProc(hwnd, message, wParam, lParam); + } + return 0; +} + +int main(int argc, char** argv) +{ + HINSTANCE hInstance = GetModuleHandle(NULL); + WNDCLASSEXA wcex; + + wcex.cbSize = sizeof(WNDCLASSEX); + + wcex.style = CS_HREDRAW | CS_VREDRAW; + wcex.lpfnWndProc = WndProc; + wcex.cbClsExtra = 0; + wcex.cbWndExtra = 0; + wcex.hInstance = hInstance; + wcex.hIcon = 0; + wcex.hCursor = LoadCursor(NULL, IDC_ARROW); + wcex.hbrBackground = (HBRUSH)(COLOR_WINDOW+1); + wcex.lpszMenuName = 0; + wcex.lpszClassName = "d3d10"; + wcex.hIconSm = 0; + + RegisterClassExA(&wcex); + + HWND hwnd = CreateWindowA("d3d10", "d3d10", WS_OVERLAPPEDWINDOW, + CW_USEDEFAULT, 0, CW_USEDEFAULT, 0, NULL, NULL, hInstance, NULL); + + if(!hwnd) + return FALSE; + + RECT rc; + GetClientRect(hwnd, &rc ); + width = rc.right - rc.left; + height = rc.bottom - rc.top; + + DXGI_SWAP_CHAIN_DESC swap_chain_desc; + memset(&swap_chain_desc, 0, sizeof(swap_chain_desc)); + swap_chain_desc.BufferDesc.Width = width; + swap_chain_desc.BufferDesc.Height = height; + swap_chain_desc.BufferDesc.Format = format; + swap_chain_desc.SampleDesc.Count = 1; + swap_chain_desc.SampleDesc.Quality = 0; + swap_chain_desc.OutputWindow = hwnd; + swap_chain_desc.Windowed = TRUE; + swap_chain_desc.BufferCount = buffer_count; + swap_chain_desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; + swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD; + + D3D10_FEATURE_LEVEL1 feature_level = D3D10_FEATURE_LEVEL_10_0; + + HRESULT hr; + if(1) + { + hr = D3D10CreateDeviceAndSwapChain( + NULL, + D3D10_DRIVER_TYPE_HARDWARE, + NULL, + D3D10_CREATE_DEVICE_SINGLETHREADED, // | D3D10_CREATE_DEVICE_DEBUG, + D3D10_SDK_VERSION, + &swap_chain_desc, + &swap_chain, + &dev); + } + else + { + hr = D3D10CreateDeviceAndSwapChain1( + NULL, + D3D10_DRIVER_TYPE_HARDWARE, + NULL, + D3D10_CREATE_DEVICE_SINGLETHREADED, // | D3D10_CREATE_DEVICE_DEBUG, + feature_level, + D3D10_SDK_VERSION, + &swap_chain_desc, + &swap_chain, + (ID3D10Device1**)&dev); + } + + if(!SUCCEEDED(hr)) + { + fprintf(stderr, "Failed to create D3D10 device (hresult %08x)\n", hr); + return 1; + } + + ctx = dev; + + app = d3d10_application_create(); + if(!app->init(dev, argc, argv)) + return 1; + + ShowWindow(hwnd, SW_SHOWDEFAULT); + UpdateWindow(hwnd); + + LARGE_INTEGER freq; + QueryPerformanceFrequency(&freq); + double period = 1.0 / (double)freq.QuadPart; + LARGE_INTEGER ctime_li; + QueryPerformanceCounter(&ctime_li); + double start_time = ctime_li.QuadPart * period; + + MSG msg; + for(;;) + { + if(PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) + { + if(msg.message == WM_QUIT) + break; + TranslateMessage(&msg); + DispatchMessage(&msg); + } + else if(width && height) + { + ID3D10Texture2D* tex; + static ID3D10RenderTargetView* rtv; + ensure(swap_chain->GetBuffer(0, __uuidof(tex), (void**)&tex)); + ensure(dev->CreateRenderTargetView(tex, NULL, &rtv)); + + QueryPerformanceCounter(&ctime_li); + double ctime = (double)ctime_li.QuadPart * period - start_time; + + app->draw(ctx, rtv, width, height, ctime); + ctx->OMSetRenderTargets(0, 0, 0); + + swap_chain->Present(0, 0); + rtv->Release(); + tex->Release(); + } + else + WaitMessage(); + } + return (int) msg.wParam; +} diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d10app/d3d10x11main.cpp b/src/gallium/state_trackers/d3d1x/progs/d3d10app/d3d10x11main.cpp new file mode 100755 index 0000000000..8f07380056 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d10app/d3d10x11main.cpp @@ -0,0 +1,154 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "d3d10app.h" +#include <X11/Xlib.h> +#include <galliumdxgi.h> +#include <sys/time.h> + +static d3d10_application* app; +static IDXGISwapChain* swap_chain; +unsigned width, height; +DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM; +static ID3D10Device* dev; +static ID3D10Device* ctx; + +double get_time() +{ + struct timeval tv; + gettimeofday(&tv, 0); + return (double)tv.tv_sec + (double)tv.tv_usec * 0.000001; +} + +int main(int argc, char** argv) +{ + Display* dpy = XOpenDisplay(0); + Visual* visual = DefaultVisual(dpy, DefaultScreen(dpy)); + Colormap cmap = XCreateColormap(dpy, RootWindow(dpy, DefaultScreen(dpy)), visual, AllocNone); + XSetWindowAttributes swa; + swa.colormap = cmap; + swa.border_pixel = 0; + swa.event_mask = StructureNotifyMask; + width = 512; + height = 512; + Window win = XCreateWindow(dpy, RootWindow(dpy, DefaultScreen(dpy)), 0, 0, width, height, 0, CopyFromParent, InputOutput, visual, CWBorderPixel | CWColormap| CWEventMask, &swa); + XMapWindow(dpy, win); + + GalliumDXGIUseX11Display(dpy, 0); + + DXGI_SWAP_CHAIN_DESC swap_chain_desc; + memset(&swap_chain_desc, 0, sizeof(swap_chain_desc)); + swap_chain_desc.BufferDesc.Width = width; + swap_chain_desc.BufferDesc.Height = height; + swap_chain_desc.BufferDesc.Format = format; + swap_chain_desc.SampleDesc.Count = 1; + swap_chain_desc.SampleDesc.Quality = 0; + swap_chain_desc.OutputWindow = (HWND)win; + swap_chain_desc.Windowed = TRUE; + swap_chain_desc.BufferCount = 3; + swap_chain_desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; + swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + + D3D10_FEATURE_LEVEL1 feature_level = D3D10_FEATURE_LEVEL_10_0; + + HRESULT hr; + if(0) + { + hr = D3D10CreateDeviceAndSwapChain( + NULL, + D3D10_DRIVER_TYPE_HARDWARE, + NULL, + D3D10_CREATE_DEVICE_SINGLETHREADED, + D3D10_SDK_VERSION, + &swap_chain_desc, + &swap_chain, + &dev); + } + else + { + hr = D3D10CreateDeviceAndSwapChain1( + NULL, + D3D10_DRIVER_TYPE_HARDWARE, + NULL, + D3D10_CREATE_DEVICE_SINGLETHREADED, + feature_level, + D3D10_SDK_VERSION, + &swap_chain_desc, + &swap_chain, + (ID3D10Device1**)&dev); + } + if(!SUCCEEDED(hr)) + { + fprintf(stderr, "Failed to create D3D10 device (hresult %08x)\n", hr); + return 1; + } + ctx = dev; + + app = d3d10_application_create(); + if(!app->init(dev, argc, argv)) + return 1; + + double start_time = get_time(); + + MSG msg; + for(;;) + { + XEvent event; + if(XPending(dpy)) + { + XNextEvent(dpy, &event); + if(event.type == DestroyNotify) + break; + switch(event.type) + { + case ConfigureNotify: + width = event.xconfigure.width; + height = event.xconfigure.height; + swap_chain->ResizeBuffers(3, width, height, format, 0); + break; + } + } + else if(width && height) + { + ID3D10Texture2D* tex; + ID3D10RenderTargetView* rtv; + ensure(swap_chain->GetBuffer(0, IID_ID3D10Texture2D, (void**)&tex)); + ensure(dev->CreateRenderTargetView(tex, NULL, &rtv)); + + double ctime = get_time() - start_time; + + app->draw(ctx, rtv, width, height, ctime); + ctx->OMSetRenderTargets(0, 0, 0); + + tex->Release(); + rtv->Release(); + swap_chain->Present(0, 0); + } + else + XPeekEvent(dpy, &event); + } + return (int) msg.wParam; +} diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d10tri/d3d10tri.cpp b/src/gallium/state_trackers/d3d1x/progs/d3d10tri/d3d10tri.cpp new file mode 100755 index 0000000000..90b97f8a5d --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d10tri/d3d10tri.cpp @@ -0,0 +1,118 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "d3d10app.h" +#include "d3d10tri.hlsl.ps.h" +#include "d3d10tri.hlsl.vs.h" + +struct vertex { + float position[4]; + float color[4]; +}; + +static struct vertex vertices[3] = +{ + { + { 0.0f, 0.9f, 0.5f, 1.0f }, + { 1.0f, 0.0f, 0.0f, 1.0f } + }, + { + { 0.9f, -0.9f, 0.5f, 1.0f }, + { 0.0f, 0.0f, 1.0f, 1.0f } + }, + { + { -0.9f, -0.9f, 0.5f, 1.0f }, + { 0.0f, 1.0f, 0.0f, 1.0f } + }, +}; + +struct d3d10tri : public d3d10_application +{ + ID3D10PixelShader* ps; + ID3D10VertexShader* vs; + ID3D10InputLayout* layout; + ID3D10Buffer* vb; + + virtual bool init(ID3D10Device* dev, int argc, char** argv) + { + ensure(dev->CreatePixelShader(g_ps, sizeof(g_ps), &ps)); + ensure(dev->CreateVertexShader(g_vs, sizeof(g_vs), &vs)); + + D3D10_INPUT_ELEMENT_DESC elements[] = + { + {"POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 0, D3D10_INPUT_PER_VERTEX_DATA, 0}, + {"COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 16, D3D10_INPUT_PER_VERTEX_DATA, 0}, + }; + + ensure(dev->CreateInputLayout(elements, sizeof(elements) / sizeof(elements[0]), g_vs, sizeof(g_vs), &layout)); + D3D10_BUFFER_DESC bufferd; + bufferd.ByteWidth = sizeof(vertices); + bufferd.Usage = D3D10_USAGE_IMMUTABLE; + bufferd.BindFlags = D3D10_BIND_VERTEX_BUFFER; + bufferd.CPUAccessFlags = 0; + bufferd.MiscFlags = 0; + + D3D10_SUBRESOURCE_DATA buffersd; + buffersd.pSysMem = vertices; + buffersd.SysMemPitch = sizeof(vertices); + buffersd.SysMemSlicePitch = sizeof(vertices); + + ensure(dev->CreateBuffer(&bufferd, &buffersd, &vb)); + + return true; + } + + virtual void draw(ID3D10Device* ctx, ID3D10RenderTargetView* rtv, unsigned width, unsigned height, double time) + { + float clear_color[4] = {1, 0, 1, 1}; + D3D10_VIEWPORT vp; + memset(&vp, 0, sizeof(vp)); + vp.Width = (unsigned)width; + vp.Height = (unsigned)height; + vp.MaxDepth = 1.0f; + + ctx->OMSetRenderTargets(1, &rtv, 0); + ctx->RSSetViewports(1, &vp); + + ctx->ClearRenderTargetView(rtv, clear_color); + + ctx->IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + ctx->IASetInputLayout(layout); + unsigned stride = 2 * 4 * 4; + unsigned offset = 0; + ctx->IASetVertexBuffers(0, 1, &vb, &stride, &offset); + + ctx->VSSetShader(vs); + ctx->PSSetShader(ps); + + ctx->Draw(3, 0); + } +}; + +d3d10_application* d3d10_application_create() +{ + return new d3d10tri(); +} diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d10tri/d3d10tri.hlsl b/src/gallium/state_trackers/d3d1x/progs/d3d10tri/d3d10tri.hlsl new file mode 100755 index 0000000000..6bdd448ce0 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d10tri/d3d10tri.hlsl @@ -0,0 +1,50 @@ +/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+struct IA2VS
+{
+ float4 position : POSITION;
+ float4 color : COLOR;
+};
+
+struct VS2PS
+{
+ float4 position : SV_POSITION;
+ float4 color : COLOR;
+};
+
+VS2PS vs(IA2VS input)
+{
+ VS2PS result;
+ result.position = input.position;
+ result.color = input.color;
+ return result;
+}
+
+float4 ps(VS2PS input) : SV_TARGET
+{
+ return input.color;
+}
diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d10tri/d3d10tri.hlsl.ps.h b/src/gallium/state_trackers/d3d1x/progs/d3d10tri/d3d10tri.hlsl.ps.h new file mode 100755 index 0000000000..bc55cf8a47 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d10tri/d3d10tri.hlsl.ps.h @@ -0,0 +1,112 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 +// +// +// fxc /Fhd3d10tri.hlsl.ps.h /Eps /Tps_4_0 d3d10tri.hlsl +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_POSITION 0 xyzw 0 POS float +// COLOR 0 xyzw 1 NONE float xyzw +// +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_TARGET 0 xyzw 0 TARGET float xyzw +// +ps_4_0 +dcl_input_ps linear v1.xyzw +dcl_output o0.xyzw +mov o0.xyzw, v1.xyzw +ret +// Approximately 2 instruction slots used +#endif + +const BYTE g_ps[] = +{ + 68, 88, 66, 67, 206, 120, + 117, 238, 118, 127, 10, 87, + 80, 75, 114, 198, 95, 2, + 120, 102, 1, 0, 0, 0, + 208, 1, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 140, 0, 0, 0, 224, 0, + 0, 0, 20, 1, 0, 0, + 84, 1, 0, 0, 82, 68, + 69, 70, 80, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 28, 0, 0, 0, 0, 4, + 255, 255, 0, 1, 0, 0, + 28, 0, 0, 0, 77, 105, + 99, 114, 111, 115, 111, 102, + 116, 32, 40, 82, 41, 32, + 72, 76, 83, 76, 32, 83, + 104, 97, 100, 101, 114, 32, + 67, 111, 109, 112, 105, 108, + 101, 114, 32, 57, 46, 50, + 57, 46, 57, 53, 50, 46, + 51, 49, 49, 49, 0, 171, + 171, 171, 73, 83, 71, 78, + 76, 0, 0, 0, 2, 0, + 0, 0, 8, 0, 0, 0, + 56, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 15, 0, 0, 0, + 68, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 1, 0, + 0, 0, 15, 15, 0, 0, + 83, 86, 95, 80, 79, 83, + 73, 84, 73, 79, 78, 0, + 67, 79, 76, 79, 82, 0, + 171, 171, 79, 83, 71, 78, + 44, 0, 0, 0, 1, 0, + 0, 0, 8, 0, 0, 0, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 15, 0, 0, 0, + 83, 86, 95, 84, 65, 82, + 71, 69, 84, 0, 171, 171, + 83, 72, 68, 82, 56, 0, + 0, 0, 64, 0, 0, 0, + 14, 0, 0, 0, 98, 16, + 0, 3, 242, 16, 16, 0, + 1, 0, 0, 0, 101, 0, + 0, 3, 242, 32, 16, 0, + 0, 0, 0, 0, 54, 0, + 0, 5, 242, 32, 16, 0, + 0, 0, 0, 0, 70, 30, + 16, 0, 1, 0, 0, 0, + 62, 0, 0, 1, 83, 84, + 65, 84, 116, 0, 0, 0, + 2, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 2, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0 +}; diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d10tri/d3d10tri.hlsl.vs.h b/src/gallium/state_trackers/d3d1x/progs/d3d10tri/d3d10tri.hlsl.vs.h new file mode 100755 index 0000000000..7204281ea8 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d10tri/d3d10tri.hlsl.vs.h @@ -0,0 +1,128 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 +// +// +// fxc /Fhd3d10tri.hlsl.vs.h /Evs /Tvs_4_0 d3d10tri.hlsl +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// POSITION 0 xyzw 0 NONE float xyzw +// COLOR 0 xyzw 1 NONE float xyzw +// +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_POSITION 0 xyzw 0 POS float xyzw +// COLOR 0 xyzw 1 NONE float xyzw +// +vs_4_0 +dcl_input v0.xyzw +dcl_input v1.xyzw +dcl_output_siv o0.xyzw, position +dcl_output o1.xyzw +mov o0.xyzw, v0.xyzw +mov o1.xyzw, v1.xyzw +ret +// Approximately 3 instruction slots used +#endif + +const BYTE g_vs[] = +{ + 68, 88, 66, 67, 190, 171, + 186, 20, 44, 105, 95, 129, + 137, 204, 223, 72, 251, 159, + 126, 176, 1, 0, 0, 0, + 28, 2, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 140, 0, 0, 0, 220, 0, + 0, 0, 48, 1, 0, 0, + 160, 1, 0, 0, 82, 68, + 69, 70, 80, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 28, 0, 0, 0, 0, 4, + 254, 255, 0, 1, 0, 0, + 28, 0, 0, 0, 77, 105, + 99, 114, 111, 115, 111, 102, + 116, 32, 40, 82, 41, 32, + 72, 76, 83, 76, 32, 83, + 104, 97, 100, 101, 114, 32, + 67, 111, 109, 112, 105, 108, + 101, 114, 32, 57, 46, 50, + 57, 46, 57, 53, 50, 46, + 51, 49, 49, 49, 0, 171, + 171, 171, 73, 83, 71, 78, + 72, 0, 0, 0, 2, 0, + 0, 0, 8, 0, 0, 0, + 56, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 15, 15, 0, 0, + 65, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 1, 0, + 0, 0, 15, 15, 0, 0, + 80, 79, 83, 73, 84, 73, + 79, 78, 0, 67, 79, 76, + 79, 82, 0, 171, 79, 83, + 71, 78, 76, 0, 0, 0, + 2, 0, 0, 0, 8, 0, + 0, 0, 56, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 15, 0, + 0, 0, 68, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 1, 0, 0, 0, 15, 0, + 0, 0, 83, 86, 95, 80, + 79, 83, 73, 84, 73, 79, + 78, 0, 67, 79, 76, 79, + 82, 0, 171, 171, 83, 72, + 68, 82, 104, 0, 0, 0, + 64, 0, 1, 0, 26, 0, + 0, 0, 95, 0, 0, 3, + 242, 16, 16, 0, 0, 0, + 0, 0, 95, 0, 0, 3, + 242, 16, 16, 0, 1, 0, + 0, 0, 103, 0, 0, 4, + 242, 32, 16, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 101, 0, 0, 3, 242, 32, + 16, 0, 1, 0, 0, 0, + 54, 0, 0, 5, 242, 32, + 16, 0, 0, 0, 0, 0, + 70, 30, 16, 0, 0, 0, + 0, 0, 54, 0, 0, 5, + 242, 32, 16, 0, 1, 0, + 0, 0, 70, 30, 16, 0, + 1, 0, 0, 0, 62, 0, + 0, 1, 83, 84, 65, 84, + 116, 0, 0, 0, 3, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 +}; diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d10tri/d3d10tri.vcxproj b/src/gallium/state_trackers/d3d1x/progs/d3d10tri/d3d10tri.vcxproj new file mode 100755 index 0000000000..f269e3bbc1 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d10tri/d3d10tri.vcxproj @@ -0,0 +1,98 @@ +<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{5366F4FD-0E6C-40CC-B2F2-CE3D350F0729}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>d3d10tri</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(SolutionDir)\d3d10app</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalDependencies>d3d10.lib;d3d10_1.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(SolutionDir)\d3d10app</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalDependencies>d3d10.lib;d3d10_1.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <CustomBuild Include="d3d10tri.hlsl">
+ <FileType>Document</FileType>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">"$(DXSDK_DIR)\Utilities\bin\x86\fxc.exe" /Fh%(Identity).ps.h /Eps /Tps_4_0 %(Identity)
+"$(DXSDK_DIR)\Utilities\bin\x86\fxc.exe" /Fh%(Identity).vs.h /Evs /Tvs_4_0 %(Identity)</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Identity).ps.h;%(Identity).vs.h;%(Outputs)</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="..\d3d10app\d3d10app.h" />
+ <ClInclude Include="d3d10tri.hlsl.ps.h" />
+ <ClInclude Include="d3d10tri.hlsl.vs.h" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="..\d3d10app\d3d10winmain.cpp" />
+ <ClCompile Include="d3d10tri.cpp" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11app.h b/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11app.h new file mode 100755 index 0000000000..53de10ab3e --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11app.h @@ -0,0 +1,51 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef D3D11APP_H +#define D3D11APP_H + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#include <objbase.h> +#include <d3d11.h> +#include <assert.h> +#include <stdio.h> +#include <float.h> + +#define ensure(x) do {HRESULT __hr = (x); if(!SUCCEEDED(__hr)) {fprintf(stderr, "COM error %08x\n", __hr); abort();}} while(0) + +struct d3d11_application +{ + virtual ~d3d11_application() {} + + virtual void draw(ID3D11DeviceContext* ctx, ID3D11RenderTargetView* rtv, unsigned width, unsigned height, double time) = 0; + virtual bool init(ID3D11Device* dev, int argc, char** argv) = 0; +}; + +/* this is the entry point you must provide */ +extern "C" d3d11_application* d3d11_application_create(); + +#endif diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11blit.hlsl b/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11blit.hlsl new file mode 100755 index 0000000000..4075160d17 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11blit.hlsl @@ -0,0 +1,53 @@ +/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+Texture2D tex;
+sampler samp;
+
+struct IA2VS
+{
+ float4 position : POSITION;
+ float2 texcoord : TEXCOORD;
+};
+
+struct VS2PS
+{
+ float4 position : SV_POSITION;
+ float2 texcoord : TEXCOORD;
+};
+
+VS2PS vs_blit(IA2VS input)
+{
+ VS2PS result;
+ result.position = input.position;
+ result.texcoord = input.texcoord;
+ return result;
+}
+
+float4 ps_blit(VS2PS input) : SV_TARGET
+{
+ return tex.Sample(samp, input.texcoord);
+}
diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11blit.hlsl.ps.h b/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11blit.hlsl.ps.h new file mode 100755 index 0000000000..5823b4c976 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11blit.hlsl.ps.h @@ -0,0 +1,142 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 +// +// +// fxc /Fhd3d11blit.hlsl.ps.h /Eps_blit /Tps_4_0 d3d11blit.hlsl +// +// +// Resource Bindings: +// +// Name Type Format Dim Slot Elements +// ------------------------------ ---------- ------- ----------- ---- -------- +// samp sampler NA NA 0 1 +// tex texture float4 2d 0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_POSITION 0 xyzw 0 POS float +// TEXCOORD 0 xy 1 NONE float xy +// +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_TARGET 0 xyzw 0 TARGET float xyzw +// +ps_4_0 +dcl_sampler s0, mode_default +dcl_resource_texture2d (float,float,float,float) t0 +dcl_input_ps linear v1.xy +dcl_output o0.xyzw +sample o0.xyzw, v1.xyxx, t0.xyzw, s0 +ret +// Approximately 2 instruction slots used +#endif + +const BYTE g_ps_blit[] = +{ + 68, 88, 66, 67, 183, 100, + 39, 89, 244, 20, 241, 39, + 36, 169, 159, 230, 234, 214, + 114, 11, 1, 0, 0, 0, + 72, 2, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 212, 0, 0, 0, 44, 1, + 0, 0, 96, 1, 0, 0, + 204, 1, 0, 0, 82, 68, + 69, 70, 152, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 2, 0, 0, 0, + 28, 0, 0, 0, 0, 4, + 255, 255, 0, 1, 0, 0, + 101, 0, 0, 0, 92, 0, + 0, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 97, 0, 0, 0, 2, 0, + 0, 0, 5, 0, 0, 0, + 4, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 1, 0, 0, 0, 12, 0, + 0, 0, 115, 97, 109, 112, + 0, 116, 101, 120, 0, 77, + 105, 99, 114, 111, 115, 111, + 102, 116, 32, 40, 82, 41, + 32, 72, 76, 83, 76, 32, + 83, 104, 97, 100, 101, 114, + 32, 67, 111, 109, 112, 105, + 108, 101, 114, 32, 57, 46, + 50, 57, 46, 57, 53, 50, + 46, 51, 49, 49, 49, 0, + 171, 171, 73, 83, 71, 78, + 80, 0, 0, 0, 2, 0, + 0, 0, 8, 0, 0, 0, + 56, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 15, 0, 0, 0, + 68, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 1, 0, + 0, 0, 3, 3, 0, 0, + 83, 86, 95, 80, 79, 83, + 73, 84, 73, 79, 78, 0, + 84, 69, 88, 67, 79, 79, + 82, 68, 0, 171, 171, 171, + 79, 83, 71, 78, 44, 0, + 0, 0, 1, 0, 0, 0, + 8, 0, 0, 0, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 0, 0, 0, 0, + 15, 0, 0, 0, 83, 86, + 95, 84, 65, 82, 71, 69, + 84, 0, 171, 171, 83, 72, + 68, 82, 100, 0, 0, 0, + 64, 0, 0, 0, 25, 0, + 0, 0, 90, 0, 0, 3, + 0, 96, 16, 0, 0, 0, + 0, 0, 88, 24, 0, 4, + 0, 112, 16, 0, 0, 0, + 0, 0, 85, 85, 0, 0, + 98, 16, 0, 3, 50, 16, + 16, 0, 1, 0, 0, 0, + 101, 0, 0, 3, 242, 32, + 16, 0, 0, 0, 0, 0, + 69, 0, 0, 9, 242, 32, + 16, 0, 0, 0, 0, 0, + 70, 16, 16, 0, 1, 0, + 0, 0, 70, 126, 16, 0, + 0, 0, 0, 0, 0, 96, + 16, 0, 0, 0, 0, 0, + 62, 0, 0, 1, 83, 84, + 65, 84, 116, 0, 0, 0, + 2, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 2, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0 +}; diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11blit.hlsl.vs.h b/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11blit.hlsl.vs.h new file mode 100755 index 0000000000..5d9acd20f1 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11blit.hlsl.vs.h @@ -0,0 +1,130 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 +// +// +// fxc /Fhd3d11blit.hlsl.vs.h /Evs_blit /Tvs_4_0 d3d11blit.hlsl +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// POSITION 0 xyzw 0 NONE float xyzw +// TEXCOORD 0 xy 1 NONE float xy +// +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_POSITION 0 xyzw 0 POS float xyzw +// TEXCOORD 0 xy 1 NONE float xy +// +vs_4_0 +dcl_input v0.xyzw +dcl_input v1.xy +dcl_output_siv o0.xyzw, position +dcl_output o1.xy +mov o0.xyzw, v0.xyzw +mov o1.xy, v1.xyxx +ret +// Approximately 3 instruction slots used +#endif + +const BYTE g_vs_blit[] = +{ + 68, 88, 66, 67, 142, 11, + 173, 22, 73, 47, 224, 51, + 147, 83, 148, 177, 56, 17, + 72, 237, 1, 0, 0, 0, + 36, 2, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 140, 0, 0, 0, 224, 0, + 0, 0, 56, 1, 0, 0, + 168, 1, 0, 0, 82, 68, + 69, 70, 80, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 28, 0, 0, 0, 0, 4, + 254, 255, 0, 1, 0, 0, + 28, 0, 0, 0, 77, 105, + 99, 114, 111, 115, 111, 102, + 116, 32, 40, 82, 41, 32, + 72, 76, 83, 76, 32, 83, + 104, 97, 100, 101, 114, 32, + 67, 111, 109, 112, 105, 108, + 101, 114, 32, 57, 46, 50, + 57, 46, 57, 53, 50, 46, + 51, 49, 49, 49, 0, 171, + 171, 171, 73, 83, 71, 78, + 76, 0, 0, 0, 2, 0, + 0, 0, 8, 0, 0, 0, + 56, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 15, 15, 0, 0, + 65, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 1, 0, + 0, 0, 3, 3, 0, 0, + 80, 79, 83, 73, 84, 73, + 79, 78, 0, 84, 69, 88, + 67, 79, 79, 82, 68, 0, + 171, 171, 79, 83, 71, 78, + 80, 0, 0, 0, 2, 0, + 0, 0, 8, 0, 0, 0, + 56, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 15, 0, 0, 0, + 68, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 1, 0, + 0, 0, 3, 12, 0, 0, + 83, 86, 95, 80, 79, 83, + 73, 84, 73, 79, 78, 0, + 84, 69, 88, 67, 79, 79, + 82, 68, 0, 171, 171, 171, + 83, 72, 68, 82, 104, 0, + 0, 0, 64, 0, 1, 0, + 26, 0, 0, 0, 95, 0, + 0, 3, 242, 16, 16, 0, + 0, 0, 0, 0, 95, 0, + 0, 3, 50, 16, 16, 0, + 1, 0, 0, 0, 103, 0, + 0, 4, 242, 32, 16, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 101, 0, 0, 3, + 50, 32, 16, 0, 1, 0, + 0, 0, 54, 0, 0, 5, + 242, 32, 16, 0, 0, 0, + 0, 0, 70, 30, 16, 0, + 0, 0, 0, 0, 54, 0, + 0, 5, 50, 32, 16, 0, + 1, 0, 0, 0, 70, 16, + 16, 0, 1, 0, 0, 0, + 62, 0, 0, 1, 83, 84, + 65, 84, 116, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 2, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0 +}; diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11u.h b/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11u.h new file mode 100755 index 0000000000..1e0ce04ca6 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11u.h @@ -0,0 +1,424 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <vector> + +#include "d3d11blit.hlsl.ps.h" +#include "d3d11blit.hlsl.vs.h" + +template<typename index_type = unsigned> +struct triangle_list_indices : public std::vector<index_type> +{ + unsigned base; + bool flip; + + triangle_list_indices() + : base(0), flip(false) + {} + + void poly(unsigned a, unsigned b, unsigned c) + { + this->push_back(base + a); + this->push_back(base + (flip ? c : b)); + this->push_back(base + (flip ? b : c)); + } + + void poly(unsigned a, unsigned b, unsigned c, unsigned d) + { + poly(a, b, c); + poly(a, c, d); + } + + void poly(unsigned a, unsigned b, unsigned c, unsigned d, unsigned e) + { + poly(a, b, c, d); + poly(a, d, e); + } + + void poly(unsigned a, unsigned b, unsigned c, unsigned d, unsigned e, unsigned f) + { + poly(a, b, c, d, e); + poly(a, e, f); + } + + void poly(unsigned a, unsigned b, unsigned c, unsigned d, unsigned e, unsigned f, unsigned g) + { + poly(a, b, c, d, e, f); + poly(a, f, g); + } + + void poly(unsigned a, unsigned b, unsigned c, unsigned d, unsigned e, unsigned f, unsigned g, unsigned h) + { + poly(a, b, c, d, e, f, g); + poly(a, g, h); + } +}; + +struct mesh +{ + ID3D11InputLayout* layout; + ID3D11Buffer* buffer; + D3D11_PRIMITIVE_TOPOLOGY topology; + unsigned vertex_size; + unsigned draw_count; + DXGI_FORMAT index_format; + unsigned index_offset; + + mesh(ID3D11Device* dev, D3D11_PRIMITIVE_TOPOLOGY topology, + const D3D11_INPUT_ELEMENT_DESC *elements, unsigned num_elements, + const void* vs, unsigned vs_size, + const void* vertices, unsigned vertex_size, unsigned num_vertices, + const void* indices = 0, unsigned index_size = 0, unsigned num_indices = 0) + : topology(topology), vertex_size(vertex_size), draw_count(index_size ? num_indices : num_vertices) + { + dev->CreateInputLayout(elements, num_elements, vs, vs_size, &layout); + if(index_size == 2) + index_format = DXGI_FORMAT_R16_UINT; + else if(index_size == 4) + index_format = DXGI_FORMAT_R32_UINT; + else + index_format = DXGI_FORMAT_UNKNOWN; + this->vertex_size = vertex_size; + index_offset = vertex_size * num_vertices; + + D3D11_BUFFER_DESC bufferd; + memset(&bufferd, 0, sizeof(bufferd)); + bufferd.Usage = D3D11_USAGE_IMMUTABLE; + bufferd.BindFlags = D3D11_BIND_VERTEX_BUFFER; + if(index_format) + bufferd.BindFlags |= D3D11_BIND_INDEX_BUFFER; + bufferd.ByteWidth = index_offset + index_format * num_indices; + + char* data = (char*)malloc(bufferd.ByteWidth); + memcpy(data, vertices, vertex_size * num_vertices); + memcpy(data + index_offset, indices, index_size * num_indices); + + D3D11_SUBRESOURCE_DATA buffersd; + buffersd.pSysMem = data; + + ensure(dev->CreateBuffer(&bufferd, &buffersd, &buffer)); + free(data); + } + + ~mesh() + { + layout->Release(); + buffer->Release(); + } + + void bind(ID3D11DeviceContext* ctx) + { + unsigned offset = 0; + ctx->IASetPrimitiveTopology(topology); + ctx->IASetInputLayout(layout); + if(index_format) + ctx->IASetIndexBuffer(buffer, index_format, index_offset); + ctx->IASetVertexBuffers(0, 1, &buffer, &vertex_size, &offset); + } + + void draw_bound(ID3D11DeviceContext* ctx) + { + if(index_format) + ctx->DrawIndexed(draw_count, 0, 0); + else + ctx->Draw(draw_count, 0); + } + + void bind_and_draw(ID3D11DeviceContext* ctx) + { + bind(ctx); + draw_bound(ctx); + } +}; + +mesh* create_tex_quad(ID3D11Device* dev, const BYTE* vs, unsigned vs_size) +{ + float quad_data[] = { + -1, -1, 0, 1, + -1, 1, 0, 0, + 1, -1, 1, 1, + 1, 1, 1, 0, + }; + + D3D11_INPUT_ELEMENT_DESC elements[2] = + { + {"POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0}, + {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 8, D3D11_INPUT_PER_VERTEX_DATA, 0}, + }; + + return new mesh(dev, D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, + elements, 2, + vs, vs_size, + quad_data, 4 * sizeof(float), 4, + 0, 0, 0); +} + +struct d3d11_blitter +{ + mesh* quad; + ID3D11VertexShader* vs; + ID3D11PixelShader* ps; + ID3D11SamplerState* sampler[2]; + + d3d11_blitter(ID3D11Device* dev) + { + quad = create_tex_quad(dev, g_vs_blit, sizeof(g_vs_blit)); + + dev->CreateVertexShader(g_vs_blit, sizeof(g_vs_blit), 0, &vs); + dev->CreatePixelShader(g_ps_blit, sizeof(g_ps_blit), 0, &ps); + + for(unsigned i = 0; i < 2; ++i) + { + D3D11_SAMPLER_DESC samplerd; + memset(&samplerd, 0, sizeof(samplerd)); + samplerd.Filter = i ? D3D11_FILTER_MIN_MAG_MIP_LINEAR : D3D11_FILTER_MIN_MAG_MIP_LINEAR; + samplerd.AddressU = samplerd.AddressV = samplerd.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + dev->CreateSamplerState(&samplerd, &sampler[i]); + } + } + + void bind(ID3D11DeviceContext* ctx, ID3D11ShaderResourceView* srv, ID3D11RenderTargetView* rtv, float x, float y, float width, float height, bool linear) + { + D3D11_VIEWPORT vp; + vp.TopLeftX = x; + vp.TopLeftY = y; + vp.Width = width; + vp.Height = height; + vp.MinDepth = 0; + vp.MaxDepth = 1; + ctx->RSSetViewports(1, &vp); + ctx->RSSetState(0); + ctx->OMSetBlendState(0, 0, ~0); + ctx->OMSetDepthStencilState(0, 0); + ctx->OMSetRenderTargets(1, &rtv, 0); + ctx->VSSetShader(vs, 0, 0); + ctx->PSSetShader(ps, 0, 0); + ctx->PSSetShaderResources(0, 1, &srv); + ctx->PSSetSamplers(0, 1, &sampler[!!linear]); + quad->bind(ctx); + } + + void draw_bound(ID3D11DeviceContext* ctx) + { + quad->draw_bound(ctx); + } + + void bind_draw_and_unbind(ID3D11DeviceContext* ctx, ID3D11ShaderResourceView* srv, ID3D11RenderTargetView* rtv, float x, float y, float width, float height, bool linear) + { + bind(ctx, srv, rtv, x, y, width, height, linear); + draw_bound(ctx); + unbind(ctx); + } + + void unbind(ID3D11DeviceContext* ctx) + { + void* null = 0; + ctx->PSSetShaderResources(0, 1, (ID3D11ShaderResourceView**)&null); + ctx->PSSetSamplers(0, 1, (ID3D11SamplerState**)&null); + } +}; + +template<typename T, unsigned n> +struct vec_t +{ + T v[n]; + + T& operator [](unsigned i) + { + return v[i]; + } + + const T& operator [](unsigned i) const + { + return v[i]; + } +}; + +template<typename T, unsigned n> +vec_t<T, n> operator -(const vec_t<T, n> a) +{ + vec_t<T, n> r; + for(unsigned i = 0; i < n; ++i) + r[i] = -a[i]; + return r; +} + +template<typename T, unsigned n> +vec_t<T, n> operator +(const vec_t<T, n>& a, const vec_t<T, n>& b) +{ + vec_t<T, n> r; + for(unsigned i = 0; i < n; ++i) + r[i] = a[i] + b[i]; + return r; +} + +template<typename T, unsigned n> +vec_t<T, n>& operator +=(vec_t<T, n>& a, const vec_t<T, n>& b) +{ + for(unsigned i = 0; i < n; ++i) + a[i] += b[i]; + return a; +} + +template<typename T, unsigned r, unsigned c> +struct mat_t : public vec_t<vec_t<T, r>, c> +{}; + +template<typename T, unsigned n> +vec_t<T, n> operator *(const vec_t<T, n>& a, const T& b) +{ + vec_t<T, n> r; + for(unsigned i = 0; i < n; ++i) + r[i] = a[i] * b; + return r; +} + +template<typename T, unsigned n> +vec_t<T, n> operator *(const T& b, const vec_t<T, n>& a) +{ + vec_t<T, n> r; + for(unsigned i = 0; i < n; ++i) + r[i] = a[i] * b; + return r; +} + +template<typename T, unsigned d, unsigned e> +vec_t<T, e> operator *(const mat_t<T, e, d>& m, const vec_t<T, d>& b) +{ + vec_t<T, e> r; + r = m[0] * b[0]; + for(unsigned i = 1; i < d; ++i) + r += m[i] * b[i]; + return r; +} + +template<typename T, unsigned d, unsigned e, unsigned f> +mat_t<T, e, f> operator *(const mat_t<T, e, d>& m, const mat_t<T, d, f>& b) +{ + mat_t<T, e, f> r; + for(unsigned i = 0; i < d; ++i) + r[i] = m * b[i]; + return r; +} + +template<typename T> +vec_t<T, 3> vec(T a, T b, T c) +{ + vec_t<T, 4> v; + v[0] = a; + v[1] = b; + v[2] = c; + return v; +} + +template<typename T> +vec_t<T, 4> vec(T a, T b, T c, T d) +{ + vec_t<T, 4> v; + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + return v; +} + +typedef mat_t<float, 4, 4> float4x4; +typedef mat_t<float, 4, 3> float4x3; +typedef mat_t<float, 3, 4> float3x4; +typedef mat_t<float, 3, 3> float3x3; + +typedef vec_t<float, 3> float3; +typedef vec_t<float, 4> float4; + +template<typename T> +mat_t<T, 4, 4> mat4x4_frustum(T left, T right, T bottom, T top, T nearval, T farval) +{ + T x = (2.0f * nearval) / (right - left); + T y = (2.0f * nearval) / (top - bottom); + T a = (right + left) / (right - left); + T b = (top + bottom) / (top - bottom); + T c = -(farval + nearval) / (farval - nearval); + T d = -(2.0f * farval * nearval) / (farval - nearval); + T _0 = (T)0; + + mat_t<T, 4, 4> m; + m[0] = vec(x, _0, _0, _0); + m[1] = vec(_0, y, _0, _0); + m[2] = vec(a, b, c, (T)-1); + m[3] = vec(_0, _0, d, _0); + return m; +} + +template<typename T> +mat_t<T, 3, 3> mat3x3_diag(T v) +{ + mat_t<T, 3, 3> m; + T _0 = (T)0; + m[0] = vec(v, _0, _0); + m[1] = vec(_0, v, _0); + m[2] = vec(_0, _0, v); + return m; +} + +template<typename T> +mat_t<T, 4, 4> mat4x4_diag(T v) +{ + mat_t<T, 4, 4> m; + T _0 = (T)0; + m[0] = vec(v, _0, _0, _0); + m[1] = vec(_0, v, _0, _0); + m[2] = vec(_0, _0, v, _0); + m[3] = vec(_0, _0, _0, v); + return m; +} + +template<typename T, unsigned n> +mat_t<T, n, n> mat_push_rotate(const mat_t<T, n, n>& m, unsigned axis, T angle) +{ + T s = (T)sin(angle); + T c = (T)cos(angle); + + mat_t<T, n, n> r = m; + unsigned a = (axis + 1) % 3; + unsigned b = (axis + 2) % 3; + r[a] = (m[a] * c) + (m[b] * s); + r[b] = -(m[a] * s) + (m[b] * c); + return r; +} + +template<typename T, unsigned n> +mat_t<T, n, n> mat_push_translate(const mat_t<T, n, n>& m, float x, float y, float z) +{ + mat_t<T, n, n> r = m; + vec_t<T, n> v; + v[0] = x; + v[1] = y; + v[2] = z; + if(n >= 4) + v[3] = (T)0; + r[3] += m * v; + return r; +} diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11winmain.cpp b/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11winmain.cpp new file mode 100755 index 0000000000..8e71ec367e --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11winmain.cpp @@ -0,0 +1,172 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#define INITGUID +#include "d3d11app.h" +#include "stdio.h" + +static d3d11_application* app; +static IDXGISwapChain* swap_chain; +static unsigned width, height; +static DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM; +static ID3D11Device* dev; +static ID3D11DeviceContext* ctx; +static int frames = 0; +static int buffer_count = 1; + +LRESULT CALLBACK WndProc(HWND hwnd, UINT message, WPARAM wParam, LPARAM lParam) +{ + switch (message) + { + case WM_SIZE: + width = lParam & 0xffff; + height = lParam >> 16; + + swap_chain->ResizeBuffers(buffer_count, width, height, format, 0); + frames = 0; + break; + case WM_DESTROY: + PostQuitMessage(0); + break; + default: + return DefWindowProc(hwnd, message, wParam, lParam); + } + return 0; +} + +int main(int argc, char** argv) +{ + HINSTANCE hInstance = GetModuleHandle(NULL); + WNDCLASSEXA wcex; + + wcex.cbSize = sizeof(WNDCLASSEX); + + wcex.style = CS_HREDRAW | CS_VREDRAW; + wcex.lpfnWndProc = WndProc; + wcex.cbClsExtra = 0; + wcex.cbWndExtra = 0; + wcex.hInstance = hInstance; + wcex.hIcon = 0; + wcex.hCursor = LoadCursor(NULL, IDC_ARROW); + wcex.hbrBackground = (HBRUSH)(COLOR_WINDOW+1); + wcex.lpszMenuName = 0; + wcex.lpszClassName = "d3d11"; + wcex.hIconSm = 0; + + RegisterClassExA(&wcex); + + HWND hwnd = CreateWindowA("d3d11", "d3d11", WS_OVERLAPPEDWINDOW, + CW_USEDEFAULT, 0, CW_USEDEFAULT, 0, NULL, NULL, hInstance, NULL); + + if(!hwnd) + return FALSE; + + RECT rc; + GetClientRect(hwnd, &rc ); + width = rc.right - rc.left; + height = rc.bottom - rc.top; + + DXGI_SWAP_CHAIN_DESC swap_chain_desc; + memset(&swap_chain_desc, 0, sizeof(swap_chain_desc)); + swap_chain_desc.BufferDesc.Width = width; + swap_chain_desc.BufferDesc.Height = height; + swap_chain_desc.BufferDesc.Format = format; + swap_chain_desc.SampleDesc.Count = 1; + swap_chain_desc.SampleDesc.Quality = 0; + swap_chain_desc.OutputWindow = hwnd; + swap_chain_desc.Windowed = TRUE; + swap_chain_desc.BufferCount = buffer_count; + swap_chain_desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; + swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD; + + D3D_FEATURE_LEVEL feature_level = D3D_FEATURE_LEVEL_10_0; + + HRESULT hr = D3D11CreateDeviceAndSwapChain( + NULL, + D3D_DRIVER_TYPE_HARDWARE, + NULL, + D3D11_CREATE_DEVICE_SINGLETHREADED, // | D3D11_CREATE_DEVICE_DEBUG, + NULL, + 0, + D3D11_SDK_VERSION, + &swap_chain_desc, + &swap_chain, + &dev, + &feature_level, + &ctx); + if(!SUCCEEDED(hr)) + { + fprintf(stderr, "Failed to create D3D11 device (hresult %08x)\n", hr); + return 1; + } + + app = d3d11_application_create(); + if(!app->init(dev, argc, argv)) + return 1; + + ShowWindow(hwnd, SW_SHOWDEFAULT); + UpdateWindow(hwnd); + + LARGE_INTEGER freq; + QueryPerformanceFrequency(&freq); + double period = 1.0 / (double)freq.QuadPart; + LARGE_INTEGER ctime_li; + QueryPerformanceCounter(&ctime_li); + double start_time = ctime_li.QuadPart * period; + + MSG msg; + for(;;) + { + if(PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) + { + if(msg.message == WM_QUIT) + break; + TranslateMessage(&msg); + DispatchMessage(&msg); + } + else if(width && height) + { + ID3D11Texture2D* tex; + static ID3D11RenderTargetView* rtv; + ensure(swap_chain->GetBuffer(0, __uuidof(tex), (void**)&tex)); + ensure(dev->CreateRenderTargetView(tex, NULL, &rtv)); + + QueryPerformanceCounter(&ctime_li); + double ctime = (double)ctime_li.QuadPart * period - start_time; + + app->draw(ctx, rtv, width, height, ctime); + ctx->OMSetRenderTargets(0, 0, 0); + + swap_chain->Present(0, 0); + rtv->Release(); + tex->Release(); + } + else + WaitMessage(); + } + return (int) msg.wParam; +} diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11x11main.cpp b/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11x11main.cpp new file mode 100755 index 0000000000..2fadf4eecd --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11app/d3d11x11main.cpp @@ -0,0 +1,114 @@ +#include "d3d11app.h" +#include <X11/Xlib.h> +#include <galliumdxgi.h> +#include <sys/time.h> + +static d3d11_application* app; +static IDXGISwapChain* swap_chain; +unsigned width, height; +DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM; +static ID3D11Device* dev; +static ID3D11DeviceContext* ctx; + +double get_time() +{ + struct timeval tv; + gettimeofday(&tv, 0); + return (double)tv.tv_sec + (double)tv.tv_usec * 0.000001; +} + +int main(int argc, char** argv) +{ + Display* dpy = XOpenDisplay(0); + Visual* visual = DefaultVisual(dpy, DefaultScreen(dpy)); + Colormap cmap = XCreateColormap(dpy, RootWindow(dpy, DefaultScreen(dpy)), visual, AllocNone); + XSetWindowAttributes swa; + swa.colormap = cmap; + swa.border_pixel = 0; + swa.event_mask = StructureNotifyMask; + width = 512; + height = 512; + Window win = XCreateWindow(dpy, RootWindow(dpy, DefaultScreen(dpy)), 0, 0, width, height, 0, CopyFromParent, InputOutput, visual, CWBorderPixel | CWColormap| CWEventMask, &swa); + XMapWindow(dpy, win); + + GalliumDXGIUseX11Display(dpy, 0); + + DXGI_SWAP_CHAIN_DESC swap_chain_desc; + memset(&swap_chain_desc, 0, sizeof(swap_chain_desc)); + swap_chain_desc.BufferDesc.Width = width; + swap_chain_desc.BufferDesc.Height = height; + swap_chain_desc.BufferDesc.Format = format; + swap_chain_desc.SampleDesc.Count = 1; + swap_chain_desc.SampleDesc.Quality = 0; + swap_chain_desc.OutputWindow = (HWND)win; + swap_chain_desc.Windowed = TRUE; + swap_chain_desc.BufferCount = 3; + swap_chain_desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; + swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + + D3D_FEATURE_LEVEL feature_level = D3D_FEATURE_LEVEL_10_0; + + HRESULT hr =D3D11CreateDeviceAndSwapChain( + NULL, + D3D_DRIVER_TYPE_HARDWARE, + NULL, + D3D11_CREATE_DEVICE_SINGLETHREADED, + NULL, + 0, + D3D11_SDK_VERSION, + &swap_chain_desc, + &swap_chain, + &dev, + &feature_level, + &ctx); + if(!SUCCEEDED(hr)) + { + fprintf(stderr, "Failed to create D3D11 device (hresult %08x)\n", hr); + return 1; + } + + app = d3d11_application_create(); + if(!app->init(dev, argc, argv)) + return 1; + + double start_time = get_time(); + + MSG msg; + for(;;) + { + XEvent event; + if(XPending(dpy)) + { + XNextEvent(dpy, &event); + if(event.type == DestroyNotify) + break; + switch(event.type) + { + case ConfigureNotify: + width = event.xconfigure.width; + height = event.xconfigure.height; + swap_chain->ResizeBuffers(3, width, height, format, 0); + break; + } + } + else if(width && height) + { + ID3D11Texture2D* tex; + ID3D11RenderTargetView* rtv; + ensure(swap_chain->GetBuffer(0, IID_ID3D11Texture2D, (void**)&tex)); + ensure(dev->CreateRenderTargetView(tex, NULL, &rtv)); + + double ctime = get_time() - start_time; + + app->draw(ctx, rtv, width, height, ctime); + ctx->OMSetRenderTargets(0, 0, 0); + + tex->Release(); + rtv->Release(); + swap_chain->Present(0, 0); + } + else + XPeekEvent(dpy, &event); + } + return (int) msg.wParam; +} diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11gears/d3d11gears.cpp b/src/gallium/state_trackers/d3d1x/progs/d3d11gears/d3d11gears.cpp new file mode 100755 index 0000000000..0edf1f2ef1 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11gears/d3d11gears.cpp @@ -0,0 +1,573 @@ +/* +* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. +* Copyright (C) 2009-2010 Luca Barbieri All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +*. +* The above copyright notice and this permission notice shall be included +* in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/* +* This is a port of the infamous "glxgears" demo to straight EGL +* Port by Dane Rushton 10 July 2005 +* +* This a rewrite of the 'eglgears' demo in straight Gallium +* Port by Luca Barbieri +* +* This a port of the 'galliumgears' demo to Direct3D 11 +* Port by Luca Barbieri +*/ + +#define _USE_MATH_DEFINES +#include "d3d11app.h" +#include "d3d11u.h" +#include "d3d11gears.hlsl.ps.h" +#include "d3d11gears.hlsl.vs.h" + +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <float.h> + +struct gear +{ + struct mesh* mesh; + float x; + float y; + float t0; + float wmul; + float4 color; +}; + +struct cbuf_t +{ + float4x4 projection; + float4x4 modelview; + float4 light; + float4 diffuse; + float4 specular; + float specular_power; + float padding[3]; +}; + +struct gear gears[3]; + +struct vertex +{ + float position[3]; + float normal[3]; + + vertex(float x, float y, float z, float nx, float ny, float nz) + { + position[0] = x; + position[1] = y; + position[2] = z; + normal[0] = nx; + normal[1] = ny; + normal[2] = nz; + } +}; + +#define VERT(x, y, z) vertices.push_back(vertex((x), (y), (z), (nx), (ny), (nz))) + +static mesh* build_gear(ID3D11Device* dev, int triangle_budget, float inner_radius, float outer_radius, float width, int teeth, float tooth_depth) +{ + int i, j, k; + float r0, r1, r2; + float da; + float nx, ny, nz; + int face; + int segs = 4; + int base_triangles = teeth * segs * 2 * 2; + int divs0 = (triangle_budget / base_triangles) - 1; + int divs = (divs0 > 0) ? divs0 : 1; + float* c = (float*)malloc(teeth * segs * sizeof(float)); + float* s = (float*)malloc(teeth * segs * sizeof(float)); + float* dc = (float*)malloc(teeth * segs * divs * sizeof(float)); + float* ds = (float*)malloc(teeth * segs * divs * sizeof(float)); + int num_vertices = teeth * segs * 2 * (3 + 2 * divs); + int num_triangles = base_triangles * (1 + divs); + printf("Creating gear with %i teeth using %i vertices used in %i triangles\n", teeth, num_vertices, num_triangles); + triangle_list_indices<> indices; + std::vector<vertex> vertices; + + r0 = inner_radius; + r1 = outer_radius - tooth_depth / 2.0f; + r2 = outer_radius + tooth_depth / 2.0f; + + da = (float)(2.0 * M_PI / (teeth * segs * divs)); + for(i = 0; i < teeth * segs * divs; ++i) { + float angle = da * i; + ds[i] = sin(angle); + dc[i] = cos(angle); + } + + for(i = 0; i < teeth * segs; ++i) { + s[i] = ds[i * divs]; + c[i] = dc[i * divs]; + } + + /* faces */ + for(face = -1; face <= 1; face += 2) { + float z = width * face * 0.5f; + nx = 0.0f; + ny = 0.0f; + nz = (float)face; + + indices.flip = face > 0; + + assert(segs == 4); + for(i = 0; i < teeth; ++i) { + VERT(r1 * c[segs * i], r1 * s[segs * i], z); + VERT(r2 * c[segs * i + 1], r2 * s[segs * i + 1], z); + VERT(r2 * c[segs * i + 2], r2 * s[segs * i + 2], z); + VERT(r1 * c[segs * i + 3], r1 * s[segs * i + 3], z); + } + + for(i = 0; i < teeth * segs * divs; ++i) { + VERT(r0 * dc[i], r0 * ds[i], z); + } + + for(i = 0; i < teeth; ++i) { + for(j = i * segs; j < (i + 1) * segs; ++j) { + int nextj = j + 1; + if(nextj == teeth * segs) + nextj = 0; + + for(k = j * divs; k < (j + 1) * divs; ++k) { + int nextk = k + 1; + if(nextk == teeth * segs * divs) + nextk = 0; + indices.poly(teeth * segs + k, j, teeth * segs + nextk); + } + + indices.poly(teeth * segs + nextj * divs, j, nextj); + } + } + + indices.base += teeth * segs * (1 + divs); + } + + /* teeth faces */ + indices.flip = true; + float z = width * 0.5f; + + float* coords = (float*)malloc((segs + 1) * 2 * sizeof(float)); + nz = 0; + for(i = 0; i < teeth; i++) { + int next = i + 1; + if(next == teeth) + next = 0; + + coords[0] = r1 * c[segs * i]; + coords[1] = r1 * s[segs * i]; + coords[2] = r2 * c[segs * i + 1]; + coords[3] = r2 * s[segs * i + 1]; + coords[4] = r2 * c[segs * i + 2]; + coords[5] = r2 * s[segs * i + 2]; + coords[6] = r1 * c[segs * i + 3]; + coords[7] = r1 * s[segs * i + 3]; + coords[8] = r1 * c[segs * next]; + coords[9] = r1 * s[segs * next]; + + for(int j = 0; j < segs; ++j) { + float dx = coords[j * 2] - coords[j * 2 + 2]; + float dy = coords[j * 2 + 1] - coords[j * 2 + 3]; + float len = hypotf(dx, dy); + nx = -dy / len; + ny = dx / len; + VERT(coords[j * 2], coords[j * 2 + 1], z); + VERT(coords[j * 2], coords[j * 2 + 1], -z); + VERT(coords[j * 2 + 2], coords[j * 2 + 3], z); + VERT(coords[j * 2 + 2], coords[j * 2 + 3], -z); + + indices.poly(0, 1, 3, 2); + indices.base += 4; + } + } + free(coords); + + /* inner part - simulate a cylinder */ + indices.flip = true; + for(i = 0; i < teeth * segs * divs; i++) { + int next = i + 1; + if(next == teeth * segs * divs) + next = 0; + + nx = -dc[i]; + ny = -ds[i]; + VERT(r0 * dc[i], r0 * ds[i], -width * 0.5f); + VERT(r0 * dc[i], r0 * ds[i], width * 0.5f); + + indices.poly(i * 2, i * 2 + 1, next * 2 + 1, next * 2); + } + + indices.base += teeth * segs * divs * 2; + free(c); + free(s); + free(dc); + free(ds); + + D3D11_INPUT_ELEMENT_DESC elements[2] = + { + {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0}, + {"NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0}, + }; + + return new mesh(dev, D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST, + elements, 2, + g_vs, sizeof(g_vs), + &vertices[0], sizeof(vertices[0]), vertices.size(), + &indices[0], sizeof(indices[0]), indices.size()); +} + +struct d3d11gears : public d3d11_application +{ + float view_rotx; + float view_roty; + float view_rotz; + int wireframe; + int triangles; + float speed; + float period; + unsigned impressions; + bool blue_only; + + float last_time; + + int cur_width; + int cur_height; + + ID3D11DepthStencilView* zsv; + ID3D11RenderTargetView* offscreen_rtv; + ID3D11ShaderResourceView* offscreen_srv; + + ID3D11Device* dev; + ID3D11BlendState* blend; + ID3D11DepthStencilState* zsa; + + ID3D11PixelShader* ps; + ID3D11VertexShader* vs; + ID3D11Buffer* cb; + + d3d11_blitter* blitter; + + d3d11gears() + : cur_width(-1), cur_height(-1), zsv(0), offscreen_rtv(0), offscreen_srv(0) + { + view_rotx = (float)(M_PI / 9.0); + view_roty = (float)(M_PI / 6.0); + view_rotz = 0.0f; + wireframe = 0; + triangles = 3200; + speed = 1.0f; + period = -1.0f; + impressions = 1; + blue_only = false; + } + + void draw_one(ID3D11DeviceContext* ctx, cbuf_t& cbd, const float4x4& modelview, float angle) + { + for(unsigned i = blue_only ? 2 : 0; i < 3; ++i) + { + float4x4 m2 = modelview; + m2 = mat_push_translate(m2, gears[i].x, gears[i].y, 0.0f); + m2 = mat_push_rotate(m2, 2, angle * gears[i].wmul + gears[i].t0); + + cbd.modelview = m2; + cbd.diffuse = gears[i].color; + cbd.specular = gears[i].color; + cbd.specular_power = 5.0f; + + ctx->UpdateSubresource(cb, 0, 0, &cbd, 0, 0); + + gears[i].mesh->bind_and_draw(ctx); + } + } + + float get_angle(double time) + { + // designed so that 1 = original glxgears speed + float mod_speed = M_PI * 70.0f / 180.0f * speed; + if(period < 0) + return (float)(time * mod_speed); + else + return (float)(cos(time / period) * period * mod_speed); + } + + void init_for_dimensions(unsigned width, unsigned height) + { + if(zsv) + zsv->Release(); + ID3D11Texture2D* zsbuf; + D3D11_TEXTURE2D_DESC zsbufd; + memset(&zsbufd, 0, sizeof(zsbufd)); + zsbufd.Width = width; + zsbufd.Height = height; + zsbufd.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; + zsbufd.ArraySize = 1; + zsbufd.MipLevels = 1; + zsbufd.SampleDesc.Count = 1; + zsbufd.BindFlags = D3D11_BIND_DEPTH_STENCIL; + ensure(dev->CreateTexture2D(&zsbufd, 0, &zsbuf)); + ensure(dev->CreateDepthStencilView(zsbuf, 0, &zsv)); + zsbuf->Release(); + + ID3D11Texture2D* offscreen; + if(offscreen_rtv) + { + offscreen_rtv->Release(); + offscreen_srv->Release(); + offscreen_rtv = 0; + offscreen_srv = 0; + } + + if(impressions > 1) + { + DXGI_FORMAT formats[] = { + DXGI_FORMAT_R32G32B32A32_FLOAT, + DXGI_FORMAT_R16G16B16A16_UNORM, + DXGI_FORMAT_R16G16B16A16_FLOAT, + DXGI_FORMAT_R10G10B10A2_UNORM, + }; + DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM; // this won't work well at all + unsigned needed_support = D3D11_FORMAT_SUPPORT_RENDER_TARGET | D3D11_FORMAT_SUPPORT_BLENDABLE | D3D11_FORMAT_SUPPORT_SHADER_SAMPLE; + for(unsigned i = 0; i < sizeof(formats); ++i) + { + unsigned support; + dev->CheckFormatSupport(DXGI_FORMAT_R32G32B32A32_FLOAT, &support); + if((support & needed_support) == needed_support) + { + format = formats[i]; + break; + } + } + + + D3D11_TEXTURE2D_DESC offscreend; + memset(&offscreend, 0, sizeof(offscreend)); + offscreend.Width = width; + offscreend.Height = height; + + offscreend.Format = format; + offscreend.MipLevels = 1; + offscreend.ArraySize = 1; + offscreend.SampleDesc.Count = 1; + offscreend.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; + ensure(dev->CreateTexture2D(&offscreend, 0, &offscreen)); + ensure(dev->CreateRenderTargetView(offscreen, 0, &offscreen_rtv)); + ensure(dev->CreateShaderResourceView(offscreen, 0, &offscreen_srv)); + offscreen->Release(); + } + + cur_width = width; + cur_height = height; + } + + void draw(ID3D11DeviceContext* ctx, ID3D11RenderTargetView* rtv, unsigned width, unsigned height, double time) + { + D3D11_VIEWPORT vp; + memset(&vp, 0, sizeof(vp)); + vp.Width = (float)width; + vp.Height = (float)height; + vp.MaxDepth = 1.0f; + + if((int)width != cur_width || (int)height != cur_height) + init_for_dimensions(width, height); + + float4 lightpos = vec(5.0f, 5.0f, 10.0f, 0.0f); + float black[4] = {0.0, 0.0, 0.0, 0}; + + float4x4 proj; + float4x4 m; + + float xr = (float)width / (float)height; + float yr = 1.0f; + if(xr < 1.0f) { + yr /= xr; + xr = 1.0f; + } + proj = mat4x4_frustum(-xr, xr, -yr, yr, 5.0f, 60.0f); + + m = mat4x4_diag(1.0f); + m = mat_push_translate(m, 0.0f, 0.0f, -40.0f); + m = mat_push_rotate(m, 0, view_rotx); + m = mat_push_rotate(m, 1, view_roty); + m = mat_push_rotate(m, 2, view_rotz); + + cbuf_t cbd; + + cbd.projection = proj; + cbd.light = lightpos; + + float blend_factor[4] = {1.0f / (float)impressions, 1.0f / (float)impressions, 1.0f / (float)impressions, 1.0f / (float)impressions}; + + ID3D11RenderTargetView* render_rtv; + if(impressions == 1) + render_rtv = rtv; + else + render_rtv = offscreen_rtv; + + ctx->RSSetViewports(1, &vp); + ctx->ClearRenderTargetView(render_rtv, black); + + ctx->PSSetShader(ps, 0, 0); + ctx->VSSetShader(vs, 0, 0); + + ctx->PSSetConstantBuffers(0, 1, &cb); + ctx->VSSetConstantBuffers(0, 1, &cb); + + if(impressions == 1) + { + ctx->OMSetBlendState(0, 0, ~0); + ctx->OMSetDepthStencilState(0, 0); + ctx->OMSetRenderTargets(1, &rtv, zsv); + ctx->ClearDepthStencilView(zsv, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, 1.0, 0); + draw_one(ctx, cbd, m, get_angle(time)); + } + else + { + ctx->OMSetBlendState(blend, blend_factor, ~0); + + float time_delta = (float)time - last_time; + float time_delta_per_impression = time_delta / impressions; + float base_time = last_time + time_delta_per_impression / 2; + for(unsigned impression = 0; impression < impressions; ++impression) + { + float impression_time = base_time + time_delta_per_impression * impression; + + ctx->ClearDepthStencilView(zsv, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, 1.0, 0); + + // do early z-pass since we must not write any pixel more than once due to blending + for(unsigned pass = 0; pass < 2; ++pass) + { + if(pass == 0) + { + ctx->OMSetRenderTargets(0, 0, zsv); + ctx->OMSetDepthStencilState(0, 0); + } + else + { + ctx->OMSetRenderTargets(1, &render_rtv, zsv); + ctx->OMSetDepthStencilState(zsa, 0); + } + + draw_one(ctx, cbd, m, get_angle(impression_time)); + } + } + + blitter->bind_draw_and_unbind(ctx, offscreen_srv, rtv, 0, 0, (float)width, (float)height, false); + } + last_time = (float)time; + } + + bool init(ID3D11Device* dev, int argc, char** argv) + { + this->dev = dev; + + for(char** p = argv + 1; *p; ++p) { + if(!strcmp(*p, "-w")) + wireframe = 1; + else if(!strcmp(*p, "-b")) + blue_only = true; + else if(!strcmp(*p, "-t")) + triangles = atoi(*++p); + else if(!strcmp(*p, "-m")) + impressions = (float)atof(*++p); + else if(!strcmp(*p, "-p")) + period = (float)atof(*++p); + else if(!strcmp(*p, "-s")) + speed = (float)atof(*++p); + else { + fprintf(stderr, "Usage: d3d11gears [-v|-w] [-t TRIANGLES]\n"); + fprintf(stderr, "d3d11gears is an enhanced port of glxgears to Direct3D 11\n"); + fprintf(stderr, "\n"); + //fprintf(stderr, "-v\t\tuse per-vertex diffuse-only lighting (classic glxgears look)\n"); + fprintf(stderr, "-w\t\twireframe mode\n"); + fprintf(stderr, "-t TRIANGLES\ttriangle budget (default is 3200)\n"); + fprintf(stderr, "-m IMPRESSIONS\tmotion blur impressions (default is 1)\n"); + fprintf(stderr, "-p PERIOD\tspeed reversal period (default is infinite)\n"); + fprintf(stderr, "-s SPEED\tgear speed (default is 1.0)\n"); + fprintf(stderr, "-b\tonly show blue gear (for faster motion blur)\n"); + return false; + } + } + + ensure(dev->CreatePixelShader(g_ps, sizeof(g_ps), NULL, &ps)); + ensure(dev->CreateVertexShader(g_vs, sizeof(g_vs), NULL, &vs)); + + gears[0].color = vec(0.8f, 0.1f, 0.0f, 1.0f); + gears[1].color = vec(0.0f, 0.8f, 0.2f, 1.0f); + gears[2].color = vec(0.2f, 0.2f, 1.0f, 1.0f); + + gears[0].mesh = build_gear(dev, triangles / 2, 1.0f, 4.0f, 1.0f, 20, 0.7f); + gears[1].mesh = build_gear(dev, triangles / 4, 0.5f, 2.0f, 2.0f, 10, 0.7f); + gears[2].mesh = build_gear(dev, triangles / 4, 1.3f, 2.0f, 0.5f, 10, 0.7f); + + gears[0].x = -3.0f; + gears[0].y = -2.0f; + gears[0].wmul = 1.0f; + gears[0].t0 = 0.0 * M_PI / 180.0f; + + gears[1].x = 3.1f; + gears[1].y = -2.0f; + gears[1].wmul = -2.0f; + gears[1].t0 = -9.0f * (float)M_PI / 180.0f; + + gears[2].x = -3.1f; + gears[2].y = 4.2f; + gears[2].wmul = -2.0f; + gears[2].t0 = -25.0f * (float)M_PI / 180.0f; + + D3D11_BUFFER_DESC bufferd; + memset(&bufferd, 0, sizeof(bufferd)); + bufferd.ByteWidth = sizeof(cbuf_t); + bufferd.Usage = D3D11_USAGE_DEFAULT; + bufferd.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + ensure(dev->CreateBuffer(&bufferd, 0, &cb)); + + if(impressions > 1) + { + D3D11_BLEND_DESC blendd; + memset(&blendd, 0, sizeof(blendd)); + blendd.RenderTarget[0].BlendEnable = TRUE; + blendd.RenderTarget[0].BlendOp = blendd.RenderTarget[0].BlendOpAlpha + = D3D11_BLEND_OP_ADD; + blendd.RenderTarget[0].SrcBlend = blendd.RenderTarget[0].SrcBlendAlpha + = D3D11_BLEND_BLEND_FACTOR; + blendd.RenderTarget[0].DestBlend = blendd.RenderTarget[0].DestBlendAlpha + = D3D11_BLEND_ONE; + blendd.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + ensure(dev->CreateBlendState(&blendd, &blend)); + + D3D11_DEPTH_STENCIL_DESC zsad; + memset(&zsad, 0, sizeof(zsad)); + zsad.DepthEnable = TRUE; + zsad.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; + zsad.DepthFunc = D3D11_COMPARISON_EQUAL; + ensure(dev->CreateDepthStencilState(&zsad, &zsa)); + + blitter = new d3d11_blitter(dev); + } + + return true; + } +}; + +d3d11_application* d3d11_application_create() +{ + return new d3d11gears(); +} diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11gears/d3d11gears.hlsl b/src/gallium/state_trackers/d3d1x/progs/d3d11gears/d3d11gears.hlsl new file mode 100755 index 0000000000..679d417cd7 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11gears/d3d11gears.hlsl @@ -0,0 +1,75 @@ +/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+cbuffer cb
+{
+ float4x4 proj;
+ float4x4 modelview;
+ float4 light;
+ float4 diffuse;
+ float4 specular;
+ float specular_power;
+};
+
+struct IA2VS
+{
+ float4 position : POSITION;
+ float3 normal : NORMAL;
+};
+
+struct VS2PS
+{
+ float4 position : SV_POSITION;
+ float3 normal : NORMAL;
+ float3 eye : EYE;
+ float3 light : LIGHT;
+};
+
+VS2PS vs(IA2VS input)
+{
+ VS2PS result;
+
+ float3 view = mul((float3x4)modelview, input.position);
+ result.position = mul((float4x4)proj, float4(view, 1));
+ result.light = light - view;
+ result.eye = -view;
+ result.normal = mul((float3x3)modelview, input.normal);
+
+ return result;
+}
+
+float4 ps(VS2PS input) : SV_TARGET
+{
+ float3 nlight = normalize(input.light);
+ float3 nnormal = normalize(input.normal);
+
+ float diffuse_c = saturate(dot(nnormal, nlight));
+ float specular_c = pow(saturate(dot(nnormal, normalize(normalize(input.eye) + nlight))), specular_power);
+
+ return diffuse * diffuse_c + specular * specular_c;
+}
+
+
diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11gears/d3d11gears.hlsl.ps.h b/src/gallium/state_trackers/d3d1x/progs/d3d11gears/d3d11gears.hlsl.ps.h new file mode 100755 index 0000000000..e83b5bb5a8 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11gears/d3d11gears.hlsl.ps.h @@ -0,0 +1,309 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 +// +// +// fxc /Fhd3d11gears.hlsl.ps.h /Eps /Tps_4_0 d3d11gears.hlsl +// +// +// Buffer Definitions: +// +// cbuffer cb +// { +// +// float4x4 proj; // Offset: 0 Size: 64 [unused] +// float4x4 modelview; // Offset: 64 Size: 64 [unused] +// float4 light; // Offset: 128 Size: 16 [unused] +// float4 diffuse; // Offset: 144 Size: 16 +// float4 specular; // Offset: 160 Size: 16 +// float specular_power; // Offset: 176 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim Slot Elements +// ------------------------------ ---------- ------- ----------- ---- -------- +// cb cbuffer NA NA 0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_POSITION 0 xyzw 0 POS float +// NORMAL 0 xyz 1 NONE float xyz +// EYE 0 xyz 2 NONE float xyz +// LIGHT 0 xyz 3 NONE float xyz +// +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_TARGET 0 xyzw 0 TARGET float xyzw +// +ps_4_0 +dcl_constantbuffer cb0[12], immediateIndexed +dcl_input_ps linear v1.xyz +dcl_input_ps linear v2.xyz +dcl_input_ps linear v3.xyz +dcl_output o0.xyzw +dcl_temps 3 +dp3 r0.x, v2.xyzx, v2.xyzx +rsq r0.x, r0.x +dp3 r0.y, v3.xyzx, v3.xyzx +rsq r0.y, r0.y +mul r0.yzw, r0.yyyy, v3.xxyz +mad r1.xyz, v2.xyzx, r0.xxxx, r0.yzwy +dp3 r0.x, r1.xyzx, r1.xyzx +rsq r0.x, r0.x +mul r1.xyz, r0.xxxx, r1.xyzx +dp3 r0.x, v1.xyzx, v1.xyzx +rsq r0.x, r0.x +mul r2.xyz, r0.xxxx, v1.xyzx +dp3_sat r0.x, r2.xyzx, r1.xyzx +dp3_sat r0.y, r2.xyzx, r0.yzwy +log r0.x, r0.x +mul r0.x, r0.x, cb0[11].x +exp r0.x, r0.x +mul r1.xyzw, r0.xxxx, cb0[10].xyzw +mad o0.xyzw, cb0[9].xyzw, r0.yyyy, r1.xyzw +ret +// Approximately 20 instruction slots used +#endif + +const BYTE g_ps[] = +{ + 68, 88, 66, 67, 91, 23, + 206, 102, 23, 38, 122, 59, + 55, 123, 215, 57, 98, 213, + 215, 191, 1, 0, 0, 0, + 92, 5, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 192, 1, 0, 0, 80, 2, + 0, 0, 132, 2, 0, 0, + 224, 4, 0, 0, 82, 68, + 69, 70, 132, 1, 0, 0, + 1, 0, 0, 0, 64, 0, + 0, 0, 1, 0, 0, 0, + 28, 0, 0, 0, 0, 4, + 255, 255, 0, 1, 0, 0, + 80, 1, 0, 0, 60, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 99, 98, 0, 171, 60, 0, + 0, 0, 6, 0, 0, 0, + 88, 0, 0, 0, 192, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 232, 0, + 0, 0, 0, 0, 0, 0, + 64, 0, 0, 0, 0, 0, + 0, 0, 240, 0, 0, 0, + 0, 0, 0, 0, 0, 1, + 0, 0, 64, 0, 0, 0, + 64, 0, 0, 0, 0, 0, + 0, 0, 240, 0, 0, 0, + 0, 0, 0, 0, 10, 1, + 0, 0, 128, 0, 0, 0, + 16, 0, 0, 0, 0, 0, + 0, 0, 16, 1, 0, 0, + 0, 0, 0, 0, 32, 1, + 0, 0, 144, 0, 0, 0, + 16, 0, 0, 0, 2, 0, + 0, 0, 16, 1, 0, 0, + 0, 0, 0, 0, 40, 1, + 0, 0, 160, 0, 0, 0, + 16, 0, 0, 0, 2, 0, + 0, 0, 16, 1, 0, 0, + 0, 0, 0, 0, 49, 1, + 0, 0, 176, 0, 0, 0, + 4, 0, 0, 0, 2, 0, + 0, 0, 64, 1, 0, 0, + 0, 0, 0, 0, 112, 114, + 111, 106, 0, 171, 171, 171, + 3, 0, 3, 0, 4, 0, + 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 109, 111, + 100, 101, 108, 118, 105, 101, + 119, 0, 108, 105, 103, 104, + 116, 0, 1, 0, 3, 0, + 1, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 100, 105, 102, 102, 117, 115, + 101, 0, 115, 112, 101, 99, + 117, 108, 97, 114, 0, 115, + 112, 101, 99, 117, 108, 97, + 114, 95, 112, 111, 119, 101, + 114, 0, 0, 0, 3, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 77, 105, 99, 114, 111, 115, + 111, 102, 116, 32, 40, 82, + 41, 32, 72, 76, 83, 76, + 32, 83, 104, 97, 100, 101, + 114, 32, 67, 111, 109, 112, + 105, 108, 101, 114, 32, 57, + 46, 50, 57, 46, 57, 53, + 50, 46, 51, 49, 49, 49, + 0, 171, 171, 171, 73, 83, + 71, 78, 136, 0, 0, 0, + 4, 0, 0, 0, 8, 0, + 0, 0, 104, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 15, 0, + 0, 0, 116, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 1, 0, 0, 0, 7, 7, + 0, 0, 123, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 2, 0, 0, 0, 7, 7, + 0, 0, 127, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 3, 0, 0, 0, 7, 7, + 0, 0, 83, 86, 95, 80, + 79, 83, 73, 84, 73, 79, + 78, 0, 78, 79, 82, 77, + 65, 76, 0, 69, 89, 69, + 0, 76, 73, 71, 72, 84, + 0, 171, 171, 171, 79, 83, + 71, 78, 44, 0, 0, 0, + 1, 0, 0, 0, 8, 0, + 0, 0, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 15, 0, + 0, 0, 83, 86, 95, 84, + 65, 82, 71, 69, 84, 0, + 171, 171, 83, 72, 68, 82, + 84, 2, 0, 0, 64, 0, + 0, 0, 149, 0, 0, 0, + 89, 0, 0, 4, 70, 142, + 32, 0, 0, 0, 0, 0, + 12, 0, 0, 0, 98, 16, + 0, 3, 114, 16, 16, 0, + 1, 0, 0, 0, 98, 16, + 0, 3, 114, 16, 16, 0, + 2, 0, 0, 0, 98, 16, + 0, 3, 114, 16, 16, 0, + 3, 0, 0, 0, 101, 0, + 0, 3, 242, 32, 16, 0, + 0, 0, 0, 0, 104, 0, + 0, 2, 3, 0, 0, 0, + 16, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 70, 18, 16, 0, 2, 0, + 0, 0, 70, 18, 16, 0, + 2, 0, 0, 0, 68, 0, + 0, 5, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 16, 0, 0, 7, 34, 0, + 16, 0, 0, 0, 0, 0, + 70, 18, 16, 0, 3, 0, + 0, 0, 70, 18, 16, 0, + 3, 0, 0, 0, 68, 0, + 0, 5, 34, 0, 16, 0, + 0, 0, 0, 0, 26, 0, + 16, 0, 0, 0, 0, 0, + 56, 0, 0, 7, 226, 0, + 16, 0, 0, 0, 0, 0, + 86, 5, 16, 0, 0, 0, + 0, 0, 6, 25, 16, 0, + 3, 0, 0, 0, 50, 0, + 0, 9, 114, 0, 16, 0, + 1, 0, 0, 0, 70, 18, + 16, 0, 2, 0, 0, 0, + 6, 0, 16, 0, 0, 0, + 0, 0, 150, 7, 16, 0, + 0, 0, 0, 0, 16, 0, + 0, 7, 18, 0, 16, 0, + 0, 0, 0, 0, 70, 2, + 16, 0, 1, 0, 0, 0, + 70, 2, 16, 0, 1, 0, + 0, 0, 68, 0, 0, 5, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 56, 0, + 0, 7, 114, 0, 16, 0, + 1, 0, 0, 0, 6, 0, + 16, 0, 0, 0, 0, 0, + 70, 2, 16, 0, 1, 0, + 0, 0, 16, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 70, 18, 16, 0, + 1, 0, 0, 0, 70, 18, + 16, 0, 1, 0, 0, 0, + 68, 0, 0, 5, 18, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 56, 0, 0, 7, + 114, 0, 16, 0, 2, 0, + 0, 0, 6, 0, 16, 0, + 0, 0, 0, 0, 70, 18, + 16, 0, 1, 0, 0, 0, + 16, 32, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 70, 2, 16, 0, 2, 0, + 0, 0, 70, 2, 16, 0, + 1, 0, 0, 0, 16, 32, + 0, 7, 34, 0, 16, 0, + 0, 0, 0, 0, 70, 2, + 16, 0, 2, 0, 0, 0, + 150, 7, 16, 0, 0, 0, + 0, 0, 47, 0, 0, 5, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, + 0, 0, 0, 0, 56, 0, + 0, 8, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 10, 128, 32, 0, 0, 0, + 0, 0, 11, 0, 0, 0, + 25, 0, 0, 5, 18, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 56, 0, 0, 8, + 242, 0, 16, 0, 1, 0, + 0, 0, 6, 0, 16, 0, + 0, 0, 0, 0, 70, 142, + 32, 0, 0, 0, 0, 0, + 10, 0, 0, 0, 50, 0, + 0, 10, 242, 32, 16, 0, + 0, 0, 0, 0, 70, 142, + 32, 0, 0, 0, 0, 0, + 9, 0, 0, 0, 86, 5, + 16, 0, 0, 0, 0, 0, + 70, 14, 16, 0, 1, 0, + 0, 0, 62, 0, 0, 1, + 83, 84, 65, 84, 116, 0, + 0, 0, 20, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 4, 0, 0, 0, + 17, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0 +}; diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11gears/d3d11gears.hlsl.vs.h b/src/gallium/state_trackers/d3d1x/progs/d3d11gears/d3d11gears.hlsl.vs.h new file mode 100755 index 0000000000..ee931091c2 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11gears/d3d11gears.hlsl.vs.h @@ -0,0 +1,308 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 +// +// +// fxc /Fhd3d11gears.hlsl.vs.h /Evs /Tvs_4_0 d3d11gears.hlsl +// +// +// Buffer Definitions: +// +// cbuffer cb +// { +// +// float4x4 proj; // Offset: 0 Size: 64 +// float4x4 modelview; // Offset: 64 Size: 64 +// float4 light; // Offset: 128 Size: 16 +// float4 diffuse; // Offset: 144 Size: 16 [unused] +// float4 specular; // Offset: 160 Size: 16 [unused] +// float specular_power; // Offset: 176 Size: 4 [unused] +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim Slot Elements +// ------------------------------ ---------- ------- ----------- ---- -------- +// cb cbuffer NA NA 0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// POSITION 0 xyzw 0 NONE float xyzw +// NORMAL 0 xyz 1 NONE float xyz +// +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_POSITION 0 xyzw 0 POS float xyzw +// NORMAL 0 xyz 1 NONE float xyz +// EYE 0 xyz 2 NONE float xyz +// LIGHT 0 xyz 3 NONE float xyz +// +vs_4_0 +dcl_constantbuffer cb0[9], immediateIndexed +dcl_input v0.xyzw +dcl_input v1.xyz +dcl_output_siv o0.xyzw, position +dcl_output o1.xyz +dcl_output o2.xyz +dcl_output o3.xyz +dcl_temps 2 +mul r0.xyz, v0.yyyy, cb0[5].xyzx +mad r0.xyz, cb0[4].xyzx, v0.xxxx, r0.xyzx +mad r0.xyz, cb0[6].xyzx, v0.zzzz, r0.xyzx +mad r0.xyz, cb0[7].xyzx, v0.wwww, r0.xyzx +mul r1.xyzw, r0.yyyy, cb0[1].xyzw +mad r1.xyzw, cb0[0].xyzw, r0.xxxx, r1.xyzw +mad r1.xyzw, cb0[2].xyzw, r0.zzzz, r1.xyzw +add o0.xyzw, r1.xyzw, cb0[3].xyzw +mul r1.xyz, v1.yyyy, cb0[5].xyzx +mad r1.xyz, cb0[4].xyzx, v1.xxxx, r1.xyzx +mad o1.xyz, cb0[6].xyzx, v1.zzzz, r1.xyzx +mov o2.xyz, -r0.xyzx +add o3.xyz, -r0.xyzx, cb0[8].xyzx +ret +// Approximately 14 instruction slots used +#endif + +const BYTE g_vs[] = +{ + 68, 88, 66, 67, 251, 82, + 65, 114, 135, 66, 139, 83, + 7, 10, 20, 121, 102, 38, + 44, 36, 1, 0, 0, 0, + 104, 5, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 192, 1, 0, 0, 16, 2, + 0, 0, 160, 2, 0, 0, + 236, 4, 0, 0, 82, 68, + 69, 70, 132, 1, 0, 0, + 1, 0, 0, 0, 64, 0, + 0, 0, 1, 0, 0, 0, + 28, 0, 0, 0, 0, 4, + 254, 255, 0, 1, 0, 0, + 80, 1, 0, 0, 60, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 99, 98, 0, 171, 60, 0, + 0, 0, 6, 0, 0, 0, + 88, 0, 0, 0, 192, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 232, 0, + 0, 0, 0, 0, 0, 0, + 64, 0, 0, 0, 2, 0, + 0, 0, 240, 0, 0, 0, + 0, 0, 0, 0, 0, 1, + 0, 0, 64, 0, 0, 0, + 64, 0, 0, 0, 2, 0, + 0, 0, 240, 0, 0, 0, + 0, 0, 0, 0, 10, 1, + 0, 0, 128, 0, 0, 0, + 16, 0, 0, 0, 2, 0, + 0, 0, 16, 1, 0, 0, + 0, 0, 0, 0, 32, 1, + 0, 0, 144, 0, 0, 0, + 16, 0, 0, 0, 0, 0, + 0, 0, 16, 1, 0, 0, + 0, 0, 0, 0, 40, 1, + 0, 0, 160, 0, 0, 0, + 16, 0, 0, 0, 0, 0, + 0, 0, 16, 1, 0, 0, + 0, 0, 0, 0, 49, 1, + 0, 0, 176, 0, 0, 0, + 4, 0, 0, 0, 0, 0, + 0, 0, 64, 1, 0, 0, + 0, 0, 0, 0, 112, 114, + 111, 106, 0, 171, 171, 171, + 3, 0, 3, 0, 4, 0, + 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 109, 111, + 100, 101, 108, 118, 105, 101, + 119, 0, 108, 105, 103, 104, + 116, 0, 1, 0, 3, 0, + 1, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 100, 105, 102, 102, 117, 115, + 101, 0, 115, 112, 101, 99, + 117, 108, 97, 114, 0, 115, + 112, 101, 99, 117, 108, 97, + 114, 95, 112, 111, 119, 101, + 114, 0, 0, 0, 3, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 77, 105, 99, 114, 111, 115, + 111, 102, 116, 32, 40, 82, + 41, 32, 72, 76, 83, 76, + 32, 83, 104, 97, 100, 101, + 114, 32, 67, 111, 109, 112, + 105, 108, 101, 114, 32, 57, + 46, 50, 57, 46, 57, 53, + 50, 46, 51, 49, 49, 49, + 0, 171, 171, 171, 73, 83, + 71, 78, 72, 0, 0, 0, + 2, 0, 0, 0, 8, 0, + 0, 0, 56, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 15, 15, + 0, 0, 65, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 1, 0, 0, 0, 7, 7, + 0, 0, 80, 79, 83, 73, + 84, 73, 79, 78, 0, 78, + 79, 82, 77, 65, 76, 0, + 79, 83, 71, 78, 136, 0, + 0, 0, 4, 0, 0, 0, + 8, 0, 0, 0, 104, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 3, 0, + 0, 0, 0, 0, 0, 0, + 15, 0, 0, 0, 116, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 1, 0, 0, 0, + 7, 8, 0, 0, 123, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 2, 0, 0, 0, + 7, 8, 0, 0, 127, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 3, 0, 0, 0, + 7, 8, 0, 0, 83, 86, + 95, 80, 79, 83, 73, 84, + 73, 79, 78, 0, 78, 79, + 82, 77, 65, 76, 0, 69, + 89, 69, 0, 76, 73, 71, + 72, 84, 0, 171, 171, 171, + 83, 72, 68, 82, 68, 2, + 0, 0, 64, 0, 1, 0, + 145, 0, 0, 0, 89, 0, + 0, 4, 70, 142, 32, 0, + 0, 0, 0, 0, 9, 0, + 0, 0, 95, 0, 0, 3, + 242, 16, 16, 0, 0, 0, + 0, 0, 95, 0, 0, 3, + 114, 16, 16, 0, 1, 0, + 0, 0, 103, 0, 0, 4, + 242, 32, 16, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 101, 0, 0, 3, 114, 32, + 16, 0, 1, 0, 0, 0, + 101, 0, 0, 3, 114, 32, + 16, 0, 2, 0, 0, 0, + 101, 0, 0, 3, 114, 32, + 16, 0, 3, 0, 0, 0, + 104, 0, 0, 2, 2, 0, + 0, 0, 56, 0, 0, 8, + 114, 0, 16, 0, 0, 0, + 0, 0, 86, 21, 16, 0, + 0, 0, 0, 0, 70, 130, + 32, 0, 0, 0, 0, 0, + 5, 0, 0, 0, 50, 0, + 0, 10, 114, 0, 16, 0, + 0, 0, 0, 0, 70, 130, + 32, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 6, 16, + 16, 0, 0, 0, 0, 0, + 70, 2, 16, 0, 0, 0, + 0, 0, 50, 0, 0, 10, + 114, 0, 16, 0, 0, 0, + 0, 0, 70, 130, 32, 0, + 0, 0, 0, 0, 6, 0, + 0, 0, 166, 26, 16, 0, + 0, 0, 0, 0, 70, 2, + 16, 0, 0, 0, 0, 0, + 50, 0, 0, 10, 114, 0, + 16, 0, 0, 0, 0, 0, + 70, 130, 32, 0, 0, 0, + 0, 0, 7, 0, 0, 0, + 246, 31, 16, 0, 0, 0, + 0, 0, 70, 2, 16, 0, + 0, 0, 0, 0, 56, 0, + 0, 8, 242, 0, 16, 0, + 1, 0, 0, 0, 86, 5, + 16, 0, 0, 0, 0, 0, + 70, 142, 32, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 50, 0, 0, 10, 242, 0, + 16, 0, 1, 0, 0, 0, + 70, 142, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 6, 0, 16, 0, 0, 0, + 0, 0, 70, 14, 16, 0, + 1, 0, 0, 0, 50, 0, + 0, 10, 242, 0, 16, 0, + 1, 0, 0, 0, 70, 142, + 32, 0, 0, 0, 0, 0, + 2, 0, 0, 0, 166, 10, + 16, 0, 0, 0, 0, 0, + 70, 14, 16, 0, 1, 0, + 0, 0, 0, 0, 0, 8, + 242, 32, 16, 0, 0, 0, + 0, 0, 70, 14, 16, 0, + 1, 0, 0, 0, 70, 142, + 32, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 56, 0, + 0, 8, 114, 0, 16, 0, + 1, 0, 0, 0, 86, 21, + 16, 0, 1, 0, 0, 0, + 70, 130, 32, 0, 0, 0, + 0, 0, 5, 0, 0, 0, + 50, 0, 0, 10, 114, 0, + 16, 0, 1, 0, 0, 0, + 70, 130, 32, 0, 0, 0, + 0, 0, 4, 0, 0, 0, + 6, 16, 16, 0, 1, 0, + 0, 0, 70, 2, 16, 0, + 1, 0, 0, 0, 50, 0, + 0, 10, 114, 32, 16, 0, + 1, 0, 0, 0, 70, 130, + 32, 0, 0, 0, 0, 0, + 6, 0, 0, 0, 166, 26, + 16, 0, 1, 0, 0, 0, + 70, 2, 16, 0, 1, 0, + 0, 0, 54, 0, 0, 6, + 114, 32, 16, 0, 2, 0, + 0, 0, 70, 2, 16, 128, + 65, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, + 114, 32, 16, 0, 3, 0, + 0, 0, 70, 2, 16, 128, + 65, 0, 0, 0, 0, 0, + 0, 0, 70, 130, 32, 0, + 0, 0, 0, 0, 8, 0, + 0, 0, 62, 0, 0, 1, + 83, 84, 65, 84, 116, 0, + 0, 0, 14, 0, 0, 0, + 2, 0, 0, 0, 0, 0, + 0, 0, 6, 0, 0, 0, + 5, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0 +}; diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11gears/d3d11gears.vcxproj b/src/gallium/state_trackers/d3d1x/progs/d3d11gears/d3d11gears.vcxproj new file mode 100755 index 0000000000..eb2c37a961 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11gears/d3d11gears.vcxproj @@ -0,0 +1,100 @@ +<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{706313AB-8F2C-48D2-9F67-31AA043F48C9}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>d3d11gears</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(SolutionDir)\d3d11app</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalDependencies>d3d11.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(SolutionDir)\d3d11app</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalDependencies>d3d11.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="..\d3d11app\d3d11winmain.cpp" />
+ <ClCompile Include="d3d11gears.cpp" />
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="d3d11gears.hlsl">
+ <FileType>Document</FileType>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">"$(DXSDK_DIR)\Utilities\bin\x86\fxc.exe" /Fh%(Identity).ps.h /Eps /Tps_4_0 %(Identity)
+"$(DXSDK_DIR)\Utilities\bin\x86\fxc.exe" /Fh%(Identity).vs.h /Evs /Tvs_4_0 %(Identity)</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Identity).ps.h;%(Identity).vs.h;%(Outputs)</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="..\d3d11app\d3d11app.h" />
+ <ClInclude Include="d3d11gears.hlsl.ps.h" />
+ <ClInclude Include="d3d11gears.hlsl.vs.h" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.cpp b/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.cpp new file mode 100755 index 0000000000..54ca08f23c --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.cpp @@ -0,0 +1,227 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#define _USE_MATH_DEFINES +#include "d3d11app.h" +#include "d3d11spikysphere.hlsl.vs.h" +#include "d3d11spikysphere.hlsl.hs.h" +#include "d3d11spikysphere.hlsl.ds.h" +#include "d3d11spikysphere.hlsl.ps.h" + +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <float.h> +#include <D3DX10math.h> + +struct cb_frame_t +{ + D3DXMATRIX model; + D3DXMATRIX view_proj; + float disp_scale; + float disp_freq; + float tess_factor; +}; + +static float vertex_data[] = +{ + 1.0, 0.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0, + + 0.0, 1.0, 0.0, + -1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, + + 0.0, -1.0, 0.0, + 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, + + -1.0, 0.0, 0.0, + 0.0, -1.0, 0.0, + 0.0, 0.0, 1.0, + + 0.0, 1.0, 0.0, + 1.0, 0.0, 0.0, + 0.0, 0.0, -1.0, + + -1.0, 0.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 0.0, -1.0, + + 1.0, 0.0, 0.0, + 0.0, -1.0, 0.0, + 0.0, 0.0, -1.0, + + 0.0, -1.0, 0.0, + -1.0, 0.0, 0.0, + 0.0, 0.0, -1.0, +}; + +struct d3d11spikysphere : public d3d11_application +{ + ID3D11Device* dev; + ID3D11PixelShader* ps; + ID3D11DomainShader* ds; + ID3D11HullShader* hs; + ID3D11VertexShader* vs; + ID3D11InputLayout* layout; + ID3D11Buffer* vb; + ID3D11RenderTargetView* rtv; + ID3D11DepthStencilView* zsv; + ID3D11Buffer* cb_frame; + + int cur_width; + int cur_height; + + d3d11spikysphere() + : cur_width(-1), cur_height(-1), zsv(0) + {} + + bool init(ID3D11Device* dev, int argc, char** argv) + { + this->dev = dev; + ensure(dev->CreateVertexShader(g_vs, sizeof(g_vs), NULL, &vs)); + ensure(dev->CreateHullShader(g_hs, sizeof(g_hs), NULL, &hs)); + ensure(dev->CreateDomainShader(g_ds, sizeof(g_ds), NULL, &ds)); + ensure(dev->CreatePixelShader(g_ps, sizeof(g_ps), NULL, &ps)); + + D3D11_INPUT_ELEMENT_DESC elements[1] = + { + {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, + 0, D3D11_INPUT_PER_VERTEX_DATA, 0}, + }; + + ensure(dev->CreateInputLayout(elements, 1, g_vs, sizeof(g_vs), &layout)); + + D3D11_BUFFER_DESC bufferd; + bufferd.ByteWidth = sizeof(vertex_data); + bufferd.Usage = D3D11_USAGE_IMMUTABLE; + bufferd.BindFlags = D3D11_BIND_VERTEX_BUFFER; + bufferd.CPUAccessFlags = 0; + bufferd.MiscFlags = 0; + bufferd.StructureByteStride = 0; + + D3D11_SUBRESOURCE_DATA buffersd; + buffersd.pSysMem = vertex_data; + + ensure(dev->CreateBuffer(&bufferd, &buffersd, &vb)); + + D3D11_BUFFER_DESC cbd; + cbd.ByteWidth = (sizeof(cb_frame_t) + 15) & ~15; + cbd.Usage = D3D11_USAGE_DYNAMIC; + cbd.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + cbd.MiscFlags = 0; + cbd.StructureByteStride = 0; + + ensure(dev->CreateBuffer(&cbd, NULL, &cb_frame)); + return true; + } + + void draw(ID3D11DeviceContext* ctx, ID3D11RenderTargetView* rtv, unsigned width, unsigned height, double time) + { + D3D11_VIEWPORT vp; + memset(&vp, 0, sizeof(vp)); + vp.Width = (float)width; + vp.Height = (float)height; + vp.MaxDepth = 1.0f; + + if(width != cur_width || height != cur_height) + { + if(zsv) + zsv->Release(); + ID3D11Texture2D* zsbuf; + D3D11_TEXTURE2D_DESC zsbufd; + memset(&zsbufd, 0, sizeof(zsbufd)); + zsbufd.Width = width; + zsbufd.Height = height; + zsbufd.Format = DXGI_FORMAT_D32_FLOAT; + zsbufd.ArraySize = 1; + zsbufd.MipLevels = 1; + zsbufd.SampleDesc.Count = 1; + zsbufd.BindFlags = D3D11_BIND_DEPTH_STENCIL; + ensure(dev->CreateTexture2D(&zsbufd, 0, &zsbuf)); + ensure(dev->CreateDepthStencilView(zsbuf, 0, &zsv)); + zsbuf->Release(); + } + + float black[4] = {0, 0, 0, 0}; + + D3D11_MAPPED_SUBRESOURCE map; + ensure(ctx->Map(cb_frame, 0, D3D11_MAP_WRITE_DISCARD, 0, &map)); + cb_frame_t* cb_frame_data = (cb_frame_t*)map.pData; + D3DXMatrixIdentity(&cb_frame_data->model); + + D3DXMATRIX view; + D3DXVECTOR3 eye(2.0f * (float)sin(time), 0.0f, 2.0f * (float)cos(time)); + D3DXVECTOR3 at(0, 0, 0); + D3DXVECTOR3 up(0, 1, 0); + D3DXMatrixLookAtLH(&view, &eye, &at, &up); + D3DXMATRIX proj; + D3DXMatrixPerspectiveLH(&proj, 1.1f, 1.1f, 1.0f, 3.0f); + + cb_frame_data->view_proj = view * proj; + float min_tess_factor = 1.0f; + cb_frame_data->tess_factor = (1.0f - (float)cos(time)) * ((64.0f - min_tess_factor) / 2.0f) + min_tess_factor; + cb_frame_data->disp_scale = 0.9f; + //cb_frame_data->disp_scale = (sin(time) + 1.0) / 2.0; + cb_frame_data->disp_freq = 5.0f * (float)M_PI; + //cb_frame_data->disp_freq = (4.0 + 4.0 * cos(time / 5.0)) * PI; + ctx->Unmap(cb_frame, 0); + + ctx->HSSetConstantBuffers(0, 1, &cb_frame); + ctx->DSSetConstantBuffers(0, 1, &cb_frame); + + //ctx->OMSetBlendState(bs, black, ~0); + //ctx->OMSetDepthStencilState(dss, 0); + ctx->OMSetRenderTargets(1, &rtv, zsv); + //ctx->RSSetState(rs); + ctx->RSSetViewports(1, &vp); + + ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST); + ctx->IASetInputLayout(layout); + unsigned stride = 3 * 4; + unsigned offset = 0; + ctx->IASetVertexBuffers(0, 1, &vb, &stride, &offset); + + ctx->VSSetShader(vs, NULL, 0); + ctx->HSSetShader(hs, NULL, 0); + ctx->DSSetShader(ds, NULL, 0); + ctx->GSSetShader(NULL, NULL, 0); + ctx->PSSetShader(ps, NULL, 0); + + ctx->ClearRenderTargetView(rtv, black); + ctx->ClearDepthStencilView(zsv, D3D11_CLEAR_DEPTH, 1.0f, 0); + + ctx->Draw(3 * 8, 0); + } +}; + +d3d11_application* d3d11_application_create() +{ + return new d3d11spikysphere(); +} diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.hlsl b/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.hlsl new file mode 100755 index 0000000000..1edf42f769 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.hlsl @@ -0,0 +1,193 @@ +/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#define INPUT_PATCH_SIZE 3
+#define OUTPUT_PATCH_SIZE 3
+
+static const float PI = 3.141592653589793238462643f;
+
+cbuffer cb_frame
+{
+ float4x4 model;
+ float4x4 view_proj;
+ float disp_scale;
+ float disp_freq;
+ float tess_factor;
+};
+
+struct IA2VS
+{
+ float3 position : POSITION;
+};
+
+struct VS2HS
+{
+ float3 position : POSITION;
+};
+
+VS2HS vs(IA2VS input)
+{
+ VS2HS result;
+ result.position = input.position;
+ return result;
+}
+
+struct HS2DS_PATCH
+{
+ float tessouter[3] : SV_TessFactor;
+ float tessinner[1] : SV_InsideTessFactor;
+};
+
+struct HS2DS
+{
+ float3 position : POSITION;
+};
+
+HS2DS_PATCH hs_patch(InputPatch<VS2HS, INPUT_PATCH_SIZE> ip)
+{
+ HS2DS_PATCH result;
+
+ result.tessouter[0] = result.tessouter[1] = result.tessouter[2]
+ = result.tessinner[0] = tess_factor;
+ return result;
+}
+
+[domain("tri")]
+[partitioning("fractional_even")]
+[outputtopology("triangle_cw")]
+[outputcontrolpoints(OUTPUT_PATCH_SIZE)]
+[patchconstantfunc("hs_patch")]
+HS2DS hs(InputPatch<VS2HS, INPUT_PATCH_SIZE> p, uint i : SV_OutputControlPointID)
+{
+ HS2DS result;
+ result.position = p[i].position;
+ return result;
+}
+
+struct DS2PS
+{
+ float4 position : SV_POSITION;
+ float3 objpos : OBJPOS;
+ // float3 worldpos : WORLDPOS;
+ float3 objnormal : OBJNORMAL;
+ float3 worldnormal : WORLDNORMAL;
+};
+
+float3 dnormf_dt(float3 f, float3 dfdt)
+{
+ float ff = dot(f, f);
+ return (ff * dfdt - dot(f, dfdt) * f) / (ff * sqrt(ff));
+}
+
+float3 map(float3 p, float3 q, float3 r, float3 k)
+{
+ return normalize(p * k.x + q * k.y + r * k.z);
+}
+
+float3 dmap_du(float3 p, float3 q, float3 r, float3 k)
+{
+ return dnormf_dt(p * k.x + q * k.y + r * k.z, p);
+}
+
+float dispf(float v)
+{
+ return cos(v * disp_freq);
+}
+
+float ddispf(float v)
+{
+ return -sin(v * disp_freq) * disp_freq;
+}
+
+float disp(float3 k)
+{
+ return dispf(k.x) * dispf(k.y) * dispf(k.z);
+}
+
+float ddisp_du(float3 k)
+{
+ return ddispf(k.x) * dispf(k.y) * dispf(k.z);
+}
+
+float3 ddisp(float3 k)
+{
+ float3 f = float3(dispf(k.x), dispf(k.y), dispf(k.z));
+ return float3(ddispf(k.x) * f.y * f.z, ddispf(k.y) * f.z * f.x, ddispf(k.z) * f.x * f.y);
+}
+
+[domain("tri")]
+DS2PS ds(HS2DS_PATCH input,
+ float3 k : SV_DomainLocation,
+ const OutputPatch<HS2DS, OUTPUT_PATCH_SIZE> patch)
+{
+ DS2PS result;
+
+ float3 s = map(patch[0].position, patch[1].position, patch[2].position, k);
+ float3 d = 1.0 + disp(s) * disp_scale;
+ result.objpos = s * d;
+ result.objpos /= (1.0 + disp_scale);
+ float3 worldpos = mul(model, float4(result.objpos, 1.0f));
+ result.position = mul(view_proj, float4(worldpos, 1.0f));
+
+ float3 dd = ddisp(s) * disp_scale;
+
+ /*
+ float3 ds_du = dmap_du(patch[0].position, patch[1].position, patch[2].position, k);
+ float3 ds_dv = dmap_du(patch[1].position, patch[2].position, patch[0].position, k.yzx);
+ float3 ds_dw = dmap_du(patch[2].position, patch[0].position, patch[1].position, k.zxy);
+
+ float3 ds_dU = ds_du - ds_dw;
+ float3 ds_dV = ds_dv - ds_dw;
+
+ float3 dc_dU = s * dot(dd, ds_dU) + ds_dU * d;
+ float3 dc_dV = s * dot(dd, ds_dV) + ds_dV * d;
+ */
+
+ // this should be faster
+ float3 _u = normalize((abs(s.x) > abs(s.y)) ? float3(-s.z, 0, s.x) : float3(0, -s.z, s.y));
+ float3 _v = normalize(cross(s, _u));
+ float3 dc_dU = s * dot(dd, _u) + _u * d;
+ float3 dc_dV = s * dot(dd, _v) + _v * d;
+
+ result.objnormal = normalize(cross(dc_dU, dc_dV));
+ result.worldnormal = mul(model, result.objnormal);
+ return result;
+}
+
+float4 ps(DS2PS input) : SV_TARGET
+{
+ float3 pseudoambient = float3(0.4, 0.4, 0.6);
+ float3 diffuse = float3(0.6, 0.6, 0.4);
+ float3 light = normalize(float3(0, 1, -1));
+
+ float4 r;
+// r.xyz = normalize(input.objpos + 2 * input.objnormal);
+ r.xyz = pseudoambient * saturate(dot(normalize(input.objnormal), normalize(input.objpos)));
+ r.xyz += saturate(dot(light, normalize(input.worldnormal))) * diffuse;
+
+ r.w = 1;
+ return r;
+}
diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.hlsl.ds.h b/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.hlsl.ds.h new file mode 100755 index 0000000000..45045e5c61 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.hlsl.ds.h @@ -0,0 +1,623 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 +// +// +// fxc /Fhd3d11spikysphere.hlsl.ds.h /Eds /Tds_5_0 d3d11spikysphere.hlsl +// +// +// Buffer Definitions: +// +// cbuffer cb_frame +// { +// +// float4x4 model; // Offset: 0 Size: 64 +// float4x4 view_proj; // Offset: 64 Size: 64 +// float disp_scale; // Offset: 128 Size: 4 +// float disp_freq; // Offset: 132 Size: 4 +// float tess_factor; // Offset: 136 Size: 4 [unused] +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim Slot Elements +// ------------------------------ ---------- ------- ----------- ---- -------- +// cb_frame cbuffer NA NA 0 1 +// +// +// +// Patch Constant signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_TessFactor 0 x 0 TRIEDGE float +// SV_TessFactor 1 x 1 TRIEDGE float +// SV_TessFactor 2 x 2 TRIEDGE float +// SV_InsideTessFactor 0 x 3 TRIINT float +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// POSITION 0 xyz 0 NONE float xyz +// +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_POSITION 0 xyzw 0 POS float xyzw +// OBJPOS 0 xyz 1 NONE float xyz +// OBJNORMAL 0 xyz 2 NONE float xyz +// WORLDNORMAL 0 xyz 3 NONE float xyz +// +// Tessellation Domain # of control points +// -------------------- -------------------- +// Triangle 3 +// +ds_5_0 +dcl_input_control_point_count 3 +dcl_tessellator_domain domain_tri +dcl_globalFlags refactoringAllowed +dcl_constantbuffer cb0[9], immediateIndexed +dcl_input vDomain.xyz +dcl_input vicp[3][0].xyz +dcl_output_siv o0.xyzw, position +dcl_output o1.xyz +dcl_output o2.xyz +dcl_output o3.xyz +dcl_temps 5 +add r0.x, cb0[8].x, l(1.000000) +mul r0.yzw, vDomain.yyyy, vicp[1][0].yyzx +mad r0.yzw, vicp[0][0].yyzx, vDomain.xxxx, r0.yyzw +mad r0.yzw, vicp[2][0].yyzx, vDomain.zzzz, r0.yyzw +dp3 r1.x, r0.yzwy, r0.yzwy +rsq r1.x, r1.x +mul r0.yzw, r0.yyzw, r1.xxxx +mul r1.xyz, r0.wyzw, cb0[8].yyyy +sincos null, r2.xyz, r1.zxyz +sincos r1.xyz, null, -r1.xyzx +mul r1.xyz, r1.xyzx, cb0[8].yyyy +mul r1.xyz, r2.zxyz, r1.xyzx +mul r1.xyz, r2.xyzx, r1.xyzx +mul r1.xyz, r1.xyzx, cb0[8].xxxx +mul r1.w, r2.z, r2.y +mul r1.w, r2.x, r1.w +mad r1.w, r1.w, cb0[8].x, l(1.000000) +mul r2.xyz, r0.wyzw, r1.wwww +div r2.xyz, r2.xyzx, r0.xxxx +mul r3.xyz, r2.yyyy, cb0[1].xyzx +mad r3.xyz, cb0[0].xyzx, r2.xxxx, r3.xyzx +mad r3.xyz, cb0[2].xyzx, r2.zzzz, r3.xyzx +mov o1.xyz, r2.xyzx +add r2.xyz, r3.xyzx, cb0[3].xyzx +mul r3.xyzw, r2.yyyy, cb0[5].xyzw +mad r3.xyzw, cb0[4].xyzw, r2.xxxx, r3.xyzw +mad r2.xyzw, cb0[6].xyzw, r2.zzzz, r3.xyzw +add o0.xyzw, r2.xyzw, cb0[7].xyzw +mov r2.y, l(0) +lt r0.x, |r0.y|, |r0.w| +mul r2.xz, r0.zzwz, l(-1.000000, 0.000000, 1.000000, 0.000000) +mov r2.w, r0.y +movc r2.xyz, r0.xxxx, r2.zxyz, r2.wyxw +dp3 r0.x, r2.xyzx, r2.xyzx +rsq r0.x, r0.x +mul r2.xyz, r0.xxxx, r2.xyzx +mul r3.xyz, r0.wyzw, r2.xyzx +mad r3.xyz, r0.zwyz, r2.yzxy, -r3.xyzx +dp3 r0.x, r3.xyzx, r3.xyzx +rsq r0.x, r0.x +mul r3.xyz, r0.xxxx, r3.xyzx +dp3 r0.x, r1.yzxy, r3.xyzx +mul r3.xyz, r1.wwww, r3.xyzx +mul r4.xyz, r1.wwww, r2.xyzx +dp3 r1.x, r1.zxyz, r2.xyzx +mad r1.xyz, r0.zwyz, r1.xxxx, r4.xyzx +mad r0.xyz, r0.yzwy, r0.xxxx, r3.xyzx +mul r2.xyz, r0.xyzx, r1.xyzx +mad r0.xyz, r1.zxyz, r0.yzxy, -r2.xyzx +dp3 r0.w, r0.xyzx, r0.xyzx +rsq r0.w, r0.w +mul r0.xyz, r0.wwww, r0.xyzx +mov o2.xyz, r0.xyzx +mul r1.xyz, r0.yyyy, cb0[1].xyzx +mad r0.xyw, cb0[0].xyxz, r0.xxxx, r1.xyxz +mad o3.xyz, cb0[2].xyzx, r0.zzzz, r0.xywx +ret +// Approximately 57 instruction slots used +#endif + +const BYTE g_ds[] = +{ + 68, 88, 66, 67, 0, 128, + 111, 5, 170, 61, 238, 30, + 169, 104, 139, 245, 182, 233, + 180, 255, 1, 0, 0, 0, + 112, 11, 0, 0, 6, 0, + 0, 0, 56, 0, 0, 0, + 68, 2, 0, 0, 120, 2, + 0, 0, 12, 3, 0, 0, + 168, 3, 0, 0, 212, 10, + 0, 0, 82, 68, 69, 70, + 4, 2, 0, 0, 1, 0, + 0, 0, 104, 0, 0, 0, + 1, 0, 0, 0, 60, 0, + 0, 0, 0, 5, 83, 68, + 0, 1, 0, 0, 210, 1, + 0, 0, 82, 68, 49, 49, + 60, 0, 0, 0, 24, 0, + 0, 0, 32, 0, 0, 0, + 40, 0, 0, 0, 36, 0, + 0, 0, 12, 0, 0, 0, + 0, 0, 0, 0, 92, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 99, 98, 95, 102, 114, 97, + 109, 101, 0, 171, 171, 171, + 92, 0, 0, 0, 5, 0, + 0, 0, 128, 0, 0, 0, + 144, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 72, 1, 0, 0, 0, 0, + 0, 0, 64, 0, 0, 0, + 2, 0, 0, 0, 88, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 124, 1, + 0, 0, 64, 0, 0, 0, + 64, 0, 0, 0, 2, 0, + 0, 0, 88, 1, 0, 0, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 134, 1, 0, 0, + 128, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 152, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 188, 1, 0, 0, 132, 0, + 0, 0, 4, 0, 0, 0, + 2, 0, 0, 0, 152, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 198, 1, + 0, 0, 136, 0, 0, 0, + 4, 0, 0, 0, 0, 0, + 0, 0, 152, 1, 0, 0, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 109, 111, 100, 101, + 108, 0, 102, 108, 111, 97, + 116, 52, 120, 52, 0, 171, + 3, 0, 3, 0, 4, 0, + 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 78, 1, 0, 0, + 118, 105, 101, 119, 95, 112, + 114, 111, 106, 0, 100, 105, + 115, 112, 95, 115, 99, 97, + 108, 101, 0, 102, 108, 111, + 97, 116, 0, 171, 0, 0, + 3, 0, 1, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 145, 1, 0, 0, 100, 105, + 115, 112, 95, 102, 114, 101, + 113, 0, 116, 101, 115, 115, + 95, 102, 97, 99, 116, 111, + 114, 0, 77, 105, 99, 114, + 111, 115, 111, 102, 116, 32, + 40, 82, 41, 32, 72, 76, + 83, 76, 32, 83, 104, 97, + 100, 101, 114, 32, 67, 111, + 109, 112, 105, 108, 101, 114, + 32, 57, 46, 50, 57, 46, + 57, 53, 50, 46, 51, 49, + 49, 49, 0, 171, 73, 83, + 71, 78, 44, 0, 0, 0, + 1, 0, 0, 0, 8, 0, + 0, 0, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 7, 7, + 0, 0, 80, 79, 83, 73, + 84, 73, 79, 78, 0, 171, + 171, 171, 80, 67, 83, 71, + 140, 0, 0, 0, 4, 0, + 0, 0, 8, 0, 0, 0, + 104, 0, 0, 0, 0, 0, + 0, 0, 13, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 104, 0, 0, 0, 1, 0, + 0, 0, 13, 0, 0, 0, + 3, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 104, 0, 0, 0, 2, 0, + 0, 0, 13, 0, 0, 0, + 3, 0, 0, 0, 2, 0, + 0, 0, 1, 0, 0, 0, + 118, 0, 0, 0, 0, 0, + 0, 0, 14, 0, 0, 0, + 3, 0, 0, 0, 3, 0, + 0, 0, 1, 0, 0, 0, + 83, 86, 95, 84, 101, 115, + 115, 70, 97, 99, 116, 111, + 114, 0, 83, 86, 95, 73, + 110, 115, 105, 100, 101, 84, + 101, 115, 115, 70, 97, 99, + 116, 111, 114, 0, 171, 171, + 79, 83, 71, 78, 148, 0, + 0, 0, 4, 0, 0, 0, + 8, 0, 0, 0, 104, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 3, 0, + 0, 0, 0, 0, 0, 0, + 15, 0, 0, 0, 116, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 1, 0, 0, 0, + 7, 8, 0, 0, 123, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 2, 0, 0, 0, + 7, 8, 0, 0, 133, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 3, 0, 0, 0, + 7, 8, 0, 0, 83, 86, + 95, 80, 79, 83, 73, 84, + 73, 79, 78, 0, 79, 66, + 74, 80, 79, 83, 0, 79, + 66, 74, 78, 79, 82, 77, + 65, 76, 0, 87, 79, 82, + 76, 68, 78, 79, 82, 77, + 65, 76, 0, 171, 171, 171, + 83, 72, 69, 88, 36, 7, + 0, 0, 80, 0, 4, 0, + 201, 1, 0, 0, 147, 24, + 0, 1, 149, 16, 0, 1, + 106, 8, 0, 1, 89, 0, + 0, 4, 70, 142, 32, 0, + 0, 0, 0, 0, 9, 0, + 0, 0, 95, 0, 0, 2, + 114, 192, 1, 0, 95, 0, + 0, 4, 114, 144, 33, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 103, 0, 0, 4, + 242, 32, 16, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 101, 0, 0, 3, 114, 32, + 16, 0, 1, 0, 0, 0, + 101, 0, 0, 3, 114, 32, + 16, 0, 2, 0, 0, 0, + 101, 0, 0, 3, 114, 32, + 16, 0, 3, 0, 0, 0, + 104, 0, 0, 2, 5, 0, + 0, 0, 0, 0, 0, 8, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 128, 32, 0, + 0, 0, 0, 0, 8, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 128, 63, 56, 0, + 0, 7, 226, 0, 16, 0, + 0, 0, 0, 0, 86, 197, + 1, 0, 86, 146, 33, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 50, 0, 0, 9, + 226, 0, 16, 0, 0, 0, + 0, 0, 86, 146, 33, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 6, 192, 1, 0, + 86, 14, 16, 0, 0, 0, + 0, 0, 50, 0, 0, 9, + 226, 0, 16, 0, 0, 0, + 0, 0, 86, 146, 33, 0, + 2, 0, 0, 0, 0, 0, + 0, 0, 166, 202, 1, 0, + 86, 14, 16, 0, 0, 0, + 0, 0, 16, 0, 0, 7, + 18, 0, 16, 0, 1, 0, + 0, 0, 150, 7, 16, 0, + 0, 0, 0, 0, 150, 7, + 16, 0, 0, 0, 0, 0, + 68, 0, 0, 5, 18, 0, + 16, 0, 1, 0, 0, 0, + 10, 0, 16, 0, 1, 0, + 0, 0, 56, 0, 0, 7, + 226, 0, 16, 0, 0, 0, + 0, 0, 86, 14, 16, 0, + 0, 0, 0, 0, 6, 0, + 16, 0, 1, 0, 0, 0, + 56, 0, 0, 8, 114, 0, + 16, 0, 1, 0, 0, 0, + 118, 14, 16, 0, 0, 0, + 0, 0, 86, 133, 32, 0, + 0, 0, 0, 0, 8, 0, + 0, 0, 77, 0, 0, 6, + 0, 208, 0, 0, 114, 0, + 16, 0, 2, 0, 0, 0, + 38, 9, 16, 0, 1, 0, + 0, 0, 77, 0, 0, 7, + 114, 0, 16, 0, 1, 0, + 0, 0, 0, 208, 0, 0, + 70, 2, 16, 128, 65, 0, + 0, 0, 1, 0, 0, 0, + 56, 0, 0, 8, 114, 0, + 16, 0, 1, 0, 0, 0, + 70, 2, 16, 0, 1, 0, + 0, 0, 86, 133, 32, 0, + 0, 0, 0, 0, 8, 0, + 0, 0, 56, 0, 0, 7, + 114, 0, 16, 0, 1, 0, + 0, 0, 38, 9, 16, 0, + 2, 0, 0, 0, 70, 2, + 16, 0, 1, 0, 0, 0, + 56, 0, 0, 7, 114, 0, + 16, 0, 1, 0, 0, 0, + 70, 2, 16, 0, 2, 0, + 0, 0, 70, 2, 16, 0, + 1, 0, 0, 0, 56, 0, + 0, 8, 114, 0, 16, 0, + 1, 0, 0, 0, 70, 2, + 16, 0, 1, 0, 0, 0, + 6, 128, 32, 0, 0, 0, + 0, 0, 8, 0, 0, 0, + 56, 0, 0, 7, 130, 0, + 16, 0, 1, 0, 0, 0, + 42, 0, 16, 0, 2, 0, + 0, 0, 26, 0, 16, 0, + 2, 0, 0, 0, 56, 0, + 0, 7, 130, 0, 16, 0, + 1, 0, 0, 0, 10, 0, + 16, 0, 2, 0, 0, 0, + 58, 0, 16, 0, 1, 0, + 0, 0, 50, 0, 0, 10, + 130, 0, 16, 0, 1, 0, + 0, 0, 58, 0, 16, 0, + 1, 0, 0, 0, 10, 128, + 32, 0, 0, 0, 0, 0, + 8, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 128, 63, + 56, 0, 0, 7, 114, 0, + 16, 0, 2, 0, 0, 0, + 118, 14, 16, 0, 0, 0, + 0, 0, 246, 15, 16, 0, + 1, 0, 0, 0, 14, 0, + 0, 7, 114, 0, 16, 0, + 2, 0, 0, 0, 70, 2, + 16, 0, 2, 0, 0, 0, + 6, 0, 16, 0, 0, 0, + 0, 0, 56, 0, 0, 8, + 114, 0, 16, 0, 3, 0, + 0, 0, 86, 5, 16, 0, + 2, 0, 0, 0, 70, 130, + 32, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 50, 0, + 0, 10, 114, 0, 16, 0, + 3, 0, 0, 0, 70, 130, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 6, 0, + 16, 0, 2, 0, 0, 0, + 70, 2, 16, 0, 3, 0, + 0, 0, 50, 0, 0, 10, + 114, 0, 16, 0, 3, 0, + 0, 0, 70, 130, 32, 0, + 0, 0, 0, 0, 2, 0, + 0, 0, 166, 10, 16, 0, + 2, 0, 0, 0, 70, 2, + 16, 0, 3, 0, 0, 0, + 54, 0, 0, 5, 114, 32, + 16, 0, 1, 0, 0, 0, + 70, 2, 16, 0, 2, 0, + 0, 0, 0, 0, 0, 8, + 114, 0, 16, 0, 2, 0, + 0, 0, 70, 2, 16, 0, + 3, 0, 0, 0, 70, 130, + 32, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 56, 0, + 0, 8, 242, 0, 16, 0, + 3, 0, 0, 0, 86, 5, + 16, 0, 2, 0, 0, 0, + 70, 142, 32, 0, 0, 0, + 0, 0, 5, 0, 0, 0, + 50, 0, 0, 10, 242, 0, + 16, 0, 3, 0, 0, 0, + 70, 142, 32, 0, 0, 0, + 0, 0, 4, 0, 0, 0, + 6, 0, 16, 0, 2, 0, + 0, 0, 70, 14, 16, 0, + 3, 0, 0, 0, 50, 0, + 0, 10, 242, 0, 16, 0, + 2, 0, 0, 0, 70, 142, + 32, 0, 0, 0, 0, 0, + 6, 0, 0, 0, 166, 10, + 16, 0, 2, 0, 0, 0, + 70, 14, 16, 0, 3, 0, + 0, 0, 0, 0, 0, 8, + 242, 32, 16, 0, 0, 0, + 0, 0, 70, 14, 16, 0, + 2, 0, 0, 0, 70, 142, + 32, 0, 0, 0, 0, 0, + 7, 0, 0, 0, 54, 0, + 0, 5, 34, 0, 16, 0, + 2, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 0, 0, + 49, 0, 0, 9, 18, 0, + 16, 0, 0, 0, 0, 0, + 26, 0, 16, 128, 129, 0, + 0, 0, 0, 0, 0, 0, + 58, 0, 16, 128, 129, 0, + 0, 0, 0, 0, 0, 0, + 56, 0, 0, 10, 82, 0, + 16, 0, 2, 0, 0, 0, + 166, 11, 16, 0, 0, 0, + 0, 0, 2, 64, 0, 0, + 0, 0, 128, 191, 0, 0, + 0, 0, 0, 0, 128, 63, + 0, 0, 0, 0, 54, 0, + 0, 5, 130, 0, 16, 0, + 2, 0, 0, 0, 26, 0, + 16, 0, 0, 0, 0, 0, + 55, 0, 0, 9, 114, 0, + 16, 0, 2, 0, 0, 0, + 6, 0, 16, 0, 0, 0, + 0, 0, 38, 9, 16, 0, + 2, 0, 0, 0, 118, 12, + 16, 0, 2, 0, 0, 0, + 16, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 70, 2, 16, 0, 2, 0, + 0, 0, 70, 2, 16, 0, + 2, 0, 0, 0, 68, 0, + 0, 5, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 56, 0, 0, 7, 114, 0, + 16, 0, 2, 0, 0, 0, + 6, 0, 16, 0, 0, 0, + 0, 0, 70, 2, 16, 0, + 2, 0, 0, 0, 56, 0, + 0, 7, 114, 0, 16, 0, + 3, 0, 0, 0, 118, 14, + 16, 0, 0, 0, 0, 0, + 70, 2, 16, 0, 2, 0, + 0, 0, 50, 0, 0, 10, + 114, 0, 16, 0, 3, 0, + 0, 0, 230, 9, 16, 0, + 0, 0, 0, 0, 150, 4, + 16, 0, 2, 0, 0, 0, + 70, 2, 16, 128, 65, 0, + 0, 0, 3, 0, 0, 0, + 16, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 70, 2, 16, 0, 3, 0, + 0, 0, 70, 2, 16, 0, + 3, 0, 0, 0, 68, 0, + 0, 5, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 56, 0, 0, 7, 114, 0, + 16, 0, 3, 0, 0, 0, + 6, 0, 16, 0, 0, 0, + 0, 0, 70, 2, 16, 0, + 3, 0, 0, 0, 16, 0, + 0, 7, 18, 0, 16, 0, + 0, 0, 0, 0, 150, 4, + 16, 0, 1, 0, 0, 0, + 70, 2, 16, 0, 3, 0, + 0, 0, 56, 0, 0, 7, + 114, 0, 16, 0, 3, 0, + 0, 0, 246, 15, 16, 0, + 1, 0, 0, 0, 70, 2, + 16, 0, 3, 0, 0, 0, + 56, 0, 0, 7, 114, 0, + 16, 0, 4, 0, 0, 0, + 246, 15, 16, 0, 1, 0, + 0, 0, 70, 2, 16, 0, + 2, 0, 0, 0, 16, 0, + 0, 7, 18, 0, 16, 0, + 1, 0, 0, 0, 38, 9, + 16, 0, 1, 0, 0, 0, + 70, 2, 16, 0, 2, 0, + 0, 0, 50, 0, 0, 9, + 114, 0, 16, 0, 1, 0, + 0, 0, 230, 9, 16, 0, + 0, 0, 0, 0, 6, 0, + 16, 0, 1, 0, 0, 0, + 70, 2, 16, 0, 4, 0, + 0, 0, 50, 0, 0, 9, + 114, 0, 16, 0, 0, 0, + 0, 0, 150, 7, 16, 0, + 0, 0, 0, 0, 6, 0, + 16, 0, 0, 0, 0, 0, + 70, 2, 16, 0, 3, 0, + 0, 0, 56, 0, 0, 7, + 114, 0, 16, 0, 2, 0, + 0, 0, 70, 2, 16, 0, + 0, 0, 0, 0, 70, 2, + 16, 0, 1, 0, 0, 0, + 50, 0, 0, 10, 114, 0, + 16, 0, 0, 0, 0, 0, + 38, 9, 16, 0, 1, 0, + 0, 0, 150, 4, 16, 0, + 0, 0, 0, 0, 70, 2, + 16, 128, 65, 0, 0, 0, + 2, 0, 0, 0, 16, 0, + 0, 7, 130, 0, 16, 0, + 0, 0, 0, 0, 70, 2, + 16, 0, 0, 0, 0, 0, + 70, 2, 16, 0, 0, 0, + 0, 0, 68, 0, 0, 5, + 130, 0, 16, 0, 0, 0, + 0, 0, 58, 0, 16, 0, + 0, 0, 0, 0, 56, 0, + 0, 7, 114, 0, 16, 0, + 0, 0, 0, 0, 246, 15, + 16, 0, 0, 0, 0, 0, + 70, 2, 16, 0, 0, 0, + 0, 0, 54, 0, 0, 5, + 114, 32, 16, 0, 2, 0, + 0, 0, 70, 2, 16, 0, + 0, 0, 0, 0, 56, 0, + 0, 8, 114, 0, 16, 0, + 1, 0, 0, 0, 86, 5, + 16, 0, 0, 0, 0, 0, + 70, 130, 32, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 50, 0, 0, 10, 178, 0, + 16, 0, 0, 0, 0, 0, + 70, 136, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 6, 0, 16, 0, 0, 0, + 0, 0, 70, 8, 16, 0, + 1, 0, 0, 0, 50, 0, + 0, 10, 114, 32, 16, 0, + 3, 0, 0, 0, 70, 130, + 32, 0, 0, 0, 0, 0, + 2, 0, 0, 0, 166, 10, + 16, 0, 0, 0, 0, 0, + 70, 3, 16, 0, 0, 0, + 0, 0, 62, 0, 0, 1, + 83, 84, 65, 84, 148, 0, + 0, 0, 57, 0, 0, 0, + 5, 0, 0, 0, 0, 0, + 0, 0, 6, 0, 0, 0, + 38, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 +}; diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.hlsl.hs.h b/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.hlsl.hs.h new file mode 100755 index 0000000000..d37502a5a8 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.hlsl.hs.h @@ -0,0 +1,297 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 +// +// +// fxc /Fhd3d11spikysphere.hlsl.hs.h /Ehs /Ths_5_0 d3d11spikysphere.hlsl +// +// +// Buffer Definitions: +// +// cbuffer cb_frame +// { +// +// float4x4 model; // Offset: 0 Size: 64 [unused] +// float4x4 view_proj; // Offset: 64 Size: 64 [unused] +// float disp_scale; // Offset: 128 Size: 4 [unused] +// float disp_freq; // Offset: 132 Size: 4 [unused] +// float tess_factor; // Offset: 136 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim Slot Elements +// ------------------------------ ---------- ------- ----------- ---- -------- +// cb_frame cbuffer NA NA 0 1 +// +// +// +// Patch Constant signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_TessFactor 0 x 0 TRIEDGE float x +// SV_TessFactor 1 x 1 TRIEDGE float x +// SV_TessFactor 2 x 2 TRIEDGE float x +// SV_InsideTessFactor 0 x 3 TRIINT float x +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// POSITION 0 xyz 0 NONE float xyz +// +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// POSITION 0 xyz 0 NONE float xyz +// +// Tessellation Domain # of control points +// -------------------- -------------------- +// Triangle 3 +// +// Tessellation Output Primitive Partitioning Type +// ------------------------------ ------------------ +// Clockwise Triangles Even Fractional +// +hs_5_0 +hs_decls +dcl_input_control_point_count 3 +dcl_output_control_point_count 3 +dcl_tessellator_domain domain_tri +dcl_tessellator_partitioning partitioning_fractional_even +dcl_tessellator_output_primitive output_triangle_cw +dcl_globalFlags refactoringAllowed +dcl_constantbuffer cb0[9], immediateIndexed +hs_fork_phase +dcl_hs_fork_phase_instance_count 3 +dcl_input vForkInstanceID +dcl_output_siv o0.x, finalTriUeq0EdgeTessFactor +dcl_output_siv o1.x, finalTriVeq0EdgeTessFactor +dcl_output_siv o2.x, finalTriWeq0EdgeTessFactor +dcl_temps 1 +dcl_indexrange o0.x 3 +mov r0.x, vForkInstanceID.x +mov o[r0.x + 0].x, cb0[8].z +ret +hs_fork_phase +dcl_output_siv o3.x, finalTriInsideTessFactor +mov o3.x, cb0[8].z +ret +// Approximately 5 instruction slots used +#endif + +const BYTE g_hs[] = +{ + 68, 88, 66, 67, 174, 23, + 253, 184, 171, 234, 181, 122, + 114, 17, 23, 172, 69, 130, + 17, 19, 1, 0, 0, 0, + 212, 4, 0, 0, 6, 0, + 0, 0, 56, 0, 0, 0, + 68, 2, 0, 0, 120, 2, + 0, 0, 172, 2, 0, 0, + 64, 3, 0, 0, 56, 4, + 0, 0, 82, 68, 69, 70, + 4, 2, 0, 0, 1, 0, + 0, 0, 104, 0, 0, 0, + 1, 0, 0, 0, 60, 0, + 0, 0, 0, 5, 83, 72, + 0, 1, 0, 0, 210, 1, + 0, 0, 82, 68, 49, 49, + 60, 0, 0, 0, 24, 0, + 0, 0, 32, 0, 0, 0, + 40, 0, 0, 0, 36, 0, + 0, 0, 12, 0, 0, 0, + 0, 0, 0, 0, 92, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 99, 98, 95, 102, 114, 97, + 109, 101, 0, 171, 171, 171, + 92, 0, 0, 0, 5, 0, + 0, 0, 128, 0, 0, 0, + 144, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 72, 1, 0, 0, 0, 0, + 0, 0, 64, 0, 0, 0, + 0, 0, 0, 0, 88, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 124, 1, + 0, 0, 64, 0, 0, 0, + 64, 0, 0, 0, 0, 0, + 0, 0, 88, 1, 0, 0, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 134, 1, 0, 0, + 128, 0, 0, 0, 4, 0, + 0, 0, 0, 0, 0, 0, + 152, 1, 0, 0, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 188, 1, 0, 0, 132, 0, + 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 152, 1, + 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 255, 255, 255, 255, + 0, 0, 0, 0, 198, 1, + 0, 0, 136, 0, 0, 0, + 4, 0, 0, 0, 2, 0, + 0, 0, 152, 1, 0, 0, + 0, 0, 0, 0, 255, 255, + 255, 255, 0, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 109, 111, 100, 101, + 108, 0, 102, 108, 111, 97, + 116, 52, 120, 52, 0, 171, + 3, 0, 3, 0, 4, 0, + 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 78, 1, 0, 0, + 118, 105, 101, 119, 95, 112, + 114, 111, 106, 0, 100, 105, + 115, 112, 95, 115, 99, 97, + 108, 101, 0, 102, 108, 111, + 97, 116, 0, 171, 0, 0, + 3, 0, 1, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 145, 1, 0, 0, 100, 105, + 115, 112, 95, 102, 114, 101, + 113, 0, 116, 101, 115, 115, + 95, 102, 97, 99, 116, 111, + 114, 0, 77, 105, 99, 114, + 111, 115, 111, 102, 116, 32, + 40, 82, 41, 32, 72, 76, + 83, 76, 32, 83, 104, 97, + 100, 101, 114, 32, 67, 111, + 109, 112, 105, 108, 101, 114, + 32, 57, 46, 50, 57, 46, + 57, 53, 50, 46, 51, 49, + 49, 49, 0, 171, 73, 83, + 71, 78, 44, 0, 0, 0, + 1, 0, 0, 0, 8, 0, + 0, 0, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 7, 7, + 0, 0, 80, 79, 83, 73, + 84, 73, 79, 78, 0, 171, + 171, 171, 79, 83, 71, 78, + 44, 0, 0, 0, 1, 0, + 0, 0, 8, 0, 0, 0, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 7, 8, 0, 0, + 80, 79, 83, 73, 84, 73, + 79, 78, 0, 171, 171, 171, + 80, 67, 83, 71, 140, 0, + 0, 0, 4, 0, 0, 0, + 8, 0, 0, 0, 104, 0, + 0, 0, 0, 0, 0, 0, + 13, 0, 0, 0, 3, 0, + 0, 0, 0, 0, 0, 0, + 1, 14, 0, 0, 104, 0, + 0, 0, 1, 0, 0, 0, + 13, 0, 0, 0, 3, 0, + 0, 0, 1, 0, 0, 0, + 1, 14, 0, 0, 104, 0, + 0, 0, 2, 0, 0, 0, + 13, 0, 0, 0, 3, 0, + 0, 0, 2, 0, 0, 0, + 1, 14, 0, 0, 118, 0, + 0, 0, 0, 0, 0, 0, + 14, 0, 0, 0, 3, 0, + 0, 0, 3, 0, 0, 0, + 1, 14, 0, 0, 83, 86, + 95, 84, 101, 115, 115, 70, + 97, 99, 116, 111, 114, 0, + 83, 86, 95, 73, 110, 115, + 105, 100, 101, 84, 101, 115, + 115, 70, 97, 99, 116, 111, + 114, 0, 171, 171, 83, 72, + 69, 88, 240, 0, 0, 0, + 80, 0, 3, 0, 60, 0, + 0, 0, 113, 0, 0, 1, + 147, 24, 0, 1, 148, 24, + 0, 1, 149, 16, 0, 1, + 150, 32, 0, 1, 151, 24, + 0, 1, 106, 8, 0, 1, + 89, 0, 0, 4, 70, 142, + 32, 0, 0, 0, 0, 0, + 9, 0, 0, 0, 115, 0, + 0, 1, 153, 0, 0, 2, + 3, 0, 0, 0, 95, 0, + 0, 2, 0, 112, 1, 0, + 103, 0, 0, 4, 18, 32, + 16, 0, 0, 0, 0, 0, + 17, 0, 0, 0, 103, 0, + 0, 4, 18, 32, 16, 0, + 1, 0, 0, 0, 18, 0, + 0, 0, 103, 0, 0, 4, + 18, 32, 16, 0, 2, 0, + 0, 0, 19, 0, 0, 0, + 104, 0, 0, 2, 1, 0, + 0, 0, 91, 0, 0, 4, + 18, 32, 16, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 54, 0, 0, 4, 18, 0, + 16, 0, 0, 0, 0, 0, + 10, 112, 1, 0, 54, 0, + 0, 7, 18, 32, 144, 0, + 10, 0, 16, 0, 0, 0, + 0, 0, 42, 128, 32, 0, + 0, 0, 0, 0, 8, 0, + 0, 0, 62, 0, 0, 1, + 115, 0, 0, 1, 103, 0, + 0, 4, 18, 32, 16, 0, + 3, 0, 0, 0, 20, 0, + 0, 0, 54, 0, 0, 6, + 18, 32, 16, 0, 3, 0, + 0, 0, 42, 128, 32, 0, + 0, 0, 0, 0, 8, 0, + 0, 0, 62, 0, 0, 1, + 83, 84, 65, 84, 148, 0, + 0, 0, 5, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 10, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 3, 0, 0, 0, 4, 0, + 0, 0, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 +}; diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.hlsl.ps.h b/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.hlsl.ps.h new file mode 100755 index 0000000000..9af2071371 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.hlsl.ps.h @@ -0,0 +1,211 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 +// +// +// fxc /Fhd3d11spikysphere.hlsl.ps.h /Eps /Tps_4_0 d3d11spikysphere.hlsl +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_POSITION 0 xyzw 0 POS float +// OBJPOS 0 xyz 1 NONE float xyz +// OBJNORMAL 0 xyz 2 NONE float xyz +// WORLDNORMAL 0 xyz 3 NONE float xyz +// +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_TARGET 0 xyzw 0 TARGET float xyzw +// +ps_4_0 +dcl_input_ps linear v1.xyz +dcl_input_ps linear v2.xyz +dcl_input_ps linear v3.xyz +dcl_output o0.xyzw +dcl_temps 2 +dp3 r0.x, v2.xyzx, v2.xyzx +rsq r0.x, r0.x +mul r0.xyz, r0.xxxx, v2.xyzx +dp3 r0.w, v1.xyzx, v1.xyzx +rsq r0.w, r0.w +mul r1.xyz, r0.wwww, v1.xyzx +dp3_sat r0.x, r0.xyzx, r1.xyzx +dp3 r0.y, v3.xyzx, v3.xyzx +rsq r0.y, r0.y +mul r0.yz, r0.yyyy, v3.yyzy +dp2_sat r0.y, l(0.707107, -0.707107, 0.000000, 0.000000), r0.yzyy +mul r0.yzw, r0.yyyy, l(0.000000, 0.600000, 0.600000, 0.400000) +mad o0.xyz, r0.xxxx, l(0.400000, 0.400000, 0.600000, 0.000000), r0.yzwy +mov o0.w, l(1.000000) +ret +// Approximately 15 instruction slots used +#endif + +const BYTE g_ps[] = +{ + 68, 88, 66, 67, 211, 117, + 143, 38, 226, 40, 181, 77, + 39, 255, 33, 137, 74, 241, + 40, 100, 1, 0, 0, 0, + 184, 3, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 140, 0, 0, 0, 40, 1, + 0, 0, 92, 1, 0, 0, + 60, 3, 0, 0, 82, 68, + 69, 70, 80, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 28, 0, 0, 0, 0, 4, + 255, 255, 0, 1, 0, 0, + 28, 0, 0, 0, 77, 105, + 99, 114, 111, 115, 111, 102, + 116, 32, 40, 82, 41, 32, + 72, 76, 83, 76, 32, 83, + 104, 97, 100, 101, 114, 32, + 67, 111, 109, 112, 105, 108, + 101, 114, 32, 57, 46, 50, + 57, 46, 57, 53, 50, 46, + 51, 49, 49, 49, 0, 171, + 171, 171, 73, 83, 71, 78, + 148, 0, 0, 0, 4, 0, + 0, 0, 8, 0, 0, 0, + 104, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 15, 0, 0, 0, + 116, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 1, 0, + 0, 0, 7, 7, 0, 0, + 123, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 2, 0, + 0, 0, 7, 7, 0, 0, + 133, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 3, 0, + 0, 0, 7, 7, 0, 0, + 83, 86, 95, 80, 79, 83, + 73, 84, 73, 79, 78, 0, + 79, 66, 74, 80, 79, 83, + 0, 79, 66, 74, 78, 79, + 82, 77, 65, 76, 0, 87, + 79, 82, 76, 68, 78, 79, + 82, 77, 65, 76, 0, 171, + 171, 171, 79, 83, 71, 78, + 44, 0, 0, 0, 1, 0, + 0, 0, 8, 0, 0, 0, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 15, 0, 0, 0, + 83, 86, 95, 84, 65, 82, + 71, 69, 84, 0, 171, 171, + 83, 72, 68, 82, 216, 1, + 0, 0, 64, 0, 0, 0, + 118, 0, 0, 0, 98, 16, + 0, 3, 114, 16, 16, 0, + 1, 0, 0, 0, 98, 16, + 0, 3, 114, 16, 16, 0, + 2, 0, 0, 0, 98, 16, + 0, 3, 114, 16, 16, 0, + 3, 0, 0, 0, 101, 0, + 0, 3, 242, 32, 16, 0, + 0, 0, 0, 0, 104, 0, + 0, 2, 2, 0, 0, 0, + 16, 0, 0, 7, 18, 0, + 16, 0, 0, 0, 0, 0, + 70, 18, 16, 0, 2, 0, + 0, 0, 70, 18, 16, 0, + 2, 0, 0, 0, 68, 0, + 0, 5, 18, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 0, 0, 0, 0, + 56, 0, 0, 7, 114, 0, + 16, 0, 0, 0, 0, 0, + 6, 0, 16, 0, 0, 0, + 0, 0, 70, 18, 16, 0, + 2, 0, 0, 0, 16, 0, + 0, 7, 130, 0, 16, 0, + 0, 0, 0, 0, 70, 18, + 16, 0, 1, 0, 0, 0, + 70, 18, 16, 0, 1, 0, + 0, 0, 68, 0, 0, 5, + 130, 0, 16, 0, 0, 0, + 0, 0, 58, 0, 16, 0, + 0, 0, 0, 0, 56, 0, + 0, 7, 114, 0, 16, 0, + 1, 0, 0, 0, 246, 15, + 16, 0, 0, 0, 0, 0, + 70, 18, 16, 0, 1, 0, + 0, 0, 16, 32, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 70, 2, 16, 0, + 0, 0, 0, 0, 70, 2, + 16, 0, 1, 0, 0, 0, + 16, 0, 0, 7, 34, 0, + 16, 0, 0, 0, 0, 0, + 70, 18, 16, 0, 3, 0, + 0, 0, 70, 18, 16, 0, + 3, 0, 0, 0, 68, 0, + 0, 5, 34, 0, 16, 0, + 0, 0, 0, 0, 26, 0, + 16, 0, 0, 0, 0, 0, + 56, 0, 0, 7, 98, 0, + 16, 0, 0, 0, 0, 0, + 86, 5, 16, 0, 0, 0, + 0, 0, 86, 22, 16, 0, + 3, 0, 0, 0, 15, 32, + 0, 10, 34, 0, 16, 0, + 0, 0, 0, 0, 2, 64, + 0, 0, 243, 4, 53, 63, + 243, 4, 53, 191, 0, 0, + 0, 0, 0, 0, 0, 0, + 150, 5, 16, 0, 0, 0, + 0, 0, 56, 0, 0, 10, + 226, 0, 16, 0, 0, 0, + 0, 0, 86, 5, 16, 0, + 0, 0, 0, 0, 2, 64, + 0, 0, 0, 0, 0, 0, + 154, 153, 25, 63, 154, 153, + 25, 63, 205, 204, 204, 62, + 50, 0, 0, 12, 114, 32, + 16, 0, 0, 0, 0, 0, + 6, 0, 16, 0, 0, 0, + 0, 0, 2, 64, 0, 0, + 205, 204, 204, 62, 205, 204, + 204, 62, 154, 153, 25, 63, + 0, 0, 0, 0, 150, 7, + 16, 0, 0, 0, 0, 0, + 54, 0, 0, 5, 130, 32, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 128, 63, 62, 0, 0, 1, + 83, 84, 65, 84, 116, 0, + 0, 0, 15, 0, 0, 0, + 2, 0, 0, 0, 0, 0, + 0, 0, 4, 0, 0, 0, + 12, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0 +}; diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.hlsl.vs.h b/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.hlsl.vs.h new file mode 100755 index 0000000000..c71b0c3ae0 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.hlsl.vs.h @@ -0,0 +1,105 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 +// +// +// fxc /Fhd3d11spikysphere.hlsl.vs.h /Evs /Tvs_4_0 d3d11spikysphere.hlsl +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// POSITION 0 xyz 0 NONE float xyz +// +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// POSITION 0 xyz 0 NONE float xyz +// +vs_4_0 +dcl_input v0.xyz +dcl_output o0.xyz +mov o0.xyz, v0.xyzx +ret +// Approximately 2 instruction slots used +#endif + +const BYTE g_vs[] = +{ + 68, 88, 66, 67, 71, 140, + 219, 201, 207, 71, 236, 3, + 158, 208, 157, 229, 54, 227, + 221, 132, 1, 0, 0, 0, + 176, 1, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 140, 0, 0, 0, 192, 0, + 0, 0, 244, 0, 0, 0, + 52, 1, 0, 0, 82, 68, + 69, 70, 80, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 28, 0, 0, 0, 0, 4, + 254, 255, 0, 1, 0, 0, + 28, 0, 0, 0, 77, 105, + 99, 114, 111, 115, 111, 102, + 116, 32, 40, 82, 41, 32, + 72, 76, 83, 76, 32, 83, + 104, 97, 100, 101, 114, 32, + 67, 111, 109, 112, 105, 108, + 101, 114, 32, 57, 46, 50, + 57, 46, 57, 53, 50, 46, + 51, 49, 49, 49, 0, 171, + 171, 171, 73, 83, 71, 78, + 44, 0, 0, 0, 1, 0, + 0, 0, 8, 0, 0, 0, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 7, 7, 0, 0, + 80, 79, 83, 73, 84, 73, + 79, 78, 0, 171, 171, 171, + 79, 83, 71, 78, 44, 0, + 0, 0, 1, 0, 0, 0, + 8, 0, 0, 0, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 0, 0, 0, 0, + 7, 8, 0, 0, 80, 79, + 83, 73, 84, 73, 79, 78, + 0, 171, 171, 171, 83, 72, + 68, 82, 56, 0, 0, 0, + 64, 0, 1, 0, 14, 0, + 0, 0, 95, 0, 0, 3, + 114, 16, 16, 0, 0, 0, + 0, 0, 101, 0, 0, 3, + 114, 32, 16, 0, 0, 0, + 0, 0, 54, 0, 0, 5, + 114, 32, 16, 0, 0, 0, + 0, 0, 70, 18, 16, 0, + 0, 0, 0, 0, 62, 0, + 0, 1, 83, 84, 65, 84, + 116, 0, 0, 0, 2, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 2, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 +}; diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.vcxproj b/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.vcxproj new file mode 100755 index 0000000000..0cf8c709d4 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11spikysphere/d3d11spikysphere.vcxproj @@ -0,0 +1,102 @@ +<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{64988608-72A3-4125-8A31-45E1EACE8F0A}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>d3d11spikysphere</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(SolutionDir)\d3d11app</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalDependencies>d3d11.lib;d3dx10.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(SolutionDir)\d3d11app</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalDependencies>d3d11.lib;d3dx10.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="..\d3d11app\d3d11winmain.cpp" />
+ <ClCompile Include="d3d11spikysphere.cpp" />
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="d3d11spikysphere.hlsl">
+ <FileType>Document</FileType>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">"$(DXSDK_DIR)\Utilities\bin\x86\fxc.exe" /Fh%(Identity).ps.h /Eps /Tps_4_0 %(Identity)
+"$(DXSDK_DIR)\Utilities\bin\x86\fxc.exe" /Fh%(Identity).vs.h /Evs /Tvs_4_0 %(Identity)</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Identity).ps.h;%(Identity).vs.h;%(Outputs)</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="..\d3d11app\d3d11app.h" />
+ <ClInclude Include="d3d11spikysphere.hlsl.ds.h" />
+ <ClInclude Include="d3d11spikysphere.hlsl.hs.h" />
+ <ClInclude Include="d3d11spikysphere.hlsl.ps.h" />
+ <ClInclude Include="d3d11spikysphere.hlsl.vs.h" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11tex/d3d11tex.cpp b/src/gallium/state_trackers/d3d1x/progs/d3d11tex/d3d11tex.cpp new file mode 100755 index 0000000000..19c669be9c --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11tex/d3d11tex.cpp @@ -0,0 +1,116 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "d3d11app.h" +#include "d3d11u.h" +#include "d3d11tex.hlsl.ps.h" +#include "d3d11tex.hlsl.vs.h" +#include "../data/cornell_box_image.h" +#include "../data/tux_image.h" + +struct d3d11tex : public d3d11_application +{ + ID3D11PixelShader* ps; + ID3D11VertexShader* vs; + mesh* quad; + ID3D11ShaderResourceView* srv[2]; + ID3D11SamplerState* samp[2]; + + virtual bool init(ID3D11Device* dev, int argc, char** argv) + { + ensure(dev->CreatePixelShader(g_ps, sizeof(g_ps), NULL, &ps)); + ensure(dev->CreateVertexShader(g_vs, sizeof(g_vs), NULL, &vs)); + + quad = create_tex_quad(dev, g_vs, sizeof(g_vs)); + + D3D11_TEXTURE2D_DESC texd; + memset(&texd, 0, sizeof(texd)); + texd.BindFlags = D3D11_BIND_SHADER_RESOURCE; + texd.Usage = D3D11_USAGE_IMMUTABLE; + texd.SampleDesc.Count = 1; + texd.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + texd.Width = 32; + texd.Height = 32; + texd.ArraySize = 1; + texd.MipLevels = 1; + + D3D11_SUBRESOURCE_DATA texsd; + texsd.SysMemPitch = 32 * 4; + texsd.SysMemSlicePitch = 32 * 32 * 4; + + ID3D11Texture2D* tex; + + texsd.pSysMem = g_cornell_box_image; + ensure(dev->CreateTexture2D(&texd, &texsd, &tex)); + ensure(dev->CreateShaderResourceView(tex, 0, &srv[0])); + tex->Release(); + + texsd.pSysMem = g_tux_image; + ensure(dev->CreateTexture2D(&texd, &texsd, &tex)); + ensure(dev->CreateShaderResourceView(tex, 0, &srv[1])); + tex->Release(); + + D3D11_SAMPLER_DESC sampd; + memset(&sampd, 0, sizeof(sampd)); + sampd.AddressU = D3D11_TEXTURE_ADDRESS_WRAP; + sampd.AddressV = D3D11_TEXTURE_ADDRESS_WRAP; + sampd.AddressW = D3D11_TEXTURE_ADDRESS_WRAP; + sampd.MinLOD = -FLT_MAX; + sampd.MaxLOD = FLT_MAX; + + sampd.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; + dev->CreateSamplerState(&sampd, &samp[0]); + + sampd.Filter = D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT; + dev->CreateSamplerState(&sampd, &samp[1]); + return true; + } + + virtual void draw(ID3D11DeviceContext* ctx, ID3D11RenderTargetView* rtv, unsigned width, unsigned height, double time) + { + D3D11_VIEWPORT vp; + memset(&vp, 0, sizeof(vp)); + vp.Width = (float)width; + vp.Height = (float)height; + vp.MaxDepth = 1.0f; + + ctx->OMSetRenderTargets(1, &rtv, 0); + ctx->RSSetViewports(1, &vp); + + ctx->VSSetShader(vs, NULL, 0); + ctx->PSSetShader(ps, NULL, 0); + + ctx->PSSetShaderResources(0, 2, srv); + ctx->PSSetSamplers(0, 2, samp); + + quad->bind_and_draw(ctx); + } +}; + +d3d11_application* d3d11_application_create() +{ + return new d3d11tex(); +} diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11tex/d3d11tex.hlsl b/src/gallium/state_trackers/d3d1x/progs/d3d11tex/d3d11tex.hlsl new file mode 100755 index 0000000000..1a6990cc39 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11tex/d3d11tex.hlsl @@ -0,0 +1,66 @@ +/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+Texture2D tex0;
+Texture2D tex1;
+sampler samp0;
+sampler samp1;
+
+struct IA2VS
+{
+ float4 position : POSITION;
+ float2 texcoord : TEXCOORD;
+};
+
+struct VS2PS
+{
+ float4 position : SV_POSITION;
+ float2 texcoord : TEXCOORD;
+ float4 factors : FACTORS;
+};
+
+VS2PS vs(IA2VS input)
+{
+ VS2PS result;
+ result.position = input.position;
+ result.texcoord = input.texcoord * 8;
+ result.factors.xy = input.texcoord;
+ result.factors.zw = 1 - input.texcoord;
+ return result;
+}
+
+float4 ps(VS2PS input) : SV_TARGET
+{
+ float4 a0 = tex0.Sample(samp0, input.texcoord);
+ float4 a1 = tex0.Sample(samp1, input.texcoord);
+ float4 a = a0 * input.factors.z + a1 * input.factors.x;
+
+ float4 b0 = tex1.Sample(samp0, input.texcoord);
+ float4 b1 = tex1.Sample(samp1, input.texcoord);
+ float4 b = b0 * input.factors.z + b1 * input.factors.x;
+
+ return a * input.factors.w + b * input.factors.y;
+}
diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11tex/d3d11tex.hlsl.ps.h b/src/gallium/state_trackers/d3d1x/progs/d3d11tex/d3d11tex.hlsl.ps.h new file mode 100755 index 0000000000..29795a9909 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11tex/d3d11tex.hlsl.ps.h @@ -0,0 +1,234 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 +// +// +// fxc /Fhd3d11tex.hlsl.ps.h /Eps /Tps_4_0 d3d11tex.hlsl +// +// +// Resource Bindings: +// +// Name Type Format Dim Slot Elements +// ------------------------------ ---------- ------- ----------- ---- -------- +// samp0 sampler NA NA 0 1 +// samp1 sampler NA NA 1 1 +// tex0 texture float4 2d 0 1 +// tex1 texture float4 2d 1 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_POSITION 0 xyzw 0 POS float +// TEXCOORD 0 xy 1 NONE float xy +// FACTORS 0 xyzw 2 NONE float xyzw +// +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_TARGET 0 xyzw 0 TARGET float xyzw +// +ps_4_0 +dcl_sampler s0, mode_default +dcl_sampler s1, mode_default +dcl_resource_texture2d (float,float,float,float) t0 +dcl_resource_texture2d (float,float,float,float) t1 +dcl_input_ps linear v1.xy +dcl_input_ps linear v2.xyzw +dcl_output o0.xyzw +dcl_temps 3 +sample r0.xyzw, v1.xyxx, t1.xyzw, s1 +mul r0.xyzw, r0.xyzw, v2.xxxx +sample r1.xyzw, v1.xyxx, t1.xyzw, s0 +mad r0.xyzw, r1.xyzw, v2.zzzz, r0.xyzw +mul r0.xyzw, r0.xyzw, v2.yyyy +sample r1.xyzw, v1.xyxx, t0.xyzw, s1 +mul r1.xyzw, r1.xyzw, v2.xxxx +sample r2.xyzw, v1.xyxx, t0.xyzw, s0 +mad r1.xyzw, r2.xyzw, v2.zzzz, r1.xyzw +mad o0.xyzw, r1.xyzw, v2.wwww, r0.xyzw +ret +// Approximately 11 instruction slots used +#endif + +const BYTE g_ps[] = +{ + 68, 88, 66, 67, 139, 203, + 114, 37, 104, 101, 201, 12, + 197, 147, 116, 98, 80, 214, + 173, 207, 1, 0, 0, 0, + 16, 4, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 32, 1, 0, 0, 152, 1, + 0, 0, 204, 1, 0, 0, + 148, 3, 0, 0, 82, 68, + 69, 70, 228, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 4, 0, 0, 0, + 28, 0, 0, 0, 0, 4, + 255, 255, 0, 1, 0, 0, + 178, 0, 0, 0, 156, 0, + 0, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 162, 0, 0, 0, 3, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 168, 0, 0, 0, + 2, 0, 0, 0, 5, 0, + 0, 0, 4, 0, 0, 0, + 255, 255, 255, 255, 0, 0, + 0, 0, 1, 0, 0, 0, + 12, 0, 0, 0, 173, 0, + 0, 0, 2, 0, 0, 0, + 5, 0, 0, 0, 4, 0, + 0, 0, 255, 255, 255, 255, + 1, 0, 0, 0, 1, 0, + 0, 0, 12, 0, 0, 0, + 115, 97, 109, 112, 48, 0, + 115, 97, 109, 112, 49, 0, + 116, 101, 120, 48, 0, 116, + 101, 120, 49, 0, 77, 105, + 99, 114, 111, 115, 111, 102, + 116, 32, 40, 82, 41, 32, + 72, 76, 83, 76, 32, 83, + 104, 97, 100, 101, 114, 32, + 67, 111, 109, 112, 105, 108, + 101, 114, 32, 57, 46, 50, + 57, 46, 57, 53, 50, 46, + 51, 49, 49, 49, 0, 171, + 73, 83, 71, 78, 112, 0, + 0, 0, 3, 0, 0, 0, + 8, 0, 0, 0, 80, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 3, 0, + 0, 0, 0, 0, 0, 0, + 15, 0, 0, 0, 92, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 1, 0, 0, 0, + 3, 3, 0, 0, 101, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 2, 0, 0, 0, + 15, 15, 0, 0, 83, 86, + 95, 80, 79, 83, 73, 84, + 73, 79, 78, 0, 84, 69, + 88, 67, 79, 79, 82, 68, + 0, 70, 65, 67, 84, 79, + 82, 83, 0, 171, 171, 171, + 79, 83, 71, 78, 44, 0, + 0, 0, 1, 0, 0, 0, + 8, 0, 0, 0, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, + 0, 0, 0, 0, 0, 0, + 15, 0, 0, 0, 83, 86, + 95, 84, 65, 82, 71, 69, + 84, 0, 171, 171, 83, 72, + 68, 82, 192, 1, 0, 0, + 64, 0, 0, 0, 112, 0, + 0, 0, 90, 0, 0, 3, + 0, 96, 16, 0, 0, 0, + 0, 0, 90, 0, 0, 3, + 0, 96, 16, 0, 1, 0, + 0, 0, 88, 24, 0, 4, + 0, 112, 16, 0, 0, 0, + 0, 0, 85, 85, 0, 0, + 88, 24, 0, 4, 0, 112, + 16, 0, 1, 0, 0, 0, + 85, 85, 0, 0, 98, 16, + 0, 3, 50, 16, 16, 0, + 1, 0, 0, 0, 98, 16, + 0, 3, 242, 16, 16, 0, + 2, 0, 0, 0, 101, 0, + 0, 3, 242, 32, 16, 0, + 0, 0, 0, 0, 104, 0, + 0, 2, 3, 0, 0, 0, + 69, 0, 0, 9, 242, 0, + 16, 0, 0, 0, 0, 0, + 70, 16, 16, 0, 1, 0, + 0, 0, 70, 126, 16, 0, + 1, 0, 0, 0, 0, 96, + 16, 0, 1, 0, 0, 0, + 56, 0, 0, 7, 242, 0, + 16, 0, 0, 0, 0, 0, + 70, 14, 16, 0, 0, 0, + 0, 0, 6, 16, 16, 0, + 2, 0, 0, 0, 69, 0, + 0, 9, 242, 0, 16, 0, + 1, 0, 0, 0, 70, 16, + 16, 0, 1, 0, 0, 0, + 70, 126, 16, 0, 1, 0, + 0, 0, 0, 96, 16, 0, + 0, 0, 0, 0, 50, 0, + 0, 9, 242, 0, 16, 0, + 0, 0, 0, 0, 70, 14, + 16, 0, 1, 0, 0, 0, + 166, 26, 16, 0, 2, 0, + 0, 0, 70, 14, 16, 0, + 0, 0, 0, 0, 56, 0, + 0, 7, 242, 0, 16, 0, + 0, 0, 0, 0, 70, 14, + 16, 0, 0, 0, 0, 0, + 86, 21, 16, 0, 2, 0, + 0, 0, 69, 0, 0, 9, + 242, 0, 16, 0, 1, 0, + 0, 0, 70, 16, 16, 0, + 1, 0, 0, 0, 70, 126, + 16, 0, 0, 0, 0, 0, + 0, 96, 16, 0, 1, 0, + 0, 0, 56, 0, 0, 7, + 242, 0, 16, 0, 1, 0, + 0, 0, 70, 14, 16, 0, + 1, 0, 0, 0, 6, 16, + 16, 0, 2, 0, 0, 0, + 69, 0, 0, 9, 242, 0, + 16, 0, 2, 0, 0, 0, + 70, 16, 16, 0, 1, 0, + 0, 0, 70, 126, 16, 0, + 0, 0, 0, 0, 0, 96, + 16, 0, 0, 0, 0, 0, + 50, 0, 0, 9, 242, 0, + 16, 0, 1, 0, 0, 0, + 70, 14, 16, 0, 2, 0, + 0, 0, 166, 26, 16, 0, + 2, 0, 0, 0, 70, 14, + 16, 0, 1, 0, 0, 0, + 50, 0, 0, 9, 242, 32, + 16, 0, 0, 0, 0, 0, + 70, 14, 16, 0, 1, 0, + 0, 0, 246, 31, 16, 0, + 2, 0, 0, 0, 70, 14, + 16, 0, 0, 0, 0, 0, + 62, 0, 0, 1, 83, 84, + 65, 84, 116, 0, 0, 0, + 11, 0, 0, 0, 3, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 3, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0 +}; diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11tex/d3d11tex.hlsl.vs.h b/src/gallium/state_trackers/d3d1x/progs/d3d11tex/d3d11tex.hlsl.vs.h new file mode 100755 index 0000000000..3aae79441a --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11tex/d3d11tex.hlsl.vs.h @@ -0,0 +1,153 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 +// +// +// fxc /Fhd3d11tex.hlsl.vs.h /Evs /Tvs_4_0 d3d11tex.hlsl +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// POSITION 0 xyzw 0 NONE float xyzw +// TEXCOORD 0 xy 1 NONE float xy +// +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_POSITION 0 xyzw 0 POS float xyzw +// TEXCOORD 0 xy 1 NONE float xy +// FACTORS 0 xyzw 2 NONE float xyzw +// +vs_4_0 +dcl_input v0.xyzw +dcl_input v1.xy +dcl_output_siv o0.xyzw, position +dcl_output o1.xy +dcl_output o2.xyzw +mov o0.xyzw, v0.xyzw +mul o1.xy, v1.xyxx, l(8.000000, 8.000000, 0.000000, 0.000000) +mad o2.xyzw, v1.xyxy, l(1.000000, 1.000000, -1.000000, -1.000000), l(0.000000, 0.000000, 1.000000, 1.000000) +ret +// Approximately 4 instruction slots used +#endif + +const BYTE g_vs[] = +{ + 68, 88, 66, 67, 129, 141, + 49, 0, 46, 132, 26, 20, + 64, 38, 200, 86, 119, 202, + 172, 121, 1, 0, 0, 0, + 160, 2, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 140, 0, 0, 0, 224, 0, + 0, 0, 88, 1, 0, 0, + 36, 2, 0, 0, 82, 68, + 69, 70, 80, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 28, 0, 0, 0, 0, 4, + 254, 255, 0, 1, 0, 0, + 28, 0, 0, 0, 77, 105, + 99, 114, 111, 115, 111, 102, + 116, 32, 40, 82, 41, 32, + 72, 76, 83, 76, 32, 83, + 104, 97, 100, 101, 114, 32, + 67, 111, 109, 112, 105, 108, + 101, 114, 32, 57, 46, 50, + 57, 46, 57, 53, 50, 46, + 51, 49, 49, 49, 0, 171, + 171, 171, 73, 83, 71, 78, + 76, 0, 0, 0, 2, 0, + 0, 0, 8, 0, 0, 0, + 56, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 15, 15, 0, 0, + 65, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 1, 0, + 0, 0, 3, 3, 0, 0, + 80, 79, 83, 73, 84, 73, + 79, 78, 0, 84, 69, 88, + 67, 79, 79, 82, 68, 0, + 171, 171, 79, 83, 71, 78, + 112, 0, 0, 0, 3, 0, + 0, 0, 8, 0, 0, 0, + 80, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 15, 0, 0, 0, + 92, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 1, 0, + 0, 0, 3, 12, 0, 0, + 101, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 2, 0, + 0, 0, 15, 0, 0, 0, + 83, 86, 95, 80, 79, 83, + 73, 84, 73, 79, 78, 0, + 84, 69, 88, 67, 79, 79, + 82, 68, 0, 70, 65, 67, + 84, 79, 82, 83, 0, 171, + 171, 171, 83, 72, 68, 82, + 196, 0, 0, 0, 64, 0, + 1, 0, 49, 0, 0, 0, + 95, 0, 0, 3, 242, 16, + 16, 0, 0, 0, 0, 0, + 95, 0, 0, 3, 50, 16, + 16, 0, 1, 0, 0, 0, + 103, 0, 0, 4, 242, 32, + 16, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 101, 0, + 0, 3, 50, 32, 16, 0, + 1, 0, 0, 0, 101, 0, + 0, 3, 242, 32, 16, 0, + 2, 0, 0, 0, 54, 0, + 0, 5, 242, 32, 16, 0, + 0, 0, 0, 0, 70, 30, + 16, 0, 0, 0, 0, 0, + 56, 0, 0, 10, 50, 32, + 16, 0, 1, 0, 0, 0, + 70, 16, 16, 0, 1, 0, + 0, 0, 2, 64, 0, 0, + 0, 0, 0, 65, 0, 0, + 0, 65, 0, 0, 0, 0, + 0, 0, 0, 0, 50, 0, + 0, 15, 242, 32, 16, 0, + 2, 0, 0, 0, 70, 20, + 16, 0, 1, 0, 0, 0, + 2, 64, 0, 0, 0, 0, + 128, 63, 0, 0, 128, 63, + 0, 0, 128, 191, 0, 0, + 128, 191, 2, 64, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 128, 63, + 0, 0, 128, 63, 62, 0, + 0, 1, 83, 84, 65, 84, + 116, 0, 0, 0, 4, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 5, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 +}; diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11tex/d3d11tex.vcxproj b/src/gallium/state_trackers/d3d1x/progs/d3d11tex/d3d11tex.vcxproj new file mode 100755 index 0000000000..ea6cc03868 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11tex/d3d11tex.vcxproj @@ -0,0 +1,98 @@ +<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{14F73B97-2DC6-423E-97D9-64E3368713DC}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>d3d11tex</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(SolutionDir)\d3d11app</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalDependencies>d3d11.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(SolutionDir)\d3d11app</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalDependencies>d3d11.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <CustomBuild Include="d3d11tex.hlsl">
+ <FileType>Document</FileType>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">"$(DXSDK_DIR)\Utilities\bin\x86\fxc.exe" /Fh%(Identity).ps.h /Eps /Tps_4_0 %(Identity)
+"$(DXSDK_DIR)\Utilities\bin\x86\fxc.exe" /Fh%(Identity).vs.h /Evs /Tvs_4_0 %(Identity)</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Identity).ps.h;%(Identity).vs.h;%(Outputs)</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="..\d3d11app\d3d11app.h" />
+ <ClInclude Include="d3d11tex.hlsl.ps.h" />
+ <ClInclude Include="d3d11tex.hlsl.vs.h" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="..\d3d11app\d3d11winmain.cpp" />
+ <ClCompile Include="d3d11tex.cpp" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11tri/d3d11tri.cpp b/src/gallium/state_trackers/d3d1x/progs/d3d11tri/d3d11tri.cpp new file mode 100755 index 0000000000..524b7d1c01 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11tri/d3d11tri.cpp @@ -0,0 +1,120 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "d3d11app.h" +#include "d3d11tri.hlsl.ps.h" +#include "d3d11tri.hlsl.vs.h" + +struct vertex { + float position[4]; + float color[4]; +}; + +static struct vertex vertices[3] = +{ + { + { 0.0f, 0.9f, 0.5f, 1.0f }, + { 1.0f, 0.0f, 0.0f, 1.0f } + }, + { + { 0.9f, -0.9f, 0.5f, 1.0f }, + { 0.0f, 0.0f, 1.0f, 1.0f } + }, + { + { -0.9f, -0.9f, 0.5f, 1.0f }, + { 0.0f, 1.0f, 0.0f, 1.0f } + }, +}; + +struct d3d11tri : public d3d11_application +{ + ID3D11PixelShader* ps; + ID3D11VertexShader* vs; + ID3D11InputLayout* layout; + ID3D11Buffer* vb; + + virtual bool init(ID3D11Device* dev, int argc, char** argv) + { + ensure(dev->CreatePixelShader(g_ps, sizeof(g_ps), NULL, &ps)); + ensure(dev->CreateVertexShader(g_vs, sizeof(g_vs), NULL, &vs)); + + D3D11_INPUT_ELEMENT_DESC elements[] = + { + // inverse order to make sure the implementation can properly parse the vertex shader signature + {"COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0}, + {"POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0}, + }; + + ensure(dev->CreateInputLayout(elements, sizeof(elements) / sizeof(elements[0]), g_vs, sizeof(g_vs), &layout)); + D3D11_BUFFER_DESC bufferd; + bufferd.ByteWidth = sizeof(vertices); + bufferd.Usage = D3D11_USAGE_IMMUTABLE; + bufferd.BindFlags = D3D11_BIND_VERTEX_BUFFER; + bufferd.CPUAccessFlags = 0; + bufferd.MiscFlags = 0; + bufferd.StructureByteStride = 0; + + D3D11_SUBRESOURCE_DATA buffersd; + buffersd.pSysMem = vertices; + buffersd.SysMemPitch = sizeof(vertices); + buffersd.SysMemSlicePitch = sizeof(vertices); + + ensure(dev->CreateBuffer(&bufferd, &buffersd, &vb)); + + return true; + } + + virtual void draw(ID3D11DeviceContext* ctx, ID3D11RenderTargetView* rtv, unsigned width, unsigned height, double time) + { + float clear_color[4] = {1, 0, 1, 1}; + D3D11_VIEWPORT vp; + memset(&vp, 0, sizeof(vp)); + vp.Width = (float)width; + vp.Height = (float)height; + vp.MaxDepth = 1.0f; + + ctx->OMSetRenderTargets(1, &rtv, 0); + ctx->RSSetViewports(1, &vp); + + ctx->ClearRenderTargetView(rtv, clear_color); + + ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + ctx->IASetInputLayout(layout); + unsigned stride = 2 * 4 * 4; + unsigned offset = 0; + ctx->IASetVertexBuffers(0, 1, &vb, &stride, &offset); + + ctx->VSSetShader(vs, NULL, 0); + ctx->PSSetShader(ps, NULL, 0); + + ctx->Draw(3, 0); + } +}; + +d3d11_application* d3d11_application_create() +{ + return new d3d11tri(); +} diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11tri/d3d11tri.hlsl b/src/gallium/state_trackers/d3d1x/progs/d3d11tri/d3d11tri.hlsl new file mode 100755 index 0000000000..6bdd448ce0 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11tri/d3d11tri.hlsl @@ -0,0 +1,50 @@ +/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+struct IA2VS
+{
+ float4 position : POSITION;
+ float4 color : COLOR;
+};
+
+struct VS2PS
+{
+ float4 position : SV_POSITION;
+ float4 color : COLOR;
+};
+
+VS2PS vs(IA2VS input)
+{
+ VS2PS result;
+ result.position = input.position;
+ result.color = input.color;
+ return result;
+}
+
+float4 ps(VS2PS input) : SV_TARGET
+{
+ return input.color;
+}
diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11tri/d3d11tri.hlsl.ps.h b/src/gallium/state_trackers/d3d1x/progs/d3d11tri/d3d11tri.hlsl.ps.h new file mode 100755 index 0000000000..68eaee5cb2 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11tri/d3d11tri.hlsl.ps.h @@ -0,0 +1,112 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 +// +// +// fxc /Fhd3d11tri.hlsl.ps.h /Eps /Tps_4_0 d3d11tri.hlsl +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_POSITION 0 xyzw 0 POS float +// COLOR 0 xyzw 1 NONE float xyzw +// +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_TARGET 0 xyzw 0 TARGET float xyzw +// +ps_4_0 +dcl_input_ps linear v1.xyzw +dcl_output o0.xyzw +mov o0.xyzw, v1.xyzw +ret +// Approximately 2 instruction slots used +#endif + +const BYTE g_ps[] = +{ + 68, 88, 66, 67, 206, 120, + 117, 238, 118, 127, 10, 87, + 80, 75, 114, 198, 95, 2, + 120, 102, 1, 0, 0, 0, + 208, 1, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 140, 0, 0, 0, 224, 0, + 0, 0, 20, 1, 0, 0, + 84, 1, 0, 0, 82, 68, + 69, 70, 80, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 28, 0, 0, 0, 0, 4, + 255, 255, 0, 1, 0, 0, + 28, 0, 0, 0, 77, 105, + 99, 114, 111, 115, 111, 102, + 116, 32, 40, 82, 41, 32, + 72, 76, 83, 76, 32, 83, + 104, 97, 100, 101, 114, 32, + 67, 111, 109, 112, 105, 108, + 101, 114, 32, 57, 46, 50, + 57, 46, 57, 53, 50, 46, + 51, 49, 49, 49, 0, 171, + 171, 171, 73, 83, 71, 78, + 76, 0, 0, 0, 2, 0, + 0, 0, 8, 0, 0, 0, + 56, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 15, 0, 0, 0, + 68, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 1, 0, + 0, 0, 15, 15, 0, 0, + 83, 86, 95, 80, 79, 83, + 73, 84, 73, 79, 78, 0, + 67, 79, 76, 79, 82, 0, + 171, 171, 79, 83, 71, 78, + 44, 0, 0, 0, 1, 0, + 0, 0, 8, 0, 0, 0, + 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 15, 0, 0, 0, + 83, 86, 95, 84, 65, 82, + 71, 69, 84, 0, 171, 171, + 83, 72, 68, 82, 56, 0, + 0, 0, 64, 0, 0, 0, + 14, 0, 0, 0, 98, 16, + 0, 3, 242, 16, 16, 0, + 1, 0, 0, 0, 101, 0, + 0, 3, 242, 32, 16, 0, + 0, 0, 0, 0, 54, 0, + 0, 5, 242, 32, 16, 0, + 0, 0, 0, 0, 70, 30, + 16, 0, 1, 0, 0, 0, + 62, 0, 0, 1, 83, 84, + 65, 84, 116, 0, 0, 0, + 2, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 2, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0 +}; diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11tri/d3d11tri.hlsl.vs.h b/src/gallium/state_trackers/d3d1x/progs/d3d11tri/d3d11tri.hlsl.vs.h new file mode 100755 index 0000000000..43e2a18275 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11tri/d3d11tri.hlsl.vs.h @@ -0,0 +1,128 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 +// +// +// fxc /Fhd3d11tri.hlsl.vs.h /Evs /Tvs_4_0 d3d11tri.hlsl +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// POSITION 0 xyzw 0 NONE float xyzw +// COLOR 0 xyzw 1 NONE float xyzw +// +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------ ------ +// SV_POSITION 0 xyzw 0 POS float xyzw +// COLOR 0 xyzw 1 NONE float xyzw +// +vs_4_0 +dcl_input v0.xyzw +dcl_input v1.xyzw +dcl_output_siv o0.xyzw, position +dcl_output o1.xyzw +mov o0.xyzw, v0.xyzw +mov o1.xyzw, v1.xyzw +ret +// Approximately 3 instruction slots used +#endif + +const BYTE g_vs[] = +{ + 68, 88, 66, 67, 190, 171, + 186, 20, 44, 105, 95, 129, + 137, 204, 223, 72, 251, 159, + 126, 176, 1, 0, 0, 0, + 28, 2, 0, 0, 5, 0, + 0, 0, 52, 0, 0, 0, + 140, 0, 0, 0, 220, 0, + 0, 0, 48, 1, 0, 0, + 160, 1, 0, 0, 82, 68, + 69, 70, 80, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 28, 0, 0, 0, 0, 4, + 254, 255, 0, 1, 0, 0, + 28, 0, 0, 0, 77, 105, + 99, 114, 111, 115, 111, 102, + 116, 32, 40, 82, 41, 32, + 72, 76, 83, 76, 32, 83, + 104, 97, 100, 101, 114, 32, + 67, 111, 109, 112, 105, 108, + 101, 114, 32, 57, 46, 50, + 57, 46, 57, 53, 50, 46, + 51, 49, 49, 49, 0, 171, + 171, 171, 73, 83, 71, 78, + 72, 0, 0, 0, 2, 0, + 0, 0, 8, 0, 0, 0, + 56, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, + 0, 0, 15, 15, 0, 0, + 65, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 1, 0, + 0, 0, 15, 15, 0, 0, + 80, 79, 83, 73, 84, 73, + 79, 78, 0, 67, 79, 76, + 79, 82, 0, 171, 79, 83, + 71, 78, 76, 0, 0, 0, + 2, 0, 0, 0, 8, 0, + 0, 0, 56, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 15, 0, + 0, 0, 68, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 1, 0, 0, 0, 15, 0, + 0, 0, 83, 86, 95, 80, + 79, 83, 73, 84, 73, 79, + 78, 0, 67, 79, 76, 79, + 82, 0, 171, 171, 83, 72, + 68, 82, 104, 0, 0, 0, + 64, 0, 1, 0, 26, 0, + 0, 0, 95, 0, 0, 3, + 242, 16, 16, 0, 0, 0, + 0, 0, 95, 0, 0, 3, + 242, 16, 16, 0, 1, 0, + 0, 0, 103, 0, 0, 4, + 242, 32, 16, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 101, 0, 0, 3, 242, 32, + 16, 0, 1, 0, 0, 0, + 54, 0, 0, 5, 242, 32, + 16, 0, 0, 0, 0, 0, + 70, 30, 16, 0, 0, 0, + 0, 0, 54, 0, 0, 5, + 242, 32, 16, 0, 1, 0, + 0, 0, 70, 30, 16, 0, + 1, 0, 0, 0, 62, 0, + 0, 1, 83, 84, 65, 84, + 116, 0, 0, 0, 3, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 4, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 +}; diff --git a/src/gallium/state_trackers/d3d1x/progs/d3d11tri/d3d11tri.vcxproj b/src/gallium/state_trackers/d3d1x/progs/d3d11tri/d3d11tri.vcxproj new file mode 100755 index 0000000000..3ed69fc6f2 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/d3d11tri/d3d11tri.vcxproj @@ -0,0 +1,99 @@ +<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{1C11FC42-BFB5-4668-97F6-C5B564754F8F}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>d3d11tri</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(SolutionDir)\d3d11app</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalDependencies>d3d11.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(SolutionDir)\d3d11app</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalDependencies>d3d11.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <CustomBuild Include="d3d11tri.hlsl">
+ <FileType>Document</FileType>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">"$(DXSDK_DIR)\Utilities\bin\x86\fxc.exe" /Fh%(Identity).ps.h /Eps /Tps_4_0 %(Identity)</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Identity).ps.h;%(Identity).vs.h;%(Outputs)</Outputs>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">"$(DXSDK_DIR)\Utilities\bin\x86\fxc.exe" /Fh%(Identity).ps.h /Eps /Tps_4_0 %(Identity)</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Identity).ps.h;%(Identity).vs.h;%(Outputs)</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="..\d3d11app\d3d11app.h" />
+ <ClInclude Include="d3d11tri.hlsl.ps.h" />
+ <ClInclude Include="d3d11tri.hlsl.vs.h" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="..\d3d11app\d3d11winmain.cpp" />
+ <ClCompile Include="d3d11tri.cpp" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file diff --git a/src/gallium/state_trackers/d3d1x/progs/data/cornell_box_image.h b/src/gallium/state_trackers/d3d1x/progs/data/cornell_box_image.h new file mode 100755 index 0000000000..007b151f09 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/data/cornell_box_image.h @@ -0,0 +1,1028 @@ +unsigned char g_cornell_box_image[] = +{ + 159, 93, 68, 255, + 171, 158, 120, 255, + 167, 153, 115, 255, + 167, 158, 121, 255, + 166, 161, 122, 255, + 164, 163, 122, 255, + 163, 166, 124, 255, + 165, 166, 125, 255, + 164, 167, 125, 255, + 161, 171, 126, 255, + 161, 170, 126, 255, + 162, 171, 125, 255, + 162, 172, 125, 255, + 160, 172, 125, 255, + 161, 174, 127, 255, + 159, 175, 124, 255, + 158, 175, 124, 255, + 156, 177, 124, 255, + 155, 177, 123, 255, + 154, 178, 122, 255, + 154, 178, 122, 255, + 153, 178, 120, 255, + 151, 180, 120, 255, + 150, 181, 120, 255, + 149, 181, 121, 255, + 146, 181, 118, 255, + 146, 181, 118, 255, + 143, 182, 116, 255, + 141, 183, 114, 255, + 137, 183, 112, 255, + 139, 186, 111, 255, + 89, 176, 70, 255, + 156, 49, 33, 255, + 154, 90, 62, 255, + 172, 149, 110, 255, + 169, 145, 105, 255, + 168, 149, 109, 255, + 166, 153, 112, 255, + 165, 156, 114, 255, + 164, 159, 115, 255, + 164, 162, 117, 255, + 163, 165, 118, 255, + 161, 166, 121, 255, + 160, 169, 122, 255, + 160, 170, 121, 255, + 160, 171, 120, 255, + 158, 173, 119, 255, + 159, 174, 120, 255, + 158, 174, 120, 255, + 154, 177, 119, 255, + 154, 177, 117, 255, + 151, 178, 115, 255, + 151, 179, 115, 255, + 148, 181, 113, 255, + 146, 181, 114, 255, + 142, 182, 113, 255, + 141, 181, 110, 255, + 140, 182, 108, 255, + 136, 183, 106, 255, + 132, 184, 102, 255, + 127, 185, 99, 255, + 132, 187, 101, 255, + 85, 173, 66, 255, + 44, 170, 34, 255, + 173, 59, 45, 255, + 157, 48, 32, 255, + 155, 90, 62, 255, + 171, 148, 108, 255, + 168, 143, 103, 255, + 167, 148, 105, 255, + 166, 151, 108, 255, + 167, 154, 110, 255, + 166, 158, 111, 255, + 164, 161, 113, 255, + 162, 164, 113, 255, + 162, 166, 116, 255, + 162, 168, 118, 255, + 157, 168, 109, 255, + 150, 167, 101, 255, + 150, 169, 102, 255, + 149, 169, 102, 255, + 145, 173, 100, 255, + 150, 177, 107, 255, + 151, 181, 112, 255, + 150, 182, 110, 255, + 149, 183, 108, 255, + 146, 184, 107, 255, + 143, 184, 106, 255, + 139, 185, 104, 255, + 135, 185, 102, 255, + 132, 186, 100, 255, + 128, 186, 97, 255, + 133, 189, 99, 255, + 88, 175, 62, 255, + 42, 171, 33, 255, + 56, 184, 44, 255, + 178, 64, 46, 255, + 175, 62, 45, 255, + 164, 51, 35, 255, + 158, 91, 65, 255, + 173, 148, 109, 255, + 169, 143, 103, 255, + 169, 147, 106, 255, + 167, 151, 106, 255, + 166, 154, 109, 255, + 164, 159, 111, 255, + 165, 162, 111, 255, + 165, 166, 112, 255, + 160, 165, 106, 255, + 200, 203, 172, 255, + 245, 248, 244, 255, + 242, 243, 236, 255, + 241, 243, 236, 255, + 246, 247, 244, 255, + 197, 212, 170, 255, + 155, 184, 101, 255, + 157, 189, 107, 255, + 155, 190, 105, 255, + 152, 191, 103, 255, + 149, 192, 101, 255, + 144, 193, 99, 255, + 140, 191, 95, 255, + 134, 192, 93, 255, + 136, 194, 96, 255, + 91, 179, 65, 255, + 46, 177, 34, 255, + 57, 188, 45, 255, + 59, 193, 45, 255, + 186, 65, 49, 255, + 187, 66, 49, 255, + 191, 66, 51, 255, + 176, 51, 37, 255, + 160, 88, 61, 255, + 172, 148, 108, 255, + 169, 144, 102, 255, + 168, 147, 105, 255, + 167, 151, 106, 255, + 167, 156, 108, 255, + 168, 160, 111, 255, + 167, 165, 110, 255, + 167, 169, 110, 255, + 181, 186, 132, 255, + 217, 220, 190, 255, + 215, 219, 185, 255, + 217, 222, 185, 255, + 220, 227, 187, 255, + 188, 204, 131, 255, + 172, 198, 104, 255, + 172, 202, 105, 255, + 167, 203, 102, 255, + 163, 203, 102, 255, + 157, 202, 99, 255, + 153, 201, 96, 255, + 146, 199, 93, 255, + 148, 199, 96, 255, + 95, 183, 62, 255, + 47, 191, 38, 255, + 62, 201, 51, 255, + 61, 200, 49, 255, + 61, 198, 51, 255, + 192, 66, 51, 255, + 198, 68, 54, 255, + 209, 70, 56, 255, + 207, 70, 55, 255, + 181, 52, 37, 255, + 161, 90, 62, 255, + 172, 146, 106, 255, + 171, 145, 104, 255, + 171, 150, 107, 255, + 171, 155, 109, 255, + 171, 159, 111, 255, + 171, 164, 111, 255, + 171, 168, 113, 255, + 172, 172, 110, 255, + 172, 173, 107, 255, + 172, 180, 106, 255, + 173, 187, 106, 255, + 177, 192, 107, 255, + 181, 198, 109, 255, + 181, 202, 110, 255, + 179, 203, 110, 255, + 175, 204, 105, 255, + 169, 204, 104, 255, + 163, 202, 101, 255, + 153, 202, 97, 255, + 151, 199, 97, 255, + 97, 181, 63, 255, + 51, 190, 38, 255, + 66, 222, 53, 255, + 68, 218, 56, 255, + 64, 209, 53, 255, + 62, 205, 51, 255, + 197, 67, 54, 255, + 205, 71, 58, 255, + 220, 75, 58, 255, + 229, 75, 63, 255, + 211, 71, 54, 255, + 172, 48, 31, 255, + 162, 114, 79, 255, + 160, 148, 102, 255, + 159, 149, 102, 255, + 157, 153, 104, 255, + 158, 154, 107, 255, + 158, 157, 107, 255, + 158, 160, 108, 255, + 160, 161, 112, 255, + 158, 164, 110, 255, + 158, 166, 110, 255, + 158, 167, 108, 255, + 158, 168, 109, 255, + 156, 171, 106, 255, + 153, 170, 104, 255, + 151, 170, 102, 255, + 149, 171, 99, 255, + 145, 171, 99, 255, + 142, 171, 97, 255, + 140, 173, 97, 255, + 111, 173, 78, 255, + 46, 178, 29, 255, + 68, 223, 55, 255, + 72, 242, 62, 255, + 71, 231, 60, 255, + 66, 218, 55, 255, + 64, 208, 54, 255, + 202, 68, 55, 255, + 212, 73, 56, 255, + 229, 77, 61, 255, + 244, 81, 67, 255, + 233, 77, 62, 255, + 194, 59, 41, 255, + 174, 113, 86, 255, + 171, 151, 111, 255, + 172, 155, 115, 255, + 174, 162, 122, 255, + 175, 169, 127, 255, + 179, 175, 132, 255, + 181, 182, 138, 255, + 186, 187, 145, 255, + 186, 192, 148, 255, + 188, 195, 150, 255, + 188, 196, 149, 255, + 184, 197, 146, 255, + 180, 194, 142, 255, + 176, 191, 138, 255, + 167, 189, 128, 255, + 161, 185, 121, 255, + 153, 183, 113, 255, + 148, 182, 111, 255, + 141, 183, 106, 255, + 108, 183, 78, 255, + 54, 202, 43, 255, + 74, 243, 61, 255, + 78, 255, 66, 255, + 73, 241, 63, 255, + 68, 223, 55, 255, + 63, 214, 52, 255, + 203, 70, 56, 255, + 218, 74, 61, 255, + 237, 79, 64, 255, + 253, 85, 68, 255, + 247, 80, 67, 255, + 209, 62, 47, 255, + 184, 121, 95, 255, + 184, 160, 129, 255, + 187, 166, 134, 255, + 190, 176, 142, 255, + 195, 186, 150, 255, + 201, 195, 158, 255, + 206, 204, 166, 255, + 212, 211, 175, 255, + 213, 218, 178, 255, + 215, 221, 181, 255, + 217, 224, 183, 255, + 211, 223, 179, 255, + 207, 220, 173, 255, + 199, 213, 166, 255, + 188, 210, 154, 255, + 180, 205, 145, 255, + 168, 201, 133, 255, + 158, 198, 125, 255, + 151, 197, 119, 255, + 117, 193, 91, 255, + 60, 217, 46, 255, + 78, 255, 66, 255, + 78, 255, 69, 255, + 74, 248, 65, 255, + 68, 228, 58, 255, + 65, 215, 54, 255, + 204, 70, 56, 255, + 219, 74, 62, 255, + 241, 80, 66, 255, + 255, 87, 70, 255, + 255, 82, 71, 255, + 221, 65, 52, 255, + 190, 129, 103, 255, + 192, 169, 139, 255, + 197, 175, 144, 255, + 202, 186, 154, 255, + 209, 198, 163, 255, + 215, 209, 177, 255, + 223, 218, 186, 255, + 228, 228, 192, 255, + 231, 235, 200, 255, + 234, 239, 202, 255, + 234, 241, 200, 255, + 228, 239, 197, 255, + 224, 236, 191, 255, + 215, 228, 182, 255, + 203, 224, 169, 255, + 191, 218, 156, 255, + 179, 213, 145, 255, + 168, 208, 135, 255, + 160, 204, 129, 255, + 123, 201, 96, 255, + 63, 229, 49, 255, + 79, 255, 69, 255, + 80, 255, 70, 255, + 77, 251, 66, 255, + 70, 230, 61, 255, + 66, 216, 55, 255, + 205, 71, 57, 255, + 222, 74, 63, 255, + 243, 81, 67, 255, + 255, 86, 71, 255, + 255, 85, 70, 255, + 227, 66, 52, 255, + 197, 132, 108, 255, + 198, 175, 145, 255, + 205, 183, 153, 255, + 210, 195, 164, 255, + 219, 205, 176, 255, + 225, 217, 185, 255, + 231, 227, 195, 255, + 237, 237, 203, 255, + 244, 242, 209, 255, + 243, 247, 211, 255, + 243, 251, 210, 255, + 238, 249, 206, 255, + 231, 246, 202, 255, + 224, 238, 188, 255, + 211, 232, 176, 255, + 199, 224, 166, 255, + 187, 220, 155, 255, + 175, 215, 144, 255, + 167, 211, 138, 255, + 127, 204, 103, 255, + 64, 237, 51, 255, + 80, 255, 69, 255, + 81, 255, 70, 255, + 76, 251, 66, 255, + 70, 232, 61, 255, + 66, 218, 56, 255, + 205, 70, 57, 255, + 222, 74, 63, 255, + 240, 82, 67, 255, + 255, 86, 71, 255, + 255, 85, 70, 255, + 231, 66, 54, 255, + 200, 136, 111, 255, + 203, 179, 151, 255, + 209, 186, 158, 255, + 215, 197, 169, 255, + 222, 209, 179, 255, + 231, 220, 192, 255, + 235, 230, 200, 255, + 242, 238, 207, 255, + 247, 246, 213, 255, + 245, 251, 216, 255, + 246, 251, 217, 255, + 241, 251, 213, 255, + 234, 249, 206, 255, + 227, 241, 194, 255, + 215, 236, 183, 255, + 202, 229, 172, 255, + 188, 223, 160, 255, + 177, 219, 148, 255, + 169, 215, 144, 255, + 129, 207, 108, 255, + 62, 242, 52, 255, + 80, 255, 68, 255, + 80, 255, 70, 255, + 76, 251, 66, 255, + 69, 232, 61, 255, + 66, 218, 54, 255, + 206, 70, 57, 255, + 221, 75, 63, 255, + 238, 80, 66, 255, + 255, 86, 71, 255, + 255, 84, 70, 255, + 231, 66, 54, 255, + 202, 139, 115, 255, + 205, 181, 155, 255, + 212, 188, 161, 255, + 217, 199, 171, 255, + 224, 209, 181, 255, + 231, 220, 192, 255, + 234, 229, 199, 255, + 242, 236, 207, 255, + 243, 242, 211, 255, + 243, 246, 216, 255, + 243, 249, 210, 255, + 237, 248, 209, 255, + 233, 246, 202, 255, + 227, 241, 179, 255, + 214, 236, 170, 255, + 202, 229, 160, 255, + 191, 226, 150, 255, + 177, 220, 144, 255, + 175, 217, 153, 255, + 129, 210, 110, 255, + 61, 242, 51, 255, + 78, 255, 67, 255, + 79, 255, 69, 255, + 76, 250, 66, 255, + 70, 231, 60, 255, + 66, 218, 54, 255, + 206, 71, 57, 255, + 219, 76, 63, 255, + 235, 79, 65, 255, + 251, 85, 71, 255, + 253, 83, 67, 255, + 231, 66, 53, 255, + 202, 139, 118, 255, + 206, 182, 155, 255, + 211, 188, 161, 255, + 216, 198, 170, 255, + 223, 207, 181, 255, + 228, 217, 189, 255, + 230, 226, 196, 255, + 237, 231, 201, 255, + 237, 236, 206, 255, + 239, 237, 218, 255, + 250, 252, 49, 255, + 250, 253, 59, 255, + 246, 250, 60, 255, + 240, 245, 58, 255, + 229, 238, 55, 255, + 220, 230, 51, 255, + 221, 224, 51, 255, + 77, 197, 0, 255, + 126, 209, 94, 255, + 134, 211, 117, 255, + 61, 241, 50, 255, + 73, 255, 67, 255, + 77, 255, 69, 255, + 74, 247, 64, 255, + 70, 229, 58, 255, + 66, 218, 56, 255, + 205, 71, 56, 255, + 217, 74, 61, 255, + 232, 78, 67, 255, + 245, 83, 70, 255, + 247, 80, 69, 255, + 229, 66, 51, 255, + 202, 139, 118, 255, + 206, 179, 156, 255, + 211, 185, 161, 255, + 215, 194, 170, 255, + 222, 203, 178, 255, + 225, 211, 185, 255, + 227, 219, 191, 255, + 230, 223, 195, 255, + 232, 227, 199, 255, + 233, 228, 210, 255, + 227, 231, 53, 255, + 230, 232, 66, 255, + 225, 230, 65, 255, + 220, 228, 62, 255, + 210, 225, 62, 255, + 204, 220, 60, 255, + 209, 217, 60, 255, + 82, 198, 17, 255, + 119, 208, 87, 255, + 131, 211, 115, 255, + 57, 242, 51, 255, + 69, 255, 63, 255, + 74, 255, 67, 255, + 75, 242, 64, 255, + 69, 227, 58, 255, + 66, 216, 56, 255, + 204, 70, 56, 255, + 215, 74, 60, 255, + 227, 77, 65, 255, + 237, 81, 66, 255, + 240, 79, 67, 255, + 225, 65, 50, 255, + 202, 138, 116, 255, + 204, 178, 154, 255, + 208, 182, 158, 255, + 212, 190, 166, 255, + 216, 197, 172, 255, + 219, 204, 179, 255, + 221, 211, 184, 255, + 225, 215, 188, 255, + 224, 218, 189, 255, + 225, 219, 201, 255, + 211, 216, 48, 255, + 213, 218, 62, 255, + 210, 218, 62, 255, + 207, 217, 60, 255, + 201, 215, 57, 255, + 196, 212, 57, 255, + 200, 210, 55, 255, + 84, 198, 16, 255, + 118, 208, 87, 255, + 133, 210, 116, 255, + 47, 221, 39, 255, + 68, 255, 61, 255, + 73, 253, 65, 255, + 72, 237, 62, 255, + 69, 225, 58, 255, + 66, 215, 56, 255, + 202, 69, 55, 255, + 213, 72, 59, 255, + 222, 76, 62, 255, + 230, 79, 65, 255, + 233, 76, 64, 255, + 221, 64, 49, 255, + 200, 134, 113, 255, + 201, 174, 150, 255, + 204, 178, 155, 255, + 207, 185, 161, 255, + 211, 192, 167, 255, + 213, 197, 172, 255, + 215, 203, 177, 255, + 217, 205, 179, 255, + 216, 209, 180, 255, + 216, 210, 190, 255, + 198, 204, 45, 255, + 199, 207, 55, 255, + 197, 209, 56, 255, + 194, 209, 55, 255, + 193, 206, 55, 255, + 189, 204, 55, 255, + 193, 204, 54, 255, + 85, 197, 17, 255, + 113, 204, 82, 255, + 126, 203, 107, 255, + 34, 180, 27, 255, + 63, 253, 57, 255, + 70, 249, 60, 255, + 70, 234, 61, 255, + 67, 223, 58, 255, + 65, 216, 54, 255, + 200, 69, 56, 255, + 209, 72, 59, 255, + 216, 74, 62, 255, + 223, 76, 64, 255, + 226, 74, 63, 255, + 215, 63, 51, 255, + 200, 132, 111, 255, + 197, 171, 146, 255, + 200, 173, 150, 255, + 204, 180, 156, 255, + 206, 185, 161, 255, + 209, 191, 166, 255, + 209, 195, 169, 255, + 210, 197, 172, 255, + 207, 200, 172, 255, + 209, 202, 182, 255, + 186, 196, 41, 255, + 188, 200, 53, 255, + 188, 202, 53, 255, + 187, 202, 52, 255, + 183, 202, 53, 255, + 180, 200, 51, 255, + 188, 199, 52, 255, + 84, 199, 20, 255, + 92, 190, 53, 255, + 102, 179, 78, 255, + 33, 170, 24, 255, + 51, 221, 45, 255, + 67, 244, 62, 255, + 68, 229, 58, 255, + 67, 220, 58, 255, + 64, 213, 55, 255, + 198, 68, 54, 255, + 204, 71, 58, 255, + 211, 72, 60, 255, + 216, 74, 61, 255, + 219, 71, 61, 255, + 213, 60, 49, 255, + 197, 129, 108, 255, + 195, 166, 143, 255, + 196, 168, 144, 255, + 197, 175, 149, 255, + 200, 179, 154, 255, + 201, 184, 158, 255, + 202, 188, 161, 255, + 202, 190, 163, 255, + 200, 192, 161, 255, + 202, 193, 173, 255, + 176, 190, 40, 255, + 180, 194, 50, 255, + 178, 196, 51, 255, + 178, 196, 51, 255, + 177, 195, 50, 255, + 174, 196, 50, 255, + 182, 195, 52, 255, + 86, 199, 20, 255, + 84, 184, 46, 255, + 89, 167, 62, 255, + 34, 171, 25, 255, + 39, 199, 31, 255, + 65, 238, 59, 255, + 67, 225, 57, 255, + 66, 217, 57, 255, + 64, 212, 54, 255, + 196, 67, 53, 255, + 200, 70, 56, 255, + 205, 70, 58, 255, + 209, 71, 58, 255, + 212, 69, 58, 255, + 208, 59, 47, 255, + 199, 127, 104, 255, + 200, 154, 133, 255, + 199, 156, 133, 255, + 202, 161, 136, 255, + 203, 165, 139, 255, + 204, 171, 143, 255, + 203, 174, 144, 255, + 202, 176, 146, 255, + 199, 180, 145, 255, + 197, 184, 162, 255, + 169, 185, 37, 255, + 170, 190, 48, 255, + 172, 190, 50, 255, + 173, 191, 50, 255, + 172, 191, 50, 255, + 171, 192, 47, 255, + 178, 191, 50, 255, + 86, 198, 21, 255, + 86, 184, 48, 255, + 88, 167, 62, 255, + 35, 173, 27, 255, + 35, 187, 27, 255, + 64, 229, 56, 255, + 66, 221, 57, 255, + 65, 214, 55, 255, + 62, 210, 52, 255, + 194, 68, 52, 255, + 197, 67, 54, 255, + 197, 67, 54, 255, + 201, 66, 55, 255, + 203, 66, 55, 255, + 207, 57, 46, 255, + 142, 121, 104, 255, + 122, 229, 199, 255, + 129, 224, 200, 255, + 123, 231, 211, 255, + 118, 241, 229, 255, + 119, 241, 235, 255, + 120, 244, 238, 255, + 112, 251, 250, 255, + 144, 226, 217, 255, + 174, 192, 170, 255, + 159, 183, 34, 255, + 162, 184, 44, 255, + 164, 187, 46, 255, + 164, 188, 48, 255, + 165, 189, 46, 255, + 165, 189, 47, 255, + 173, 187, 48, 255, + 85, 195, 22, 255, + 86, 185, 48, 255, + 87, 168, 64, 255, + 35, 175, 26, 255, + 32, 183, 23, 255, + 61, 223, 51, 255, + 62, 218, 55, 255, + 63, 210, 52, 255, + 61, 209, 53, 255, + 193, 67, 53, 255, + 193, 67, 52, 255, + 191, 63, 54, 255, + 190, 63, 49, 255, + 196, 63, 51, 255, + 207, 61, 49, 255, + 92, 64, 48, 255, + 46, 154, 127, 255, + 52, 196, 156, 255, + 54, 192, 154, 255, + 49, 186, 143, 255, + 48, 185, 143, 255, + 47, 182, 138, 255, + 43, 176, 129, 255, + 40, 180, 133, 255, + 28, 180, 142, 255, + 152, 182, 35, 255, + 149, 184, 44, 255, + 155, 183, 44, 255, + 158, 185, 45, 255, + 160, 185, 43, 255, + 161, 187, 45, 255, + 167, 186, 49, 255, + 87, 195, 20, 255, + 86, 184, 46, 255, + 87, 168, 65, 255, + 35, 176, 26, 255, + 32, 183, 23, 255, + 60, 217, 49, 255, + 62, 214, 54, 255, + 64, 208, 51, 255, + 61, 206, 51, 255, + 189, 65, 51, 255, + 190, 65, 51, 255, + 183, 60, 52, 255, + 182, 60, 52, 255, + 189, 61, 48, 255, + 202, 59, 47, 255, + 98, 71, 58, 255, + 50, 145, 114, 255, + 52, 184, 142, 255, + 50, 179, 139, 255, + 51, 180, 139, 255, + 52, 181, 138, 255, + 52, 181, 138, 255, + 51, 181, 137, 255, + 51, 181, 135, 255, + 42, 181, 144, 255, + 141, 182, 34, 255, + 141, 180, 44, 255, + 148, 182, 45, 255, + 151, 182, 45, 255, + 152, 184, 47, 255, + 154, 184, 45, 255, + 162, 184, 48, 255, + 87, 194, 21, 255, + 89, 182, 47, 255, + 87, 168, 66, 255, + 35, 176, 27, 255, + 32, 184, 24, 255, + 57, 213, 47, 255, + 61, 210, 52, 255, + 61, 206, 50, 255, + 60, 205, 51, 255, + 186, 65, 49, 255, + 185, 63, 49, 255, + 177, 59, 50, 255, + 175, 57, 48, 255, + 182, 60, 47, 255, + 196, 57, 44, 255, + 95, 71, 58, 255, + 50, 146, 114, 255, + 51, 184, 142, 255, + 50, 180, 139, 255, + 50, 179, 139, 255, + 52, 182, 139, 255, + 51, 181, 137, 255, + 50, 180, 137, 255, + 51, 181, 135, 255, + 41, 179, 141, 255, + 136, 178, 32, 255, + 135, 180, 40, 255, + 139, 181, 41, 255, + 145, 180, 41, 255, + 150, 181, 44, 255, + 152, 179, 43, 255, + 159, 181, 47, 255, + 86, 192, 21, 255, + 92, 181, 47, 255, + 87, 169, 66, 255, + 35, 176, 26, 255, + 33, 184, 24, 255, + 56, 211, 47, 255, + 59, 206, 49, 255, + 59, 203, 48, 255, + 58, 203, 51, 255, + 184, 65, 50, 255, + 183, 61, 49, 255, + 172, 57, 50, 255, + 167, 53, 45, 255, + 178, 57, 48, 255, + 191, 56, 41, 255, + 95, 71, 57, 255, + 51, 145, 116, 255, + 52, 184, 141, 255, + 53, 180, 139, 255, + 53, 180, 139, 255, + 52, 178, 136, 255, + 52, 178, 135, 255, + 52, 178, 136, 255, + 48, 179, 133, 255, + 43, 179, 143, 255, + 133, 176, 32, 255, + 131, 177, 39, 255, + 137, 177, 42, 255, + 141, 177, 43, 255, + 145, 178, 43, 255, + 148, 178, 44, 255, + 154, 178, 46, 255, + 91, 189, 22, 255, + 96, 176, 55, 255, + 85, 167, 64, 255, + 37, 173, 28, 255, + 33, 182, 25, 255, + 55, 207, 47, 255, + 59, 202, 52, 255, + 61, 200, 49, 255, + 60, 200, 49, 255, + 182, 63, 50, 255, + 177, 61, 48, 255, + 170, 56, 48, 255, + 151, 48, 41, 255, + 173, 56, 44, 255, + 182, 44, 33, 255, + 94, 80, 67, 255, + 51, 145, 115, 255, + 52, 181, 140, 255, + 52, 176, 136, 255, + 50, 178, 137, 255, + 51, 178, 136, 255, + 51, 178, 135, 255, + 49, 176, 134, 255, + 49, 177, 132, 255, + 42, 177, 140, 255, + 129, 173, 30, 255, + 129, 174, 38, 255, + 134, 174, 40, 255, + 139, 174, 41, 255, + 142, 176, 39, 255, + 145, 175, 41, 255, + 152, 174, 43, 255, + 91, 187, 22, 255, + 105, 173, 61, 255, + 96, 164, 77, 255, + 31, 169, 23, 255, + 32, 180, 25, 255, + 53, 202, 47, 255, + 57, 199, 48, 255, + 59, 197, 48, 255, + 59, 199, 48, 255, + 181, 62, 50, 255, + 175, 60, 48, 255, + 166, 54, 47, 255, + 130, 44, 35, 255, + 162, 40, 31, 255, + 209, 130, 111, 255, + 105, 125, 103, 255, + 48, 141, 111, 255, + 52, 179, 138, 255, + 52, 174, 136, 255, + 50, 176, 136, 255, + 50, 176, 134, 255, + 50, 176, 133, 255, + 50, 175, 133, 255, + 49, 175, 131, 255, + 42, 175, 138, 255, + 126, 164, 20, 255, + 122, 166, 33, 255, + 129, 169, 35, 255, + 134, 170, 36, 255, + 138, 171, 41, 255, + 142, 171, 43, 255, + 148, 172, 45, 255, + 101, 182, 24, 255, + 89, 162, 53, 255, + 104, 170, 83, 255, + 72, 170, 55, 255, + 29, 172, 24, 255, + 52, 198, 46, 255, + 56, 196, 46, 255, + 57, 195, 48, 255, + 58, 198, 47, 255, + 178, 61, 48, 255, + 171, 58, 48, 255, + 156, 53, 45, 255, + 119, 28, 25, 255, + 193, 119, 102, 255, + 228, 193, 167, 255, + 96, 121, 99, 255, + 47, 142, 110, 255, + 52, 175, 137, 255, + 51, 171, 133, 255, + 50, 172, 134, 255, + 50, 172, 132, 255, + 50, 172, 131, 255, + 49, 171, 131, 255, + 49, 172, 128, 255, + 36, 169, 127, 255, + 172, 197, 99, 255, + 153, 186, 86, 255, + 148, 182, 71, 255, + 143, 176, 58, 255, + 139, 170, 45, 255, + 135, 163, 30, 255, + 139, 163, 27, 255, + 84, 155, 25, 255, + 57, 136, 49, 255, + 71, 151, 54, 255, + 85, 160, 67, 255, + 62, 166, 49, 255, + 42, 189, 34, 255, + 53, 194, 45, 255, + 54, 192, 48, 255, + 58, 196, 48, 255, + 176, 61, 47, 255, + 168, 58, 45, 255, + 140, 39, 34, 255, + 140, 73, 55, 255, + 152, 103, 77, 255, + 141, 78, 59, 255, + 90, 80, 65, 255, + 45, 148, 114, 255, + 50, 173, 134, 255, + 50, 169, 132, 255, + 50, 170, 130, 255, + 48, 170, 129, 255, + 46, 170, 129, 255, + 46, 171, 130, 255, + 44, 171, 128, 255, + 23, 161, 117, 255, + 225, 253, 204, 255, + 211, 244, 195, 255, + 212, 245, 199, 255, + 213, 245, 199, 255, + 215, 243, 193, 255, + 213, 243, 190, 255, + 208, 240, 182, 255, + 193, 235, 176, 255, + 185, 231, 167, 255, + 180, 225, 163, 255, + 172, 223, 156, 255, + 176, 221, 158, 255, + 107, 206, 96, 255, + 39, 183, 33, 255, + 55, 188, 47, 255, + 57, 193, 49, 255, + 173, 60, 45, 255, + 159, 45, 33, 255, + 159, 91, 77, 255, + 157, 108, 85, 255, + 146, 94, 74, 255, + 139, 87, 72, 255, + 134, 71, 56, 255, + 42, 143, 109, 255, + 37, 172, 131, 255, + 39, 167, 128, 255, + 43, 164, 123, 255, + 48, 159, 118, 255, + 52, 155, 115, 255, + 58, 151, 112, 255, + 63, 149, 104, 255, + 73, 149, 104, 255, + 206, 233, 185, 255, + 208, 237, 190, 255, + 207, 237, 188, 255, + 205, 237, 186, 255, + 203, 237, 185, 255, + 199, 237, 179, 255, + 199, 236, 178, 255, + 196, 235, 174, 255, + 192, 233, 170, 255, + 190, 230, 167, 255, + 184, 228, 162, 255, + 178, 227, 158, 255, + 187, 227, 167, 255, + 114, 205, 102, 255, + 38, 181, 33, 255, + 55, 191, 48, 255, + 164, 44, 34, 255, + 178, 112, 95, 255, + 183, 153, 126, 255, + 158, 121, 90, 255, + 151, 117, 88, 255, + 148, 122, 92, 255, + 148, 126, 95, 255, + 140, 136, 102, 255, + 135, 144, 109, 255, + 145, 148, 114, 255, + 151, 157, 121, 255, + 160, 167, 131, 255, + 170, 178, 141, 255, + 178, 190, 149, 255, + 188, 201, 162, 255, + 199, 213, 173, 255, + 199, 220, 181, 255, + 205, 226, 183, 255, + 203, 227, 182, 255, + 204, 228, 182, 255, + 202, 228, 180, 255, + 200, 230, 179, 255, + 199, 229, 178, 255, + 196, 227, 176, 255, + 193, 228, 172, 255, + 189, 226, 168, 255, + 186, 225, 165, 255, + 181, 223, 161, 255, + 177, 223, 155, 255, + 184, 226, 163, 255, + 112, 203, 98, 255, + 39, 183, 34, 255, + 185, 121, 100, 255, + 206, 188, 162, 255, + 196, 177, 149, 255, + 193, 178, 146, 255, + 192, 186, 153, 255, + 198, 192, 161, 255, + 204, 198, 169, 255, + 207, 203, 173, 255, + 210, 206, 175, 255, + 207, 212, 178, 255, + 209, 213, 179, 255, + 208, 216, 181, 255, + 207, 216, 182, 255, + 207, 218, 183, 255, + 206, 217, 179, 255, + 204, 219, 180, 255, + 203, 219, 180, 255, + 203, 220, 178, 255, + 201, 221, 178, 255, + 202, 222, 178, 255, + 201, 222, 176, 255, + 199, 223, 177, 255, + 198, 222, 176, 255, + 195, 222, 173, 255, + 194, 222, 174, 255, + 193, 221, 170, 255, + 191, 220, 167, 255, + 187, 220, 166, 255, + 182, 220, 162, 255, + 180, 218, 158, 255, + 186, 221, 161, 255, + 111, 202, 97, 255, +}; + diff --git a/src/gallium/state_trackers/d3d1x/progs/data/tux_image.h b/src/gallium/state_trackers/d3d1x/progs/data/tux_image.h new file mode 100755 index 0000000000..53aeffa558 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/data/tux_image.h @@ -0,0 +1,1028 @@ +unsigned char g_tux_image[] = +{ + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 249, 249, 249, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 234, 234, 234, 255, + 23, 23, 25, 255, + 0, 0, 0, 255, + 0, 0, 0, 255, + 0, 0, 0, 255, + 81, 81, 83, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 0, 0, 0, 255, + 0, 0, 0, 255, + 1, 1, 3, 255, + 0, 0, 2, 255, + 8, 7, 9, 255, + 71, 71, 70, 255, + 0, 0, 0, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 106, 106, 107, 255, + 0, 0, 0, 255, + 1, 1, 3, 255, + 2, 2, 4, 255, + 0, 0, 2, 255, + 13, 13, 13, 255, + 46, 46, 46, 255, + 0, 0, 0, 255, + 57, 57, 59, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 33, 33, 35, 255, + 0, 0, 0, 255, + 0, 0, 1, 255, + 1, 1, 3, 255, + 0, 0, 0, 255, + 0, 0, 0, 255, + 0, 0, 0, 255, + 0, 0, 1, 255, + 0, 0, 0, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 10, 10, 13, 255, + 87, 87, 87, 255, + 43, 42, 43, 255, + 0, 0, 0, 255, + 17, 17, 18, 255, + 163, 163, 162, 255, + 71, 70, 70, 255, + 0, 0, 0, 255, + 0, 0, 0, 255, + 213, 213, 213, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 29, 29, 32, 255, + 178, 178, 177, 255, + 210, 212, 215, 255, + 0, 0, 0, 255, + 213, 215, 219, 255, + 112, 112, 112, 255, + 235, 235, 234, 255, + 0, 0, 0, 255, + 0, 0, 0, 255, + 176, 176, 177, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 78, 79, 79, 255, + 43, 47, 59, 255, + 103, 94, 77, 255, + 103, 77, 0, 255, + 168, 156, 123, 255, + 0, 0, 0, 255, + 179, 182, 191, 255, + 1, 1, 3, 255, + 0, 0, 0, 255, + 182, 182, 183, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 31, 35, 45, 255, + 172, 146, 99, 255, + 237, 173, 0, 255, + 248, 208, 9, 255, + 250, 215, 39, 255, + 220, 183, 4, 255, + 223, 201, 120, 255, + 0, 0, 0, 255, + 0, 0, 0, 255, + 151, 151, 152, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 93, 89, 86, 255, + 216, 146, 0, 255, + 245, 193, 9, 255, + 243, 215, 34, 255, + 255, 229, 32, 255, + 228, 193, 8, 255, + 238, 173, 0, 255, + 0, 0, 2, 255, + 0, 0, 0, 255, + 92, 92, 93, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 77, 81, 91, 255, + 120, 94, 41, 255, + 209, 163, 0, 255, + 215, 176, 9, 255, + 189, 133, 0, 255, + 198, 135, 4, 255, + 198, 176, 136, 255, + 0, 0, 0, 255, + 115, 115, 114, 255, + 0, 0, 0, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 0, 0, 0, 255, + 181, 186, 198, 255, + 169, 142, 94, 255, + 174, 119, 18, 255, + 174, 136, 68, 255, + 189, 193, 203, 255, + 255, 255, 255, 255, + 84, 84, 84, 255, + 2, 2, 3, 255, + 0, 0, 0, 255, + 107, 106, 107, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 116, 116, 116, 255, + 35, 35, 36, 255, + 255, 255, 255, 255, + 185, 187, 193, 255, + 182, 186, 197, 255, + 214, 218, 225, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 0, 0, 0, 255, + 0, 0, 1, 255, + 0, 0, 0, 255, + 248, 248, 248, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 198, 198, 199, 255, + 0, 0, 0, 255, + 246, 246, 244, 255, + 255, 255, 255, 255, + 252, 253, 251, 255, + 250, 250, 249, 255, + 255, 255, 255, 255, + 255, 255, 253, 255, + 255, 255, 253, 255, + 255, 255, 255, 255, + 68, 68, 69, 255, + 0, 0, 0, 255, + 0, 0, 0, 255, + 16, 16, 18, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 246, 246, 246, 255, + 0, 0, 0, 255, + 26, 25, 26, 255, + 254, 255, 254, 255, + 255, 255, 253, 255, + 243, 243, 242, 255, + 241, 241, 240, 255, + 253, 253, 251, 255, + 247, 247, 246, 255, + 236, 236, 235, 255, + 236, 235, 235, 255, + 132, 131, 132, 255, + 0, 0, 0, 255, + 0, 0, 1, 255, + 0, 0, 0, 255, + 46, 46, 47, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 67, 67, 69, 255, + 0, 0, 0, 255, + 80, 80, 80, 255, + 243, 243, 242, 255, + 254, 254, 252, 255, + 245, 245, 243, 255, + 252, 252, 251, 255, + 253, 253, 251, 255, + 252, 252, 250, 255, + 235, 235, 233, 255, + 214, 213, 213, 255, + 224, 223, 223, 255, + 31, 31, 32, 255, + 23, 23, 23, 255, + 1, 0, 2, 255, + 0, 0, 0, 255, + 179, 179, 179, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 0, 0, 0, 255, + 0, 0, 0, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 253, 253, 251, 255, + 254, 254, 252, 255, + 253, 253, 251, 255, + 253, 253, 251, 255, + 253, 253, 251, 255, + 255, 255, 253, 255, + 255, 255, 255, 255, + 253, 253, 252, 255, + 184, 184, 184, 255, + 0, 0, 0, 255, + 32, 32, 32, 255, + 0, 0, 0, 255, + 37, 37, 39, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 189, 189, 189, 255, + 0, 0, 0, 255, + 111, 111, 111, 255, + 255, 255, 255, 255, + 253, 253, 251, 255, + 253, 253, 251, 255, + 248, 248, 247, 255, + 253, 253, 251, 255, + 252, 252, 250, 255, + 253, 253, 251, 255, + 252, 252, 251, 255, + 252, 252, 250, 255, + 254, 254, 252, 255, + 255, 255, 255, 255, + 15, 15, 16, 255, + 16, 16, 17, 255, + 12, 11, 13, 255, + 0, 0, 0, 255, + 247, 247, 246, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 51, 51, 52, 255, + 0, 0, 0, 255, + 250, 250, 248, 255, + 255, 255, 255, 255, + 253, 253, 251, 255, + 253, 253, 251, 255, + 240, 240, 239, 255, + 254, 254, 252, 255, + 253, 253, 251, 255, + 252, 252, 250, 255, + 252, 252, 251, 255, + 253, 253, 250, 255, + 253, 253, 251, 255, + 255, 255, 255, 255, + 56, 56, 57, 255, + 0, 0, 0, 255, + 24, 24, 25, 255, + 0, 0, 0, 255, + 152, 152, 153, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 200, 200, 201, 255, + 0, 0, 0, 255, + 19, 19, 20, 255, + 255, 255, 255, 255, + 253, 253, 251, 255, + 253, 253, 251, 255, + 253, 253, 251, 255, + 235, 235, 234, 255, + 254, 254, 252, 255, + 253, 253, 251, 255, + 253, 253, 251, 255, + 253, 253, 251, 255, + 253, 253, 251, 255, + 252, 252, 250, 255, + 255, 255, 255, 255, + 83, 83, 83, 255, + 0, 0, 0, 255, + 18, 18, 18, 255, + 0, 0, 0, 255, + 95, 95, 97, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 34, 35, 39, 255, + 10, 12, 19, 255, + 69, 69, 70, 255, + 255, 255, 255, 255, + 253, 253, 251, 255, + 253, 253, 251, 255, + 252, 252, 250, 255, + 237, 237, 236, 255, + 254, 254, 252, 255, + 252, 252, 250, 255, + 253, 253, 251, 255, + 252, 252, 250, 255, + 253, 253, 251, 255, + 252, 252, 250, 255, + 255, 255, 255, 255, + 91, 91, 92, 255, + 2, 2, 4, 255, + 1, 1, 3, 255, + 0, 0, 0, 255, + 81, 81, 81, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 53, 51, 49, 255, + 49, 33, 0, 255, + 62, 67, 84, 255, + 255, 255, 255, 255, + 253, 253, 251, 255, + 253, 253, 251, 255, + 251, 251, 250, 255, + 240, 240, 239, 255, + 254, 254, 252, 255, + 253, 253, 251, 255, + 252, 252, 250, 255, + 253, 253, 251, 255, + 252, 252, 250, 255, + 253, 254, 253, 255, + 255, 255, 255, 255, + 63, 65, 73, 255, + 9, 9, 10, 255, + 11, 11, 12, 255, + 13, 13, 15, 255, + 104, 105, 114, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 233, 174, 24, 255, + 255, 217, 8, 255, + 107, 77, 0, 255, + 131, 137, 154, 255, + 255, 255, 255, 255, + 254, 254, 252, 255, + 250, 250, 249, 255, + 239, 239, 237, 255, + 254, 254, 252, 255, + 252, 252, 250, 255, + 252, 252, 251, 255, + 253, 253, 251, 255, + 253, 254, 253, 255, + 251, 249, 248, 255, + 255, 205, 14, 255, + 87, 70, 6, 255, + 0, 0, 1, 255, + 0, 0, 0, 255, + 0, 2, 14, 255, + 216, 203, 137, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 230, 207, 159, 255, + 216, 168, 49, 255, + 207, 153, 25, 255, + 239, 180, 4, 255, + 246, 190, 11, 255, + 255, 214, 12, 255, + 25, 13, 0, 255, + 79, 81, 89, 255, + 255, 255, 255, 255, + 255, 255, 254, 255, + 250, 250, 249, 255, + 253, 253, 251, 255, + 253, 253, 251, 255, + 253, 253, 251, 255, + 252, 252, 250, 255, + 255, 255, 255, 255, + 231, 227, 222, 255, + 250, 184, 0, 255, + 101, 71, 5, 255, + 0, 0, 3, 255, + 0, 0, 3, 255, + 112, 83, 2, 255, + 250, 193, 0, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 210, 154, 18, 255, + 247, 187, 0, 255, + 247, 189, 7, 255, + 246, 189, 11, 255, + 245, 189, 12, 255, + 249, 191, 12, 255, + 240, 191, 11, 255, + 0, 0, 0, 255, + 24, 25, 26, 255, + 255, 255, 255, 255, + 255, 255, 253, 255, + 253, 253, 251, 255, + 253, 253, 251, 255, + 252, 252, 251, 255, + 253, 253, 251, 255, + 255, 255, 255, 255, + 188, 186, 184, 255, + 232, 168, 0, 255, + 216, 154, 8, 255, + 158, 108, 7, 255, + 182, 127, 7, 255, + 251, 190, 10, 255, + 243, 181, 0, 255, + 250, 247, 244, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 215, 169, 62, 255, + 245, 186, 4, 255, + 245, 189, 11, 255, + 245, 189, 12, 255, + 245, 189, 12, 255, + 245, 188, 12, 255, + 255, 204, 12, 255, + 125, 99, 7, 255, + 0, 0, 0, 255, + 129, 128, 128, 255, + 255, 255, 255, 255, + 253, 253, 251, 255, + 253, 253, 251, 255, + 252, 252, 250, 255, + 253, 253, 251, 255, + 255, 255, 255, 255, + 181, 178, 176, 255, + 220, 156, 0, 255, + 241, 183, 11, 255, + 232, 173, 9, 255, + 239, 181, 10, 255, + 247, 191, 12, 255, + 245, 188, 8, 255, + 234, 176, 3, 255, + 250, 248, 243, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 216, 173, 80, 255, + 243, 184, 4, 255, + 245, 189, 11, 255, + 245, 188, 12, 255, + 245, 189, 12, 255, + 245, 189, 12, 255, + 246, 188, 12, 255, + 255, 203, 2, 255, + 120, 112, 98, 255, + 255, 255, 255, 255, + 254, 254, 253, 255, + 252, 252, 251, 255, + 253, 253, 251, 255, + 255, 255, 254, 255, + 255, 255, 255, 255, + 213, 214, 216, 255, + 8, 0, 0, 255, + 235, 173, 7, 255, + 248, 192, 12, 255, + 247, 191, 12, 255, + 246, 190, 12, 255, + 245, 189, 12, 255, + 245, 189, 11, 255, + 247, 190, 7, 255, + 238, 169, 0, 255, + 249, 245, 236, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 206, 145, 1, 255, + 249, 191, 9, 255, + 246, 190, 12, 255, + 245, 189, 11, 255, + 245, 189, 11, 255, + 245, 189, 12, 255, + 245, 189, 12, 255, + 247, 191, 9, 255, + 197, 150, 8, 255, + 229, 234, 248, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 104, 104, 104, 255, + 0, 0, 0, 255, + 35, 21, 1, 255, + 236, 174, 10, 255, + 248, 191, 12, 255, + 245, 189, 12, 255, + 245, 189, 11, 255, + 246, 191, 11, 255, + 248, 190, 4, 255, + 234, 168, 0, 255, + 222, 178, 80, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 245, 240, 231, 255, + 193, 121, 0, 255, + 232, 170, 0, 255, + 239, 180, 4, 255, + 248, 191, 12, 255, + 250, 193, 12, 255, + 248, 192, 12, 255, + 246, 190, 12, 255, + 251, 194, 12, 255, + 220, 157, 4, 255, + 9, 0, 0, 255, + 10, 12, 17, 255, + 30, 30, 31, 255, + 0, 0, 2, 255, + 0, 0, 0, 255, + 0, 0, 0, 255, + 0, 0, 0, 255, + 62, 38, 2, 255, + 232, 169, 9, 255, + 250, 194, 12, 255, + 247, 191, 12, 255, + 248, 191, 12, 255, + 229, 164, 0, 255, + 210, 162, 50, 255, + 241, 234, 228, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 237, 231, 222, 255, + 207, 183, 141, 255, + 181, 135, 49, 255, + 158, 97, 0, 255, + 182, 120, 0, 255, + 220, 157, 0, 255, + 243, 183, 11, 255, + 236, 175, 10, 255, + 164, 104, 0, 255, + 26, 11, 0, 255, + 42, 44, 49, 255, + 75, 75, 76, 255, + 75, 75, 76, 255, + 78, 78, 80, 255, + 82, 82, 83, 255, + 38, 41, 49, 255, + 48, 27, 0, 255, + 197, 134, 3, 255, + 237, 178, 11, 255, + 237, 176, 8, 255, + 187, 122, 0, 255, + 214, 191, 154, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 217, 211, 206, 255, + 163, 137, 96, 255, + 112, 63, 0, 255, + 105, 53, 0, 255, + 91, 63, 21, 255, + 248, 251, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 219, 217, 212, 255, + 95, 48, 0, 255, + 143, 84, 0, 255, + 124, 71, 0, 255, + 210, 201, 190, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 231, 232, 234, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 222, 223, 226, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, +}; + diff --git a/src/gallium/state_trackers/d3d1x/progs/progs.sln b/src/gallium/state_trackers/d3d1x/progs/progs.sln new file mode 100755 index 0000000000..13c2d6e581 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/progs/progs.sln @@ -0,0 +1,49 @@ +
+Microsoft Visual Studio Solution File, Format Version 11.00
+# Visual Studio 2010
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "d3d11app", "d3d11app", "{77576C4F-7281-41FB-A5C7-D12707AB9ED0}"
+ ProjectSection(SolutionItems) = preProject
+ d3d11app\d3d11blit.hlsl = d3d11app\d3d11blit.hlsl
+ EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "d3d11gears", "d3d11gears\d3d11gears.vcxproj", "{706313AB-8F2C-48D2-9F67-31AA043F48C9}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "d3d11tri", "d3d11tri\d3d11tri.vcxproj", "{1C11FC42-BFB5-4668-97F6-C5B564754F8F}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "d3d11spikysphere", "d3d11spikysphere\d3d11spikysphere.vcxproj", "{64988608-72A3-4125-8A31-45E1EACE8F0A}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "d3d11tex", "d3d11tex\d3d11tex.vcxproj", "{14F73B97-2DC6-423E-97D9-64E3368713DC}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "d3d10tri", "d3d10tri\d3d10tri.vcxproj", "{5366F4FD-0E6C-40CC-B2F2-CE3D350F0729}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Win32 = Debug|Win32
+ Release|Win32 = Release|Win32
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {706313AB-8F2C-48D2-9F67-31AA043F48C9}.Debug|Win32.ActiveCfg = Debug|Win32
+ {706313AB-8F2C-48D2-9F67-31AA043F48C9}.Debug|Win32.Build.0 = Debug|Win32
+ {706313AB-8F2C-48D2-9F67-31AA043F48C9}.Release|Win32.ActiveCfg = Release|Win32
+ {706313AB-8F2C-48D2-9F67-31AA043F48C9}.Release|Win32.Build.0 = Release|Win32
+ {1C11FC42-BFB5-4668-97F6-C5B564754F8F}.Debug|Win32.ActiveCfg = Debug|Win32
+ {1C11FC42-BFB5-4668-97F6-C5B564754F8F}.Debug|Win32.Build.0 = Debug|Win32
+ {1C11FC42-BFB5-4668-97F6-C5B564754F8F}.Release|Win32.ActiveCfg = Release|Win32
+ {1C11FC42-BFB5-4668-97F6-C5B564754F8F}.Release|Win32.Build.0 = Release|Win32
+ {64988608-72A3-4125-8A31-45E1EACE8F0A}.Debug|Win32.ActiveCfg = Debug|Win32
+ {64988608-72A3-4125-8A31-45E1EACE8F0A}.Debug|Win32.Build.0 = Debug|Win32
+ {64988608-72A3-4125-8A31-45E1EACE8F0A}.Release|Win32.ActiveCfg = Release|Win32
+ {64988608-72A3-4125-8A31-45E1EACE8F0A}.Release|Win32.Build.0 = Release|Win32
+ {14F73B97-2DC6-423E-97D9-64E3368713DC}.Debug|Win32.ActiveCfg = Debug|Win32
+ {14F73B97-2DC6-423E-97D9-64E3368713DC}.Debug|Win32.Build.0 = Debug|Win32
+ {14F73B97-2DC6-423E-97D9-64E3368713DC}.Release|Win32.ActiveCfg = Release|Win32
+ {14F73B97-2DC6-423E-97D9-64E3368713DC}.Release|Win32.Build.0 = Release|Win32
+ {5366F4FD-0E6C-40CC-B2F2-CE3D350F0729}.Debug|Win32.ActiveCfg = Debug|Win32
+ {5366F4FD-0E6C-40CC-B2F2-CE3D350F0729}.Debug|Win32.Build.0 = Debug|Win32
+ {5366F4FD-0E6C-40CC-B2F2-CE3D350F0729}.Release|Win32.ActiveCfg = Release|Win32
+ {5366F4FD-0E6C-40CC-B2F2-CE3D350F0729}.Release|Win32.Build.0 = Release|Win32
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/src/gallium/state_trackers/d3d1x/tools/fxc b/src/gallium/state_trackers/d3d1x/tools/fxc new file mode 100755 index 0000000000..0cf76a0af6 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/tools/fxc @@ -0,0 +1,16 @@ +#!/bin/bash +dir="$(dirname "$0")/../mstools" +(cd "$dir"; ./download-mstools) + +arch="$(uname -m)" +if test "$arch" == i386 || test "$arch" == i486 || test "$arch" == i586 || test "$arch" == i686 || test "$arch" == x86_64; then + emu="wine" +else + emu="qemu-i386 wine" +fi +exe="$dir/fxc.exe" +if test "$#" == 0 || test "$1" == "--help"; then + exec $emu "$exe" "/?" +else + exec $emu "$exe" "$@" +fi diff --git a/src/gallium/state_trackers/d3d1x/w32api b/src/gallium/state_trackers/d3d1x/w32api new file mode 120000 index 0000000000..e47a1989e1 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/w32api @@ -0,0 +1 @@ +/usr/include/wine/windows
\ No newline at end of file diff --git a/src/gallium/state_trackers/d3d1x/winedlls/Makefile b/src/gallium/state_trackers/d3d1x/winedlls/Makefile new file mode 100644 index 0000000000..c7e51b2fdd --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/winedlls/Makefile @@ -0,0 +1,11 @@ +SUBDIRS=dxgi d3d10 d3d10_1 d3d11 + +all: + @for dir in $(SUBDIRS) ; do $(MAKE) -C "$$dir" || exit $?; done + +clean: + rm -f `find . -name \*.[oa]` + rm -f `find . -name depend` + +install: + sudo install */*.dll.so /usr/lib/wine diff --git a/src/gallium/state_trackers/d3d1x/winedlls/Makefile.wine b/src/gallium/state_trackers/d3d1x/winedlls/Makefile.wine new file mode 100644 index 0000000000..c9a06876c4 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/winedlls/Makefile.wine @@ -0,0 +1,23 @@ +TOP=../../../../../.. +D3D1X=../.. +include $(TOP)/configs/current +CFLAGS=$(CXXFLAGS) + +all: lib$(LIBNAME).def lib$(LIBNAME).cross.a $(LIBNAME).dll.so + +%.dll.fake: %.spec $(OBJECTS) version.res + wineg++ -m32 -fasynchronous-unwind-tables -shared $^ -o $@ $(LDADD) + +%.dll.so: %.spec $(OBJECTS) version.res + wineg++ -m32 -fasynchronous-unwind-tables -shared $^ -o $@ $(LDADD) + +lib%.def: %.spec + winebuild -w --def -o $@ --export $< + +lib%.cross.a: %.spec + winebuild -m32 -b i586-mingw32msvc -w --implib -o $@ --export $< + +version.res: version.rc + wrc --nostdinc -I. -I. -I../../include -I../../include -D__WINESRC__ -fo$@ $^ + +include ../../../../Makefile.template diff --git a/src/gallium/state_trackers/d3d1x/winedlls/d3d10/Makefile b/src/gallium/state_trackers/d3d1x/winedlls/d3d10/Makefile new file mode 100644 index 0000000000..0ea5ffea0d --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/winedlls/d3d10/Makefile @@ -0,0 +1,6 @@ +LIBNAME=d3d10 +LIBRARY_INCLUDES=-I$(D3D1X)/gd3dapi -I$(D3D1X)/d3dapi -I$(D3D1X)/w32api +OBJECTS=../../dxgid3d10/libdxgid3d10.a ../../gd3d10/libgd3d10.a ../../gd3d1x/libgd3d1x.a ../../d3d1xshader/libd3d1xshader.a ../../d3d1xstutil/libd3d1xstutil.a ../../../../auxiliary/libgallium.a +LDADD=-L../dxgi -ldxgi -ldl + +include ../Makefile.wine diff --git a/src/gallium/state_trackers/d3d1x/winedlls/d3d10/d3d10.spec b/src/gallium/state_trackers/d3d1x/winedlls/d3d10/d3d10.spec new file mode 100644 index 0000000000..4a68ab58db --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/winedlls/d3d10/d3d10.spec @@ -0,0 +1,33 @@ +@ stub D3D10CompileEffectFromMemory +@ stub D3D10CompileShader +@ stdcall D3D10CreateBlob(long ptr) +@ stdcall D3D10CreateDevice(ptr long ptr long long ptr) +@ stdcall D3D10CreateDeviceAndSwapChain(ptr long ptr long long ptr ptr ptr) +@ stub D3D10CreateEffectFromMemory +@ stub D3D10CreateEffectPoolFromMemory +@ stub D3D10CreateStateBlock +@ stub D3D10DisassembleEffect +@ stub D3D10DisassembleShader +@ stdcall D3D10GetGeometryShaderProfile(ptr) +@ stdcall D3D10GetInputAndOutputSignatureBlob(ptr long ptr) +@ stdcall D3D10GetInputSignatureBlob(ptr long ptr) +@ stdcall D3D10GetOutputSignatureBlob(ptr long ptr) +@ stdcall D3D10GetPixelShaderProfile(ptr) +@ stub D3D10GetShaderDebugInfo +@ stub D3D10GetVersion +@ stdcall D3D10GetVertexShaderProfile(ptr) +@ stub D3D10PreprocessShader +@ stub D3D10ReflectShader +@ stub D3D10RegisterLayers +@ stub D3D10StateBlockMaskDifference +@ stub D3D10StateBlockMaskDisableAll +@ stub D3D10StateBlockMaskDisableCapture +@ stub D3D10StateBlockMaskEnableAll +@ stub D3D10StateBlockMaskEnableCapture +@ stub D3D10StateBlockMaskGetSetting +@ stub D3D10StateBlockMaskIntersect +@ stub D3D10StateBlockMaskUnion + +@ stdcall D3D10CreateDevice1(ptr long ptr long long long ptr) +@ stdcall D3D10CreateDeviceAndSwapChain1(ptr long ptr long long long ptr ptr ptr) + diff --git a/src/gallium/state_trackers/d3d1x/winedlls/d3d10/version.rc b/src/gallium/state_trackers/d3d1x/winedlls/d3d10/version.rc new file mode 100644 index 0000000000..0575ab8b57 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/winedlls/d3d10/version.rc @@ -0,0 +1,3 @@ +#define FILENAME "d3d10" +#define NAME "D3D10" +#include "../version.rc.h" diff --git a/src/gallium/state_trackers/d3d1x/winedlls/d3d10_1/Makefile b/src/gallium/state_trackers/d3d1x/winedlls/d3d10_1/Makefile new file mode 100644 index 0000000000..60cdca1af9 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/winedlls/d3d10_1/Makefile @@ -0,0 +1,6 @@ +LIBNAME=d3d10_1 +LIBRARY_INCLUDES= +OBJECTS= +LDADD=-L../d3d10 -ld3d10 + +include ../Makefile.wine diff --git a/src/gallium/state_trackers/d3d1x/winedlls/d3d10_1/d3d10_1.spec b/src/gallium/state_trackers/d3d1x/winedlls/d3d10_1/d3d10_1.spec new file mode 100644 index 0000000000..993e4bbe01 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/winedlls/d3d10_1/d3d10_1.spec @@ -0,0 +1,29 @@ +@ stub D3D10CompileShader +@ stdcall D3D10CreateBlob(long ptr) d3d10.D3D10CreateBlob +@ stdcall D3D10CreateDevice1(ptr long ptr long long long ptr) d3d10.D3D10CreateDevice1 +@ stdcall D3D10CreateDeviceAndSwapChain1(ptr long ptr long long long ptr ptr ptr) d3d10.D3D10CreateDeviceAndSwapChain1 +@ stub D3D10CreateEffectFromMemory +@ stub D3D10CreateEffectPoolFromMemory +@ stub D3D10CreateStateBlock +@ stub D3D10DisassembleEffect +@ stub D3D10DisassembleShader +@ stdcall D3D10GetGeometryShaderProfile(ptr) d3d10.D3D10GetGeometryShaderProfile +@ stdcall D3D10GetInputAndOutputSignatureBlob(ptr long ptr) d3d10.D3D10GetInputAndOutputSignatureBlob +@ stdcall D3D10GetInputSignatureBlob(ptr long ptr) d3d10.D3D10GetInputSignatureBlob +@ stdcall D3D10GetOutputSignatureBlob(ptr long ptr) d3d10.D3D10GetOutputSignatureBlob +@ stdcall D3D10GetPixelShaderProfile(ptr) d3d10.D3D10GetPixelShaderProfile +@ stub D3D10GetShaderDebugInfo +@ stub D3D10GetVersion +@ stdcall D3D10GetVertexShaderProfile(ptr) d3d10.D3D10GetVertexShaderProfile +@ stub D3D10PreprocessShader +@ stub D3D10ReflectShader +@ stub D3D10RegisterLayers +@ stub D3D10StateBlockMaskDifference +@ stub D3D10StateBlockMaskDisableAll +@ stub D3D10StateBlockMaskDisableCapture +@ stub D3D10StateBlockMaskEnableAll +@ stub D3D10StateBlockMaskEnableCapture +@ stub D3D10StateBlockMaskGetSetting +@ stub D3D10StateBlockMaskIntersect +@ stub D3D10StateBlockMaskUnion + diff --git a/src/gallium/state_trackers/d3d1x/winedlls/d3d10_1/version.rc b/src/gallium/state_trackers/d3d1x/winedlls/d3d10_1/version.rc new file mode 100644 index 0000000000..0575ab8b57 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/winedlls/d3d10_1/version.rc @@ -0,0 +1,3 @@ +#define FILENAME "d3d10" +#define NAME "D3D10" +#include "../version.rc.h" diff --git a/src/gallium/state_trackers/d3d1x/winedlls/d3d11/Makefile b/src/gallium/state_trackers/d3d1x/winedlls/d3d11/Makefile new file mode 100644 index 0000000000..b8d992e243 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/winedlls/d3d11/Makefile @@ -0,0 +1,6 @@ +LIBNAME=d3d11 +LIBRARY_INCLUDES=-I$(D3D1X)/gd3dapi -I$(D3D1X)/d3dapi -I$(D3D1X)/w32api +OBJECTS=../../dxgid3d11/libdxgid3d11.a ../../gd3d11/libgd3d11.a ../../gd3d1x/libgd3d1x.a ../../d3d1xshader/libd3d1xshader.a ../../d3d1xstutil/libd3d1xstutil.a ../../../../auxiliary/libgallium.a +LDADD=-L../dxgi -ldxgi -ldl + +include ../Makefile.wine diff --git a/src/gallium/state_trackers/d3d1x/winedlls/d3d11/d3d11.spec b/src/gallium/state_trackers/d3d1x/winedlls/d3d11/d3d11.spec new file mode 100644 index 0000000000..1d2e0c5b93 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/winedlls/d3d11/d3d11.spec @@ -0,0 +1,6 @@ +@ stub D3D11CoreCreateDevice +@ stub D3D11CoreCreateLayeredDevice +@ stub D3D11CoreGetLayeredDeviceSize +@ stub D3D11CoreRegisterLayers +@ stdcall D3D11CreateDevice(ptr long ptr long ptr long long ptr ptr ptr) +@ stdcall D3D11CreateDeviceAndSwapChain(ptr long ptr long ptr long long ptr ptr ptr ptr ptr) diff --git a/src/gallium/state_trackers/d3d1x/winedlls/d3d11/version.rc b/src/gallium/state_trackers/d3d1x/winedlls/d3d11/version.rc new file mode 100644 index 0000000000..a398678333 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/winedlls/d3d11/version.rc @@ -0,0 +1,3 @@ +#define FILENAME "d3d11" +#define NAME "D3D11" +#include "../version.rc.h" diff --git a/src/gallium/state_trackers/d3d1x/winedlls/dxgi/Makefile b/src/gallium/state_trackers/d3d1x/winedlls/dxgi/Makefile new file mode 100644 index 0000000000..650bdc84d5 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/winedlls/dxgi/Makefile @@ -0,0 +1,6 @@ +LIBNAME=dxgi +LIBRARY_INCLUDES=-I$(D3D1X)/gd3dapi -I$(D3D1X)/d3dapi -I$(D3D1X)/w32api +OBJECTS=dxgi_dll.o ../../dxgi/libdxgi.a ../../d3d1xstutil/libd3d1xstutil.a ../../../egl/libegl.a ../../../../auxiliary/libgallium.a ../../../../winsys/sw/xlib/libws_xlib.a +LDADD=-lgdi32 -lEGL -lXfixes -lX11 -ldrm -ldl -lXext + +include ../Makefile.wine diff --git a/src/gallium/state_trackers/d3d1x/winedlls/dxgi/dxgi.spec b/src/gallium/state_trackers/d3d1x/winedlls/dxgi/dxgi.spec new file mode 100644 index 0000000000..65a91a4583 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/winedlls/dxgi/dxgi.spec @@ -0,0 +1,4 @@ +@ stdcall CreateDXGIFactory(ptr ptr) +@ stdcall CreateDXGIFactory1(ptr ptr) +@ stub DXGID3D10CreateDevice +@ stub DXGID3D10RegisterLayers diff --git a/src/gallium/state_trackers/d3d1x/winedlls/dxgi/dxgi_dll.c b/src/gallium/state_trackers/d3d1x/winedlls/dxgi/dxgi_dll.c new file mode 100644 index 0000000000..43e2980afd --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/winedlls/dxgi/dxgi_dll.c @@ -0,0 +1,264 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <windows.h> +#include <winnt.h> +#include <X11/Xlib.h> +#include <galliumdxgi.h> + +#define DLL_WINE_PREATTACH 8 + +#define X11DRV_ESCAPE 6789 +#define X11DRV_GET_DISPLAY 0 +#define X11DRV_GET_DRAWABLE 1 + +/* Wine works in this way: wineserver stores the all window positions + * in (somewhat fictitious) "screen coordinates", and does not itself + * interact with X11. + * + * Instead, it is the responsibliity of the owner of the X window to + * handle ConfigureNotify and inform wineserver that the window + * moved. + * + * This means that we can freely look at window positions non-atomically, + * since they won't get updated until we return and the application + * processes the Win32 message queue. + * + * Of course, if this thread doesn't own the window, we are screwed. + * + * It might be a good idea to integrate this code in winex11.drv. + */ + +struct WineDXGIBackend +{ + const IGalliumDXGIBackendVtbl *vtbl_IGalliumDXGIBackend; + LONG ref; +}; + +static HRESULT STDMETHODCALLTYPE WineDXGIBackend_BeginPresent( + IGalliumDXGIBackend* This, + HWND hwnd, + void** ppresent_cookie, + void** pwindow, + RECT* prect, + RGNDATA** prgndata, + BOOL* ppreserve_aspect_ratio) +{ + /* this is the parent HWND which actually has an X11 window associated */ + HWND x11_hwnd; + HDC hdc; + RECT client_rect; + POINT x11_hwnd_origin_from_screen; + Drawable drawable; + POINT hwnd_origin_from_screen; + HRGN hrgn; + unsigned code = X11DRV_GET_DRAWABLE; + unsigned rgndata_size; + RGNDATA* rgndata; + RECT rgn_box; + int rgn_box_type; + + hdc = GetDC(hwnd); + GetDCOrgEx(hdc, &hwnd_origin_from_screen); + hrgn = CreateRectRgn(0, 0, 0, 0); + GetRandomRgn(hdc, hrgn, SYSRGN); + rgn_box_type = GetRgnBox(hrgn, &rgn_box); + + /* the coordinate system differs depending on whether Wine is + * pretending to be Win9x or WinNT, so match that behavior. + */ + if (!(GetVersion() & 0x80000000)) + OffsetRgn(hrgn, -hwnd_origin_from_screen.x, -hwnd_origin_from_screen.y); + ReleaseDC(hwnd, hdc); + + if(rgn_box_type == NULLREGION) + { + DeleteObject(hrgn); + return DXGI_STATUS_OCCLUDED; + } + + rgndata_size = GetRegionData(hrgn, 0, NULL); + rgndata = HeapAlloc(GetProcessHeap(), 0, rgndata_size); + GetRegionData(hrgn, rgndata_size, rgndata); + DeleteObject(hrgn); + *prgndata = rgndata; + + x11_hwnd = GetAncestor(hwnd, GA_ROOT); + hdc = GetDC(x11_hwnd); + ExtEscape(hdc, X11DRV_ESCAPE, sizeof(code), (LPSTR)&code, sizeof(drawable), (LPTSTR)&drawable); + + GetDCOrgEx(hdc, &x11_hwnd_origin_from_screen); + ReleaseDC(x11_hwnd, hdc); + + *pwindow = (void*)drawable; + GetClientRect(hwnd, &client_rect); + + prect->left = hwnd_origin_from_screen.x - x11_hwnd_origin_from_screen.x; + prect->top = hwnd_origin_from_screen.y - x11_hwnd_origin_from_screen.y; + + prect->right = prect->left + client_rect.right; + prect->bottom = prect->top + client_rect.bottom; + + // Windows doesn't preserve the aspect ratio + // TODO: maybe let the user turn this on somehow + *ppreserve_aspect_ratio = FALSE; + + *ppresent_cookie = rgndata; + + // TODO: check for errors and return them + return S_OK; +} + +static void STDMETHODCALLTYPE WineDXGIBackend_EndPresent( + IGalliumDXGIBackend* This, + HWND hwnd, + void *present_cookie) +{ + HeapFree(GetProcessHeap(), 0, present_cookie); +} + +static HRESULT STDMETHODCALLTYPE WineDXGIBackend_TestPresent( + IGalliumDXGIBackend* This, + HWND hwnd) +{ + HDC hdc; + HRGN hrgn; + RECT rgn_box; + int rgn_box_type; + + // TODO: is there a simpler way to check this? + hdc = GetDC(hwnd); + hrgn = CreateRectRgn(0, 0, 0, 0); + GetRandomRgn(hdc, hrgn, SYSRGN); + rgn_box_type = GetRgnBox(hrgn, &rgn_box); + DeleteObject(hrgn); + ReleaseDC(hwnd, hdc); + + return rgn_box_type == NULLREGION ? DXGI_STATUS_OCCLUDED : S_OK; +} + +static HRESULT STDMETHODCALLTYPE WineDXGIBackend_GetPresentSize( + IGalliumDXGIBackend* This, + HWND hwnd, + unsigned* width, + unsigned* height) +{ + RECT client_rect; + GetClientRect(hwnd, &client_rect); + *width = client_rect.right - client_rect.left; + *height = client_rect.bottom - client_rect.top; + + // TODO: check for errors and return them + return S_OK; +} + +/* Wine should switch to C++ at least to be able to implement COM interfaces in a sensible way, + * instead of this ridiculous amount of clumsy duplicated code everywhere + * C++ exists exactly to avoid having to write the following code */ +static ULONG STDMETHODCALLTYPE WineDXGIBackend_AddRef(IGalliumDXGIBackend* This) +{ + return InterlockedIncrement(&((struct WineDXGIBackend*)&This)->ref); +} + +static ULONG STDMETHODCALLTYPE WineDXGIBackend_Release(IGalliumDXGIBackend* This) +{ + ULONG v = InterlockedDecrement(&((struct WineDXGIBackend*)&This)->ref); + if(!v) + HeapFree(GetProcessHeap(), 0, This); + return v; +} + +static HRESULT WINAPI WineDXGIBackend_QueryInterface( + IGalliumDXGIBackend* iface, + REFIID riid, + void** ppvObject) +{ + if (IsEqualGUID(riid, &IID_IUnknown) + || IsEqualGUID(riid, &IID_IGalliumDXGIBackend)) + { + WineDXGIBackend_AddRef(iface); + *ppvObject = iface; + return S_OK; + } + + return E_NOINTERFACE; +} + +static IGalliumDXGIBackendVtbl WineDXGIBackend_vtbl = +{ + WineDXGIBackend_QueryInterface, + WineDXGIBackend_AddRef, + WineDXGIBackend_Release, + WineDXGIBackend_BeginPresent, + WineDXGIBackend_EndPresent, + WineDXGIBackend_TestPresent, + WineDXGIBackend_GetPresentSize +}; + +IGalliumDXGIBackend* new_WineDXGIBackend() +{ + struct WineDXGIBackend* backend = HeapAlloc(GetProcessHeap(), 0, sizeof(struct WineDXGIBackend)); + backend->ref = 1; + backend->vtbl_IGalliumDXGIBackend = &WineDXGIBackend_vtbl; + return (IGalliumDXGIBackend*)backend; +} + +static void install_wine_dxgi_backend() +{ + IGalliumDXGIBackend* backend = new_WineDXGIBackend(); + HWND root = GetDesktopWindow(); + unsigned code = X11DRV_GET_DISPLAY; + Display* dpy; + HDC hdc; + + hdc = GetDC(root); + ExtEscape(hdc, X11DRV_ESCAPE, sizeof(code), (LPSTR)&code, sizeof(dpy), (LPTSTR)&dpy); + ReleaseDC(root, hdc); + + GalliumDXGIUseX11Display(dpy, backend); + GalliumDXGIMakeDefault(); + GalliumDXGIUseNothing(); + backend->lpVtbl->Release(backend); +} + +BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) +{ + switch (fdwReason) + { + case DLL_WINE_PREATTACH: + return TRUE; + case DLL_PROCESS_ATTACH: + DisableThreadLibraryCalls(hinstDLL); + install_wine_dxgi_backend(); + break; + case DLL_PROCESS_DETACH: + break; + default: + break; + } + + return TRUE; +} diff --git a/src/gallium/state_trackers/d3d1x/winedlls/dxgi/version.rc b/src/gallium/state_trackers/d3d1x/winedlls/dxgi/version.rc new file mode 100644 index 0000000000..3653281fbc --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/winedlls/dxgi/version.rc @@ -0,0 +1,3 @@ +#define FILENAME "dxgi" +#define NAME "DXGI" +#include "../version.rc.h" diff --git a/src/gallium/state_trackers/d3d1x/winedlls/version.rc.h b/src/gallium/state_trackers/d3d1x/winedlls/version.rc.h new file mode 100644 index 0000000000..096d119fa3 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/winedlls/version.rc.h @@ -0,0 +1,30 @@ +1 VERSIONINFO +FILEVERSION 6,0,6000,16386 +PRODUCTVERSION 6,0,6000,16386 +FILEFLAGSMASK 63 +FILEFLAGS 0 +FILEOS 0x00000000L +FILETYPE 0x00000002L +FILESUBTYPE 0x00000000L +{ + BLOCK "StringFileInfo" + { + BLOCK "040904E4" + { + // all Wine DLLs claim to be from Microsoft, maybe it's needed for compatibility + VALUE "CompanyName", "Microsoft Corporation" + VALUE "FileDescription", "GalliumD3D1x " NAME " runtime" + VALUE "FileVersion", "6.0.6000.16386" + VALUE "InternalName", "" + VALUE "LegalCopyright", "Copyright (c) 2010 Luca Barbieri and other contributors" + VALUE "OriginalFilename", FILENAME ".dll" + VALUE "ProductName", "GalliumD3D1x" + VALUE "ProductVersion", "6.0.6000.16386" + } + } + BLOCK "VarFileInfo" + { + VALUE "Translation", 0x0409, 0x04E4 + } +} + diff --git a/src/gallium/state_trackers/dri/common/dri_context.c b/src/gallium/state_trackers/dri/common/dri_context.c index 22e1b6dd70..770b37037f 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.c +++ b/src/gallium/state_trackers/dri/common/dri_context.c @@ -49,7 +49,7 @@ dri_init_extensions(struct dri_context *ctx) } GLboolean -dri_create_context(gl_api api, const __GLcontextModes * visual, +dri_create_context(gl_api api, const struct gl_config * visual, __DRIcontext * cPriv, void *sharedContextPrivate) { __DRIscreen *sPriv = cPriv->driScreenPriv; diff --git a/src/gallium/state_trackers/dri/common/dri_context.h b/src/gallium/state_trackers/dri/common/dri_context.h index ffe9eba13c..35105e861f 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.h +++ b/src/gallium/state_trackers/dri/common/dri_context.h @@ -61,9 +61,6 @@ struct dri_context /* gallium */ struct st_api *stapi; struct st_context_iface *st; - - /* hooks filled in by dri2 & drisw */ - __DRIimage * (*lookup_egl_image)(struct dri_context *ctx, void *handle); }; static INLINE struct dri_context * @@ -91,7 +88,7 @@ dri_get_current(__DRIscreen * driScreenPriv); boolean dri_create_context(gl_api api, - const __GLcontextModes * visual, + const struct gl_config * visual, __DRIcontext * driContextPriv, void *sharedContextPrivate); diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.c b/src/gallium/state_trackers/dri/common/dri_drawable.c index 1bdfdccf43..5fd6e7863c 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.c +++ b/src/gallium/state_trackers/dri/common/dri_drawable.c @@ -112,7 +112,7 @@ dri_st_framebuffer_flush_front(struct st_framebuffer_iface *stfbi, boolean dri_create_buffer(__DRIscreen * sPriv, __DRIdrawable * dPriv, - const __GLcontextModes * visual, boolean isPixmap) + const struct gl_config * visual, boolean isPixmap) { struct dri_screen *screen = sPriv->private; struct dri_drawable *drawable = NULL; diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.h b/src/gallium/state_trackers/dri/common/dri_drawable.h index 74e662d36c..837d398374 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.h +++ b/src/gallium/state_trackers/dri/common/dri_drawable.h @@ -79,7 +79,7 @@ dri_drawable(__DRIdrawable * driDrawPriv) boolean dri_create_buffer(__DRIscreen * sPriv, __DRIdrawable * dPriv, - const __GLcontextModes * visual, boolean isPixmap); + const struct gl_config * visual, boolean isPixmap); void dri_destroy_buffer(__DRIdrawable * dPriv); diff --git a/src/gallium/state_trackers/dri/common/dri_screen.c b/src/gallium/state_trackers/dri/common/dri_screen.c index 475a96d196..252ad1768d 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.c +++ b/src/gallium/state_trackers/dri/common/dri_screen.c @@ -33,7 +33,6 @@ #include "xmlpool.h" #include "dri_screen.h" -#include "dri_context.h" #include "util/u_inlines.h" #include "pipe/p_screen.h" @@ -228,7 +227,7 @@ dri_fill_in_modes(struct dri_screen *screen, */ void dri_fill_st_visual(struct st_visual *stvis, struct dri_screen *screen, - const __GLcontextModes *mode) + const struct gl_config *mode) { memset(stvis, 0, sizeof(*stvis)); @@ -287,16 +286,14 @@ dri_fill_st_visual(struct st_visual *stvis, struct dri_screen *screen, static boolean dri_get_egl_image(struct st_manager *smapi, - struct st_context_iface *stctxi, void *egl_image, struct st_egl_image *stimg) { - struct dri_context *ctx = - (struct dri_context *)stctxi->st_manager_private; + struct dri_screen *screen = (struct dri_screen *)smapi; __DRIimage *img = NULL; - if (ctx->lookup_egl_image) { - img = ctx->lookup_egl_image(ctx, egl_image); + if (screen->lookup_egl_image) { + img = screen->lookup_egl_image(screen, egl_image); } if (!img) diff --git a/src/gallium/state_trackers/dri/common/dri_screen.h b/src/gallium/state_trackers/dri/common/dri_screen.h index 849f399b2f..0da9b5510f 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.h +++ b/src/gallium/state_trackers/dri/common/dri_screen.h @@ -69,6 +69,9 @@ struct dri_screen boolean sd_depth_bits_last; boolean auto_fake_front; enum pipe_texture_target target; + + /* hooks filled in by dri2 & drisw */ + __DRIimage * (*lookup_egl_image)(struct dri_screen *ctx, void *handle); }; /** cast wrapper */ @@ -111,7 +114,7 @@ dri_with_format(__DRIscreen * sPriv) void dri_fill_st_visual(struct st_visual *stvis, struct dri_screen *screen, - const __GLcontextModes *mode); + const struct gl_config *mode); const __DRIconfig ** dri_init_screen_helper(struct dri_screen *screen, diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index 596309bfbd..3c5b075617 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -291,25 +291,26 @@ dri2_flush_frontbuffer(struct dri_drawable *drawable, } static __DRIimage * -dri2_lookup_egl_image(struct dri_context *ctx, void *handle) +dri2_lookup_egl_image(struct dri_screen *screen, void *handle) { - __DRIimageLookupExtension *loader = ctx->sPriv->dri2.image; + __DRIimageLookupExtension *loader = screen->sPriv->dri2.image; __DRIimage *img; if (!loader->lookupEGLImage) return NULL; - img = loader->lookupEGLImage(ctx->cPriv, handle, ctx->cPriv->loaderPrivate); + img = loader->lookupEGLImage(screen->sPriv, + handle, screen->sPriv->loaderPrivate); return img; } static __DRIimage * -dri2_create_image_from_name(__DRIcontext *context, +dri2_create_image_from_name(__DRIscreen *_screen, int width, int height, int format, int name, int pitch, void *loaderPrivate) { - struct dri_screen *screen = dri_screen(context->driScreenPriv); + struct dri_screen *screen = dri_screen(_screen); __DRIimage *img; struct pipe_resource templ; struct winsys_handle whandle; @@ -501,7 +502,7 @@ static const __DRIextension *dri_screen_extensions[] = { /** * This is the driver specific part of the createNewScreen entry point. * - * Returns the __GLcontextModes supported by this driver. + * Returns the struct gl_config supported by this driver. */ static const __DRIconfig ** dri2_init_screen(__DRIscreen * sPriv) @@ -537,6 +538,7 @@ dri2_init_screen(__DRIscreen * sPriv) screen->auto_fake_front = dri_with_format(sPriv); screen->broken_invalidate = !sPriv->dri2.useInvalidate; + screen->lookup_egl_image = dri2_lookup_egl_image; return configs; fail: @@ -546,7 +548,7 @@ fail: } static boolean -dri2_create_context(gl_api api, const __GLcontextModes * visual, +dri2_create_context(gl_api api, const struct gl_config * visual, __DRIcontext * cPriv, void *sharedContextPrivate) { struct dri_context *ctx = NULL; @@ -556,15 +558,13 @@ dri2_create_context(gl_api api, const __GLcontextModes * visual, ctx = cPriv->driverPrivate; - ctx->lookup_egl_image = dri2_lookup_egl_image; - return TRUE; } static boolean dri2_create_buffer(__DRIscreen * sPriv, __DRIdrawable * dPriv, - const __GLcontextModes * visual, boolean isPixmap) + const struct gl_config * visual, boolean isPixmap) { struct dri_drawable *drawable = NULL; diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c index 04bba631ae..c48cc44036 100644 --- a/src/gallium/state_trackers/dri/sw/drisw.c +++ b/src/gallium/state_trackers/dri/sw/drisw.c @@ -298,7 +298,7 @@ fail: static boolean drisw_create_buffer(__DRIscreen * sPriv, __DRIdrawable * dPriv, - const __GLcontextModes * visual, boolean isPixmap) + const struct gl_config * visual, boolean isPixmap) { struct dri_drawable *drawable = NULL; diff --git a/src/gallium/state_trackers/egl/Makefile b/src/gallium/state_trackers/egl/Makefile index 4199d7c6ba..8dbfc5b8e5 100644 --- a/src/gallium/state_trackers/egl/Makefile +++ b/src/gallium/state_trackers/egl/Makefile @@ -24,9 +24,9 @@ x11_SOURCES = $(wildcard x11/*.c) \ x11_OBJECTS = $(x11_SOURCES:.c=.o) -kms_INCLUDES = -I$(TOP)/src/gallium/winsys $(shell pkg-config --cflags-only-I libdrm) -kms_SOURCES = $(wildcard kms/*.c) -kms_OBJECTS = $(kms_SOURCES:.c=.o) +drm_INCLUDES = -I$(TOP)/src/gallium/winsys $(shell pkg-config --cflags-only-I libdrm) +drm_SOURCES = $(wildcard drm/*.c) +drm_OBJECTS = $(drm_SOURCES:.c=.o) fbdev_INCLUDES = -I$(TOP)/src/gallium/winsys/sw @@ -34,8 +34,8 @@ fbdev_SOURCES = $(wildcard fbdev/*.c) fbdev_OBJECTS = $(fbdev_SOURCES:.c=.o) -ALL_INCLUDES = $(common_INCLUDES) $(x11_INCLUDES) $(kms_INCLUDES) $(fbdev_INCLUDES) -ALL_SOURCES = $(common_SOURCES) $(x11_SOURCES) $(kms_SOURCES) $(fbdev_SOURCES) +ALL_INCLUDES = $(common_INCLUDES) $(x11_INCLUDES) $(drm_INCLUDES) $(fbdev_INCLUDES) +ALL_SOURCES = $(common_SOURCES) $(x11_SOURCES) $(drm_SOURCES) $(fbdev_SOURCES) EGL_OBJECTS = $(common_OBJECTS) EGL_CPPFLAGS = $(common_INCLUDES) @@ -45,9 +45,9 @@ ifneq ($(findstring x11, $(EGL_PLATFORMS)),) EGL_OBJECTS += $(x11_OBJECTS) EGL_CPPFLAGS += -DHAVE_X11_BACKEND endif -ifneq ($(findstring kms, $(EGL_PLATFORMS)),) -EGL_OBJECTS += $(kms_OBJECTS) -EGL_CPPFLAGS += -DHAVE_KMS_BACKEND +ifneq ($(findstring drm, $(EGL_PLATFORMS)),) +EGL_OBJECTS += $(drm_OBJECTS) +EGL_CPPFLAGS += -DHAVE_DRM_BACKEND endif ifneq ($(findstring fbdev, $(EGL_PLATFORMS)),) EGL_OBJECTS += $(fbdev_OBJECTS) @@ -87,8 +87,8 @@ $(common_OBJECTS): %.o: %.c $(x11_OBJECTS): %.o: %.c $(call egl-cc,x11) -$(kms_OBJECTS): %.o: %.c - $(call egl-cc,kms) +$(drm_OBJECTS): %.o: %.c + $(call egl-cc,drm) $(fbdev_OBJECTS): %.o: %.c $(call egl-cc,fbdev) diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c index 33a838fb79..aaa2ff6bb2 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d.c @@ -65,8 +65,8 @@ egl_g3d_get_platform(_EGLDriver *drv, _EGLPlatformType plat) break; case _EGL_PLATFORM_DRM: plat_name = "DRM"; -#ifdef HAVE_KMS_BACKEND - nplat = native_get_kms_platform(); +#ifdef HAVE_DRM_BACKEND + nplat = native_get_drm_platform(); #endif break; case _EGL_PLATFORM_FBDEV: @@ -194,53 +194,48 @@ init_config_attributes(_EGLConfig *conf, const struct native_config *nconf, if (nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_BACK_LEFT)) surface_type |= EGL_PBUFFER_BIT; - SET_CONFIG_ATTRIB(conf, EGL_CONFORMANT, api_mask); - SET_CONFIG_ATTRIB(conf, EGL_RENDERABLE_TYPE, api_mask); + conf->Conformant = api_mask; + conf->RenderableType = api_mask; - SET_CONFIG_ATTRIB(conf, EGL_RED_SIZE, rgba[0]); - SET_CONFIG_ATTRIB(conf, EGL_GREEN_SIZE, rgba[1]); - SET_CONFIG_ATTRIB(conf, EGL_BLUE_SIZE, rgba[2]); - SET_CONFIG_ATTRIB(conf, EGL_ALPHA_SIZE, rgba[3]); - SET_CONFIG_ATTRIB(conf, EGL_BUFFER_SIZE, buffer_size); + conf->RedSize = rgba[0]; + conf->GreenSize = rgba[1]; + conf->BlueSize = rgba[2]; + conf->AlphaSize = rgba[3]; + conf->BufferSize = buffer_size; - SET_CONFIG_ATTRIB(conf, EGL_DEPTH_SIZE, depth_stencil[0]); - SET_CONFIG_ATTRIB(conf, EGL_STENCIL_SIZE, depth_stencil[1]); + conf->DepthSize = depth_stencil[0]; + conf->StencilSize = depth_stencil[1]; - SET_CONFIG_ATTRIB(conf, EGL_SURFACE_TYPE, surface_type); + conf->SurfaceType = surface_type; - SET_CONFIG_ATTRIB(conf, EGL_NATIVE_RENDERABLE, EGL_TRUE); + conf->NativeRenderable = EGL_TRUE; if (surface_type & EGL_WINDOW_BIT) { - SET_CONFIG_ATTRIB(conf, EGL_NATIVE_VISUAL_ID, nconf->native_visual_id); - SET_CONFIG_ATTRIB(conf, EGL_NATIVE_VISUAL_TYPE, - nconf->native_visual_type); + conf->NativeVisualID = nconf->native_visual_id; + conf->NativeVisualType = nconf->native_visual_type; } if (surface_type & EGL_PBUFFER_BIT) { - SET_CONFIG_ATTRIB(conf, EGL_BIND_TO_TEXTURE_RGB, EGL_TRUE); + conf->BindToTextureRGB = EGL_TRUE; if (rgba[3]) - SET_CONFIG_ATTRIB(conf, EGL_BIND_TO_TEXTURE_RGBA, EGL_TRUE); + conf->BindToTextureRGBA = EGL_TRUE; - SET_CONFIG_ATTRIB(conf, EGL_MAX_PBUFFER_WIDTH, 4096); - SET_CONFIG_ATTRIB(conf, EGL_MAX_PBUFFER_HEIGHT, 4096); - SET_CONFIG_ATTRIB(conf, EGL_MAX_PBUFFER_PIXELS, 4096 * 4096); + conf->MaxPbufferWidth = 4096; + conf->MaxPbufferHeight = 4096; + conf->MaxPbufferPixels = 4096 * 4096; } - SET_CONFIG_ATTRIB(conf, EGL_LEVEL, nconf->level); - SET_CONFIG_ATTRIB(conf, EGL_SAMPLES, nconf->samples); - SET_CONFIG_ATTRIB(conf, EGL_SAMPLE_BUFFERS, 1); + conf->Level = nconf->level; + conf->Samples = nconf->samples; + conf->SampleBuffers = 0; if (nconf->slow_config) - SET_CONFIG_ATTRIB(conf, EGL_CONFIG_CAVEAT, EGL_SLOW_CONFIG); + conf->ConfigCaveat = EGL_SLOW_CONFIG; if (nconf->transparent_rgb) { - rgba[0] = nconf->transparent_rgb_values[0]; - rgba[1] = nconf->transparent_rgb_values[1]; - rgba[2] = nconf->transparent_rgb_values[2]; - - SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_TYPE, EGL_TRANSPARENT_RGB); - SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_RED_VALUE, rgba[0]); - SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_GREEN_VALUE, rgba[1]); - SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_BLUE_VALUE, rgba[2]); + conf->TransparentType = EGL_TRANSPARENT_RGB; + conf->TransparentRedValue = nconf->transparent_rgb_values[0]; + conf->TransparentGreenValue = nconf->transparent_rgb_values[1]; + conf->TransparentBlueValue = nconf->transparent_rgb_values[2]; } return _eglValidateConfig(conf, EGL_FALSE); @@ -258,6 +253,10 @@ egl_g3d_init_config(_EGLDriver *drv, _EGLDisplay *dpy, EGLint buffer_mask, api_mask; EGLBoolean valid; + /* skip single-buffered configs */ + if (!(nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_BACK_LEFT))) + return EGL_FALSE; + buffer_mask = 0x0; if (nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_FRONT_LEFT)) buffer_mask |= ST_ATTACHMENT_FRONT_LEFT_MASK; diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_api.c b/src/gallium/state_trackers/egl/common/egl_g3d_api.c index c0164daf9c..3bde39737b 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_api.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_api.c @@ -609,8 +609,10 @@ egl_g3d_wait_client(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx) gctx->stctxi->flush(gctx->stctxi, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, &fence); - screen->fence_finish(screen, fence, 0); - screen->fence_reference(screen, &fence, NULL); + if (fence) { + screen->fence_finish(screen, fence, 0); + screen->fence_reference(screen, &fence, NULL); + } return EGL_TRUE; } diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_st.c b/src/gallium/state_trackers/egl/common/egl_g3d_st.c index 05cdb0d421..0affe632cf 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_st.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_st.c @@ -51,7 +51,6 @@ egl_g3d_st_manager(struct st_manager *smapi) static boolean egl_g3d_st_manager_get_egl_image(struct st_manager *smapi, - struct st_context_iface *stctx, void *egl_image, struct st_egl_image *out) { diff --git a/src/gallium/state_trackers/egl/common/native.h b/src/gallium/state_trackers/egl/common/native.h index 9f34c517ef..3c3f57e267 100644 --- a/src/gallium/state_trackers/egl/common/native.h +++ b/src/gallium/state_trackers/egl/common/native.h @@ -227,7 +227,7 @@ const struct native_platform * native_get_x11_platform(void); const struct native_platform * -native_get_kms_platform(void); +native_get_drm_platform(void); const struct native_platform * native_get_fbdev_platform(void); diff --git a/src/gallium/state_trackers/egl/drm/modeset.c b/src/gallium/state_trackers/egl/drm/modeset.c new file mode 100644 index 0000000000..06a6077053 --- /dev/null +++ b/src/gallium/state_trackers/egl/drm/modeset.c @@ -0,0 +1,619 @@ +/* + * Mesa 3-D graphics library + * Version: 7.9 + * + * Copyright (C) 2010 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "egllog.h" + +#include "native_drm.h" + +static boolean +drm_surface_validate(struct native_surface *nsurf, uint attachment_mask, + unsigned int *seq_num, struct pipe_resource **textures, + int *width, int *height) +{ + struct drm_surface *drmsurf = drm_surface(nsurf); + + if (!resource_surface_add_resources(drmsurf->rsurf, attachment_mask)) + return FALSE; + if (textures) + resource_surface_get_resources(drmsurf->rsurf, textures, attachment_mask); + + if (seq_num) + *seq_num = drmsurf->sequence_number; + if (width) + *width = drmsurf->width; + if (height) + *height = drmsurf->height; + + return TRUE; +} + +/** + * Add textures as DRM framebuffers. + */ +static boolean +drm_surface_init_framebuffers(struct native_surface *nsurf, boolean need_back) +{ + struct drm_surface *drmsurf = drm_surface(nsurf); + struct drm_display *drmdpy = drmsurf->drmdpy; + int num_framebuffers = (need_back) ? 2 : 1; + int i, err; + + for (i = 0; i < num_framebuffers; i++) { + struct drm_framebuffer *fb; + enum native_attachment natt; + struct winsys_handle whandle; + uint block_bits; + + if (i == 0) { + fb = &drmsurf->front_fb; + natt = NATIVE_ATTACHMENT_FRONT_LEFT; + } + else { + fb = &drmsurf->back_fb; + natt = NATIVE_ATTACHMENT_BACK_LEFT; + } + + if (!fb->texture) { + /* make sure the texture has been allocated */ + resource_surface_add_resources(drmsurf->rsurf, 1 << natt); + fb->texture = + resource_surface_get_single_resource(drmsurf->rsurf, natt); + if (!fb->texture) + return FALSE; + } + + /* already initialized */ + if (fb->buffer_id) + continue; + + /* TODO detect the real value */ + fb->is_passive = TRUE; + + memset(&whandle, 0, sizeof(whandle)); + whandle.type = DRM_API_HANDLE_TYPE_KMS; + + if (!drmdpy->base.screen->resource_get_handle(drmdpy->base.screen, + fb->texture, &whandle)) + return FALSE; + + block_bits = util_format_get_blocksizebits(drmsurf->color_format); + err = drmModeAddFB(drmdpy->fd, drmsurf->width, drmsurf->height, + block_bits, block_bits, whandle.stride, whandle.handle, + &fb->buffer_id); + if (err) { + fb->buffer_id = 0; + return FALSE; + } + } + + return TRUE; +} + +static boolean +drm_surface_flush_frontbuffer(struct native_surface *nsurf) +{ +#ifdef DRM_MODE_FEATURE_DIRTYFB + struct drm_surface *drmsurf = drm_surface(nsurf); + struct drm_display *drmdpy = drmsurf->drmdpy; + + if (drmsurf->front_fb.is_passive) + drmModeDirtyFB(drmdpy->fd, drmsurf->front_fb.buffer_id, NULL, 0); +#endif + + return TRUE; +} + +static boolean +drm_surface_swap_buffers(struct native_surface *nsurf) +{ + struct drm_surface *drmsurf = drm_surface(nsurf); + struct drm_crtc *drmcrtc = &drmsurf->current_crtc; + struct drm_display *drmdpy = drmsurf->drmdpy; + struct drm_framebuffer tmp_fb; + int err; + + if (!drmsurf->back_fb.buffer_id) { + if (!drm_surface_init_framebuffers(&drmsurf->base, TRUE)) + return FALSE; + } + + if (drmsurf->is_shown && drmcrtc->crtc) { + err = drmModeSetCrtc(drmdpy->fd, drmcrtc->crtc->crtc_id, + drmsurf->back_fb.buffer_id, drmcrtc->crtc->x, drmcrtc->crtc->y, + drmcrtc->connectors, drmcrtc->num_connectors, &drmcrtc->crtc->mode); + if (err) + return FALSE; + } + + /* swap the buffers */ + tmp_fb = drmsurf->front_fb; + drmsurf->front_fb = drmsurf->back_fb; + drmsurf->back_fb = tmp_fb; + + resource_surface_swap_buffers(drmsurf->rsurf, + NATIVE_ATTACHMENT_FRONT_LEFT, NATIVE_ATTACHMENT_BACK_LEFT, FALSE); + /* the front/back textures are swapped */ + drmsurf->sequence_number++; + drmdpy->event_handler->invalid_surface(&drmdpy->base, + &drmsurf->base, drmsurf->sequence_number); + + return TRUE; +} + +static void +drm_surface_wait(struct native_surface *nsurf) +{ + /* no-op */ +} + +static void +drm_surface_destroy(struct native_surface *nsurf) +{ + struct drm_surface *drmsurf = drm_surface(nsurf); + + if (drmsurf->current_crtc.crtc) + drmModeFreeCrtc(drmsurf->current_crtc.crtc); + + if (drmsurf->front_fb.buffer_id) + drmModeRmFB(drmsurf->drmdpy->fd, drmsurf->front_fb.buffer_id); + pipe_resource_reference(&drmsurf->front_fb.texture, NULL); + + if (drmsurf->back_fb.buffer_id) + drmModeRmFB(drmsurf->drmdpy->fd, drmsurf->back_fb.buffer_id); + pipe_resource_reference(&drmsurf->back_fb.texture, NULL); + + resource_surface_destroy(drmsurf->rsurf); + FREE(drmsurf); +} + +static struct drm_surface * +drm_display_create_surface(struct native_display *ndpy, + const struct native_config *nconf, + uint width, uint height) +{ + struct drm_display *drmdpy = drm_display(ndpy); + struct drm_config *drmconf = drm_config(nconf); + struct drm_surface *drmsurf; + + drmsurf = CALLOC_STRUCT(drm_surface); + if (!drmsurf) + return NULL; + + drmsurf->drmdpy = drmdpy; + drmsurf->color_format = drmconf->base.color_format; + drmsurf->width = width; + drmsurf->height = height; + + drmsurf->rsurf = resource_surface_create(drmdpy->base.screen, + drmsurf->color_format, + PIPE_BIND_RENDER_TARGET | + PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT); + if (!drmsurf->rsurf) { + FREE(drmsurf); + return NULL; + } + + resource_surface_set_size(drmsurf->rsurf, drmsurf->width, drmsurf->height); + + drmsurf->base.destroy = drm_surface_destroy; + drmsurf->base.swap_buffers = drm_surface_swap_buffers; + drmsurf->base.flush_frontbuffer = drm_surface_flush_frontbuffer; + drmsurf->base.validate = drm_surface_validate; + drmsurf->base.wait = drm_surface_wait; + + return drmsurf; +} + +/** + * Choose a CRTC that supports all given connectors. + */ +static uint32_t +drm_display_choose_crtc(struct native_display *ndpy, + uint32_t *connectors, int num_connectors) +{ + struct drm_display *drmdpy = drm_display(ndpy); + int idx; + + for (idx = 0; idx < drmdpy->resources->count_crtcs; idx++) { + boolean found_crtc = TRUE; + int i, j; + + for (i = 0; i < num_connectors; i++) { + drmModeConnectorPtr connector; + int encoder_idx = -1; + + connector = drmModeGetConnector(drmdpy->fd, connectors[i]); + if (!connector) { + found_crtc = FALSE; + break; + } + + /* find an encoder the CRTC supports */ + for (j = 0; j < connector->count_encoders; j++) { + drmModeEncoderPtr encoder = + drmModeGetEncoder(drmdpy->fd, connector->encoders[j]); + if (encoder->possible_crtcs & (1 << idx)) { + encoder_idx = j; + break; + } + drmModeFreeEncoder(encoder); + } + + drmModeFreeConnector(connector); + if (encoder_idx < 0) { + found_crtc = FALSE; + break; + } + } + + if (found_crtc) + break; + } + + if (idx >= drmdpy->resources->count_crtcs) { + _eglLog(_EGL_WARNING, + "failed to find a CRTC that supports the given %d connectors", + num_connectors); + return 0; + } + + return drmdpy->resources->crtcs[idx]; +} + +/** + * Remember the original CRTC status and set the CRTC + */ +static boolean +drm_display_set_crtc(struct native_display *ndpy, int crtc_idx, + uint32_t buffer_id, uint32_t x, uint32_t y, + uint32_t *connectors, int num_connectors, + drmModeModeInfoPtr mode) +{ + struct drm_display *drmdpy = drm_display(ndpy); + struct drm_crtc *drmcrtc = &drmdpy->saved_crtcs[crtc_idx]; + uint32_t crtc_id; + int err; + + if (drmcrtc->crtc) { + crtc_id = drmcrtc->crtc->crtc_id; + } + else { + int count = 0, i; + + /* + * Choose the CRTC once. It could be more dynamic, but let's keep it + * simple for now. + */ + crtc_id = drm_display_choose_crtc(&drmdpy->base, + connectors, num_connectors); + + /* save the original CRTC status */ + drmcrtc->crtc = drmModeGetCrtc(drmdpy->fd, crtc_id); + if (!drmcrtc->crtc) + return FALSE; + + for (i = 0; i < drmdpy->num_connectors; i++) { + struct drm_connector *drmconn = &drmdpy->connectors[i]; + drmModeConnectorPtr connector = drmconn->connector; + drmModeEncoderPtr encoder; + + encoder = drmModeGetEncoder(drmdpy->fd, connector->encoder_id); + if (encoder) { + if (encoder->crtc_id == crtc_id) { + drmcrtc->connectors[count++] = connector->connector_id; + if (count >= Elements(drmcrtc->connectors)) + break; + } + drmModeFreeEncoder(encoder); + } + } + + drmcrtc->num_connectors = count; + } + + err = drmModeSetCrtc(drmdpy->fd, crtc_id, buffer_id, x, y, + connectors, num_connectors, mode); + if (err) { + drmModeFreeCrtc(drmcrtc->crtc); + drmcrtc->crtc = NULL; + drmcrtc->num_connectors = 0; + + return FALSE; + } + + return TRUE; +} + +static boolean +drm_display_program(struct native_display *ndpy, int crtc_idx, + struct native_surface *nsurf, uint x, uint y, + const struct native_connector **nconns, int num_nconns, + const struct native_mode *nmode) +{ + struct drm_display *drmdpy = drm_display(ndpy); + struct drm_surface *drmsurf = drm_surface(nsurf); + const struct drm_mode *drmmode = drm_mode(nmode); + uint32_t connector_ids[32]; + uint32_t buffer_id; + drmModeModeInfo mode_tmp, *mode; + int i; + + if (num_nconns > Elements(connector_ids)) { + _eglLog(_EGL_WARNING, "too many connectors (%d)", num_nconns); + num_nconns = Elements(connector_ids); + } + + if (drmsurf) { + if (!drm_surface_init_framebuffers(&drmsurf->base, FALSE)) + return FALSE; + + buffer_id = drmsurf->front_fb.buffer_id; + /* the mode argument of drmModeSetCrtc is not constified */ + mode_tmp = drmmode->mode; + mode = &mode_tmp; + } + else { + /* disable the CRTC */ + buffer_id = 0; + mode = NULL; + num_nconns = 0; + } + + for (i = 0; i < num_nconns; i++) { + struct drm_connector *drmconn = drm_connector(nconns[i]); + connector_ids[i] = drmconn->connector->connector_id; + } + + if (!drm_display_set_crtc(&drmdpy->base, crtc_idx, buffer_id, x, y, + connector_ids, num_nconns, mode)) { + _eglLog(_EGL_WARNING, "failed to set CRTC %d", crtc_idx); + + return FALSE; + } + + if (drmdpy->shown_surfaces[crtc_idx]) + drmdpy->shown_surfaces[crtc_idx]->is_shown = FALSE; + drmdpy->shown_surfaces[crtc_idx] = drmsurf; + + /* remember the settings for buffer swapping */ + if (drmsurf) { + uint32_t crtc_id = drmdpy->saved_crtcs[crtc_idx].crtc->crtc_id; + struct drm_crtc *drmcrtc = &drmsurf->current_crtc; + + if (drmcrtc->crtc) + drmModeFreeCrtc(drmcrtc->crtc); + drmcrtc->crtc = drmModeGetCrtc(drmdpy->fd, crtc_id); + + assert(num_nconns < Elements(drmcrtc->connectors)); + memcpy(drmcrtc->connectors, connector_ids, + sizeof(*connector_ids) * num_nconns); + drmcrtc->num_connectors = num_nconns; + + drmsurf->is_shown = TRUE; + } + + return TRUE; +} + +static const struct native_mode ** +drm_display_get_modes(struct native_display *ndpy, + const struct native_connector *nconn, + int *num_modes) +{ + struct drm_display *drmdpy = drm_display(ndpy); + struct drm_connector *drmconn = drm_connector(nconn); + const struct native_mode **nmodes_return; + int count, i; + + /* delete old data */ + if (drmconn->connector) { + drmModeFreeConnector(drmconn->connector); + FREE(drmconn->drm_modes); + + drmconn->connector = NULL; + drmconn->drm_modes = NULL; + drmconn->num_modes = 0; + } + + /* detect again */ + drmconn->connector = drmModeGetConnector(drmdpy->fd, drmconn->connector_id); + if (!drmconn->connector) + return NULL; + + count = drmconn->connector->count_modes; + drmconn->drm_modes = CALLOC(count, sizeof(*drmconn->drm_modes)); + if (!drmconn->drm_modes) { + drmModeFreeConnector(drmconn->connector); + drmconn->connector = NULL; + + return NULL; + } + + for (i = 0; i < count; i++) { + struct drm_mode *drmmode = &drmconn->drm_modes[i]; + drmModeModeInfoPtr mode = &drmconn->connector->modes[i]; + + drmmode->mode = *mode; + + drmmode->base.desc = drmmode->mode.name; + drmmode->base.width = drmmode->mode.hdisplay; + drmmode->base.height = drmmode->mode.vdisplay; + drmmode->base.refresh_rate = drmmode->mode.vrefresh; + /* not all kernels have vrefresh = refresh_rate * 1000 */ + if (drmmode->base.refresh_rate > 1000) + drmmode->base.refresh_rate = (drmmode->base.refresh_rate + 500) / 1000; + } + + nmodes_return = MALLOC(count * sizeof(*nmodes_return)); + if (nmodes_return) { + for (i = 0; i < count; i++) + nmodes_return[i] = &drmconn->drm_modes[i].base; + if (num_modes) + *num_modes = count; + } + + return nmodes_return; +} + +static const struct native_connector ** +drm_display_get_connectors(struct native_display *ndpy, int *num_connectors, + int *num_crtc) +{ + struct drm_display *drmdpy = drm_display(ndpy); + const struct native_connector **connectors; + int i; + + if (!drmdpy->connectors) { + drmdpy->connectors = + CALLOC(drmdpy->resources->count_connectors, sizeof(*drmdpy->connectors)); + if (!drmdpy->connectors) + return NULL; + + for (i = 0; i < drmdpy->resources->count_connectors; i++) { + struct drm_connector *drmconn = &drmdpy->connectors[i]; + + drmconn->connector_id = drmdpy->resources->connectors[i]; + /* drmconn->connector is allocated when the modes are asked */ + } + + drmdpy->num_connectors = drmdpy->resources->count_connectors; + } + + connectors = MALLOC(drmdpy->num_connectors * sizeof(*connectors)); + if (connectors) { + for (i = 0; i < drmdpy->num_connectors; i++) + connectors[i] = &drmdpy->connectors[i].base; + if (num_connectors) + *num_connectors = drmdpy->num_connectors; + } + + if (num_crtc) + *num_crtc = drmdpy->resources->count_crtcs; + + return connectors; +} + +static struct native_surface * +drm_display_create_scanout_surface(struct native_display *ndpy, + const struct native_config *nconf, + uint width, uint height) +{ + struct drm_surface *drmsurf; + + drmsurf = drm_display_create_surface(ndpy, nconf, width, height); + return &drmsurf->base; +} + +static struct native_display_modeset drm_display_modeset = { + .get_connectors = drm_display_get_connectors, + .get_modes = drm_display_get_modes, + .create_scanout_surface = drm_display_create_scanout_surface, + .program = drm_display_program +}; + +void +drm_display_fini_modeset(struct native_display *ndpy) +{ + struct drm_display *drmdpy = drm_display(ndpy); + int i; + + if (drmdpy->connectors) { + for (i = 0; i < drmdpy->num_connectors; i++) { + struct drm_connector *drmconn = &drmdpy->connectors[i]; + if (drmconn->connector) { + drmModeFreeConnector(drmconn->connector); + FREE(drmconn->drm_modes); + } + } + FREE(drmdpy->connectors); + } + + if (drmdpy->shown_surfaces) { + FREE(drmdpy->shown_surfaces); + drmdpy->shown_surfaces = NULL; + } + + if (drmdpy->saved_crtcs) { + for (i = 0; i < drmdpy->resources->count_crtcs; i++) { + struct drm_crtc *drmcrtc = &drmdpy->saved_crtcs[i]; + + if (drmcrtc->crtc) { + /* restore crtc */ + drmModeSetCrtc(drmdpy->fd, drmcrtc->crtc->crtc_id, + drmcrtc->crtc->buffer_id, drmcrtc->crtc->x, drmcrtc->crtc->y, + drmcrtc->connectors, drmcrtc->num_connectors, + &drmcrtc->crtc->mode); + + drmModeFreeCrtc(drmcrtc->crtc); + } + } + FREE(drmdpy->saved_crtcs); + } + + if (drmdpy->resources) { + drmModeFreeResources(drmdpy->resources); + drmdpy->resources = NULL; + } + + drmdpy->base.modeset = NULL; +} + +boolean +drm_display_init_modeset(struct native_display *ndpy) +{ + struct drm_display *drmdpy = drm_display(ndpy); + + /* resources are fixed, unlike crtc, connector, or encoder */ + drmdpy->resources = drmModeGetResources(drmdpy->fd); + if (!drmdpy->resources) { + _eglLog(_EGL_DEBUG, "Failed to get KMS resources. Disable modeset."); + return FALSE; + } + + drmdpy->saved_crtcs = + CALLOC(drmdpy->resources->count_crtcs, sizeof(*drmdpy->saved_crtcs)); + if (!drmdpy->saved_crtcs) { + drm_display_fini_modeset(&drmdpy->base); + return FALSE; + } + + drmdpy->shown_surfaces = + CALLOC(drmdpy->resources->count_crtcs, sizeof(*drmdpy->shown_surfaces)); + if (!drmdpy->shown_surfaces) { + drm_display_fini_modeset(&drmdpy->base); + return FALSE; + } + + drmdpy->base.modeset = &drm_display_modeset; + + return TRUE; +} diff --git a/src/gallium/state_trackers/egl/drm/native_drm.c b/src/gallium/state_trackers/egl/drm/native_drm.c new file mode 100644 index 0000000000..f6dc558437 --- /dev/null +++ b/src/gallium/state_trackers/egl/drm/native_drm.c @@ -0,0 +1,240 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2010 Chia-I Wu <olv@0xlab.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "util/u_memory.h" +#include "egllog.h" + +#include "native_drm.h" + +/* see get_drm_screen_name */ +#include <radeon_drm.h> +#include "radeon/drm/radeon_drm.h" + +static boolean +drm_display_is_format_supported(struct native_display *ndpy, + enum pipe_format fmt, boolean is_color) +{ + return ndpy->screen->is_format_supported(ndpy->screen, + fmt, PIPE_TEXTURE_2D, 0, + (is_color) ? PIPE_BIND_RENDER_TARGET : + PIPE_BIND_DEPTH_STENCIL, 0); +} + +static const struct native_config ** +drm_display_get_configs(struct native_display *ndpy, int *num_configs) +{ + struct drm_display *drmdpy = drm_display(ndpy); + const struct native_config **configs; + + /* first time */ + if (!drmdpy->config) { + struct native_config *nconf; + enum pipe_format format; + + drmdpy->config = CALLOC(1, sizeof(*drmdpy->config)); + if (!drmdpy->config) + return NULL; + + nconf = &drmdpy->config->base; + + nconf->buffer_mask = + (1 << NATIVE_ATTACHMENT_FRONT_LEFT) | + (1 << NATIVE_ATTACHMENT_BACK_LEFT); + + format = PIPE_FORMAT_B8G8R8A8_UNORM; + if (!drm_display_is_format_supported(&drmdpy->base, format, TRUE)) { + format = PIPE_FORMAT_A8R8G8B8_UNORM; + if (!drm_display_is_format_supported(&drmdpy->base, format, TRUE)) + format = PIPE_FORMAT_NONE; + } + if (format == PIPE_FORMAT_NONE) { + FREE(drmdpy->config); + drmdpy->config = NULL; + return NULL; + } + + nconf->color_format = format; + + /* support KMS */ + if (drmdpy->resources) + nconf->scanout_bit = TRUE; + } + + configs = MALLOC(sizeof(*configs)); + if (configs) { + configs[0] = &drmdpy->config->base; + if (num_configs) + *num_configs = 1; + } + + return configs; +} + +static int +drm_display_get_param(struct native_display *ndpy, + enum native_param_type param) +{ + int val; + + switch (param) { + default: + val = 0; + break; + } + + return val; +} + +static void +drm_display_destroy(struct native_display *ndpy) +{ + struct drm_display *drmdpy = drm_display(ndpy); + + if (drmdpy->config) + FREE(drmdpy->config); + + drm_display_fini_modeset(&drmdpy->base); + + if (drmdpy->base.screen) + drmdpy->base.screen->destroy(drmdpy->base.screen); + + if (drmdpy->fd >= 0) + close(drmdpy->fd); + + FREE(drmdpy); +} + +static const char * +get_drm_screen_name(int fd, drmVersionPtr version) +{ + const char *name = version->name; + + if (name && !strcmp(name, "radeon")) { + int chip_id; + struct drm_radeon_info info; + + memset(&info, 0, sizeof(info)); + info.request = RADEON_INFO_DEVICE_ID; + info.value = pointer_to_intptr(&chip_id); + if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) != 0) + return NULL; + + name = is_r3xx(chip_id) ? "r300" : "r600"; + } + + return name; +} + +/** + * Initialize KMS and pipe screen. + */ +static boolean +drm_display_init_screen(struct native_display *ndpy) +{ + struct drm_display *drmdpy = drm_display(ndpy); + drmVersionPtr version; + const char *name; + + version = drmGetVersion(drmdpy->fd); + if (!version) { + _eglLog(_EGL_WARNING, "invalid fd %d", drmdpy->fd); + return FALSE; + } + + name = get_drm_screen_name(drmdpy->fd, version); + if (name) { + drmdpy->base.screen = + drmdpy->event_handler->new_drm_screen(&drmdpy->base, name, drmdpy->fd); + } + drmFreeVersion(version); + + if (!drmdpy->base.screen) { + _eglLog(_EGL_WARNING, "failed to create DRM screen"); + return FALSE; + } + + return TRUE; +} + +static struct native_display * +drm_create_display(int fd, struct native_event_handler *event_handler, + void *user_data) +{ + struct drm_display *drmdpy; + + drmdpy = CALLOC_STRUCT(drm_display); + if (!drmdpy) + return NULL; + + drmdpy->fd = fd; + drmdpy->event_handler = event_handler; + drmdpy->base.user_data = user_data; + + if (!drm_display_init_screen(&drmdpy->base)) { + drm_display_destroy(&drmdpy->base); + return NULL; + } + + drmdpy->base.destroy = drm_display_destroy; + drmdpy->base.get_param = drm_display_get_param; + drmdpy->base.get_configs = drm_display_get_configs; + + drm_display_init_modeset(&drmdpy->base); + + return &drmdpy->base; +} + +static struct native_display * +native_create_display(void *dpy, struct native_event_handler *event_handler, + void *user_data) +{ + int fd; + + if (dpy) { + fd = dup((int) pointer_to_intptr(dpy)); + } + else { + fd = open("/dev/dri/card0", O_RDWR); + } + if (fd < 0) + return NULL; + + return drm_create_display(fd, event_handler, user_data); +} + +static const struct native_platform drm_platform = { + "DRM", /* name */ + native_create_display +}; + +const struct native_platform * +native_get_drm_platform(void) +{ + return &drm_platform; +} diff --git a/src/gallium/state_trackers/egl/kms/native_kms.h b/src/gallium/state_trackers/egl/drm/native_drm.h index cd8e4ff0b2..03c4fe01dc 100644 --- a/src/gallium/state_trackers/egl/kms/native_kms.h +++ b/src/gallium/state_trackers/egl/drm/native_drm.h @@ -23,8 +23,8 @@ * DEALINGS IN THE SOFTWARE. */ -#ifndef _NATIVE_KMS_H_ -#define _NATIVE_KMS_H_ +#ifndef _NATIVE_DRM_H_ +#define _NATIVE_DRM_H_ #include <xf86drm.h> #include <xf86drmMode.h> @@ -37,101 +37,110 @@ #include "common/native.h" #include "common/native_helper.h" -struct kms_config; -struct kms_connector; -struct kms_mode; +struct drm_config; +struct drm_crtc; +struct drm_connector; +struct drm_mode; +struct drm_surface; -struct kms_crtc { - drmModeCrtcPtr crtc; - uint32_t connectors[32]; - int num_connectors; -}; - -struct kms_display { +struct drm_display { struct native_display base; struct native_event_handler *event_handler; int fd; - drmModeResPtr resources; - struct kms_config *config; + struct drm_config *config; - struct kms_connector *connectors; + /* for modesetting */ + drmModeResPtr resources; + struct drm_connector *connectors; int num_connectors; - struct kms_surface **shown_surfaces; + struct drm_surface **shown_surfaces; /* save the original settings of the CRTCs */ - struct kms_crtc *saved_crtcs; + struct drm_crtc *saved_crtcs; +}; + +struct drm_config { + struct native_config base; }; -struct kms_framebuffer { +struct drm_crtc { + drmModeCrtcPtr crtc; + uint32_t connectors[32]; + int num_connectors; +}; + +struct drm_framebuffer { struct pipe_resource *texture; boolean is_passive; uint32_t buffer_id; }; -struct kms_surface { +struct drm_surface { struct native_surface base; - struct kms_display *kdpy; + struct drm_display *drmdpy; struct resource_surface *rsurf; enum pipe_format color_format; int width, height; unsigned int sequence_number; - struct kms_framebuffer front_fb, back_fb; + struct drm_framebuffer front_fb, back_fb; boolean is_shown; - struct kms_crtc current_crtc; + struct drm_crtc current_crtc; }; -struct kms_config { - struct native_config base; -}; - -struct kms_connector { +struct drm_connector { struct native_connector base; uint32_t connector_id; drmModeConnectorPtr connector; - struct kms_mode *kms_modes; + struct drm_mode *drm_modes; int num_modes; }; -struct kms_mode { +struct drm_mode { struct native_mode base; drmModeModeInfo mode; }; -static INLINE struct kms_display * -kms_display(const struct native_display *ndpy) +static INLINE struct drm_display * +drm_display(const struct native_display *ndpy) { - return (struct kms_display *) ndpy; + return (struct drm_display *) ndpy; } -static INLINE struct kms_surface * -kms_surface(const struct native_surface *nsurf) +static INLINE struct drm_config * +drm_config(const struct native_config *nconf) { - return (struct kms_surface *) nsurf; + return (struct drm_config *) nconf; } -static INLINE struct kms_config * -kms_config(const struct native_config *nconf) +static INLINE struct drm_surface * +drm_surface(const struct native_surface *nsurf) { - return (struct kms_config *) nconf; + return (struct drm_surface *) nsurf; } -static INLINE struct kms_connector * -kms_connector(const struct native_connector *nconn) +static INLINE struct drm_connector * +drm_connector(const struct native_connector *nconn) { - return (struct kms_connector *) nconn; + return (struct drm_connector *) nconn; } -static INLINE struct kms_mode * -kms_mode(const struct native_mode *nmode) +static INLINE struct drm_mode * +drm_mode(const struct native_mode *nmode) { - return (struct kms_mode *) nmode; + return (struct drm_mode *) nmode; } -#endif /* _NATIVE_KMS_H_ */ +boolean +drm_display_init_modeset(struct native_display *ndpy); + +void +drm_display_fini_modeset(struct native_display *ndpy); + +#endif /* _NATIVE_DRM_H_ */ diff --git a/src/gallium/state_trackers/egl/kms/native_kms.c b/src/gallium/state_trackers/egl/kms/native_kms.c deleted file mode 100644 index 208f73306c..0000000000 --- a/src/gallium/state_trackers/egl/kms/native_kms.c +++ /dev/null @@ -1,808 +0,0 @@ -/* - * Mesa 3-D graphics library - * Version: 7.8 - * - * Copyright (C) 2010 Chia-I Wu <olv@0xlab.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> - -#include "pipe/p_screen.h" -#include "pipe/p_context.h" -#include "util/u_debug.h" -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "util/u_pointer.h" -#include "util/u_string.h" -#include "egllog.h" - -#include "native_kms.h" - -/* see get_drm_screen_name */ -#include <radeon_drm.h> -#include "radeon/drm/radeon_drm.h" - -static boolean -kms_surface_validate(struct native_surface *nsurf, uint attachment_mask, - unsigned int *seq_num, struct pipe_resource **textures, - int *width, int *height) -{ - struct kms_surface *ksurf = kms_surface(nsurf); - - if (!resource_surface_add_resources(ksurf->rsurf, attachment_mask)) - return FALSE; - if (textures) - resource_surface_get_resources(ksurf->rsurf, textures, attachment_mask); - - if (seq_num) - *seq_num = ksurf->sequence_number; - if (width) - *width = ksurf->width; - if (height) - *height = ksurf->height; - - return TRUE; -} - -/** - * Add textures as DRM framebuffers. - */ -static boolean -kms_surface_init_framebuffers(struct native_surface *nsurf, boolean need_back) -{ - struct kms_surface *ksurf = kms_surface(nsurf); - struct kms_display *kdpy = ksurf->kdpy; - int num_framebuffers = (need_back) ? 2 : 1; - int i, err; - - for (i = 0; i < num_framebuffers; i++) { - struct kms_framebuffer *fb; - enum native_attachment natt; - struct winsys_handle whandle; - uint block_bits; - - if (i == 0) { - fb = &ksurf->front_fb; - natt = NATIVE_ATTACHMENT_FRONT_LEFT; - } - else { - fb = &ksurf->back_fb; - natt = NATIVE_ATTACHMENT_BACK_LEFT; - } - - if (!fb->texture) { - /* make sure the texture has been allocated */ - resource_surface_add_resources(ksurf->rsurf, 1 << natt); - fb->texture = - resource_surface_get_single_resource(ksurf->rsurf, natt); - if (!fb->texture) - return FALSE; - } - - /* already initialized */ - if (fb->buffer_id) - continue; - - /* TODO detect the real value */ - fb->is_passive = TRUE; - - memset(&whandle, 0, sizeof(whandle)); - whandle.type = DRM_API_HANDLE_TYPE_KMS; - - if (!kdpy->base.screen->resource_get_handle(kdpy->base.screen, - fb->texture, &whandle)) - return FALSE; - - block_bits = util_format_get_blocksizebits(ksurf->color_format); - err = drmModeAddFB(kdpy->fd, ksurf->width, ksurf->height, - block_bits, block_bits, whandle.stride, whandle.handle, - &fb->buffer_id); - if (err) { - fb->buffer_id = 0; - return FALSE; - } - } - - return TRUE; -} - -static boolean -kms_surface_flush_frontbuffer(struct native_surface *nsurf) -{ -#ifdef DRM_MODE_FEATURE_DIRTYFB - struct kms_surface *ksurf = kms_surface(nsurf); - struct kms_display *kdpy = ksurf->kdpy; - - if (ksurf->front_fb.is_passive) - drmModeDirtyFB(kdpy->fd, ksurf->front_fb.buffer_id, NULL, 0); -#endif - - return TRUE; -} - -static boolean -kms_surface_swap_buffers(struct native_surface *nsurf) -{ - struct kms_surface *ksurf = kms_surface(nsurf); - struct kms_crtc *kcrtc = &ksurf->current_crtc; - struct kms_display *kdpy = ksurf->kdpy; - struct kms_framebuffer tmp_fb; - int err; - - if (!ksurf->back_fb.buffer_id) { - if (!kms_surface_init_framebuffers(&ksurf->base, TRUE)) - return FALSE; - } - - if (ksurf->is_shown && kcrtc->crtc) { - err = drmModeSetCrtc(kdpy->fd, kcrtc->crtc->crtc_id, - ksurf->back_fb.buffer_id, kcrtc->crtc->x, kcrtc->crtc->y, - kcrtc->connectors, kcrtc->num_connectors, &kcrtc->crtc->mode); - if (err) - return FALSE; - } - - /* swap the buffers */ - tmp_fb = ksurf->front_fb; - ksurf->front_fb = ksurf->back_fb; - ksurf->back_fb = tmp_fb; - - resource_surface_swap_buffers(ksurf->rsurf, - NATIVE_ATTACHMENT_FRONT_LEFT, NATIVE_ATTACHMENT_BACK_LEFT, FALSE); - /* the front/back textures are swapped */ - ksurf->sequence_number++; - kdpy->event_handler->invalid_surface(&kdpy->base, - &ksurf->base, ksurf->sequence_number); - - return TRUE; -} - -static void -kms_surface_wait(struct native_surface *nsurf) -{ - /* no-op */ -} - -static void -kms_surface_destroy(struct native_surface *nsurf) -{ - struct kms_surface *ksurf = kms_surface(nsurf); - - if (ksurf->current_crtc.crtc) - drmModeFreeCrtc(ksurf->current_crtc.crtc); - - if (ksurf->front_fb.buffer_id) - drmModeRmFB(ksurf->kdpy->fd, ksurf->front_fb.buffer_id); - pipe_resource_reference(&ksurf->front_fb.texture, NULL); - - if (ksurf->back_fb.buffer_id) - drmModeRmFB(ksurf->kdpy->fd, ksurf->back_fb.buffer_id); - pipe_resource_reference(&ksurf->back_fb.texture, NULL); - - resource_surface_destroy(ksurf->rsurf); - FREE(ksurf); -} - -static struct kms_surface * -kms_display_create_surface(struct native_display *ndpy, - const struct native_config *nconf, - uint width, uint height) -{ - struct kms_display *kdpy = kms_display(ndpy); - struct kms_config *kconf = kms_config(nconf); - struct kms_surface *ksurf; - - ksurf = CALLOC_STRUCT(kms_surface); - if (!ksurf) - return NULL; - - ksurf->kdpy = kdpy; - ksurf->color_format = kconf->base.color_format; - ksurf->width = width; - ksurf->height = height; - - ksurf->rsurf = resource_surface_create(kdpy->base.screen, - ksurf->color_format, - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_SAMPLER_VIEW | - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT); - if (!ksurf->rsurf) { - FREE(ksurf); - return NULL; - } - - resource_surface_set_size(ksurf->rsurf, ksurf->width, ksurf->height); - - ksurf->base.destroy = kms_surface_destroy; - ksurf->base.swap_buffers = kms_surface_swap_buffers; - ksurf->base.flush_frontbuffer = kms_surface_flush_frontbuffer; - ksurf->base.validate = kms_surface_validate; - ksurf->base.wait = kms_surface_wait; - - return ksurf; -} - -/** - * Choose a CRTC that supports all given connectors. - */ -static uint32_t -kms_display_choose_crtc(struct native_display *ndpy, - uint32_t *connectors, int num_connectors) -{ - struct kms_display *kdpy = kms_display(ndpy); - int idx; - - for (idx = 0; idx < kdpy->resources->count_crtcs; idx++) { - boolean found_crtc = TRUE; - int i, j; - - for (i = 0; i < num_connectors; i++) { - drmModeConnectorPtr connector; - int encoder_idx = -1; - - connector = drmModeGetConnector(kdpy->fd, connectors[i]); - if (!connector) { - found_crtc = FALSE; - break; - } - - /* find an encoder the CRTC supports */ - for (j = 0; j < connector->count_encoders; j++) { - drmModeEncoderPtr encoder = - drmModeGetEncoder(kdpy->fd, connector->encoders[j]); - if (encoder->possible_crtcs & (1 << idx)) { - encoder_idx = j; - break; - } - drmModeFreeEncoder(encoder); - } - - drmModeFreeConnector(connector); - if (encoder_idx < 0) { - found_crtc = FALSE; - break; - } - } - - if (found_crtc) - break; - } - - if (idx >= kdpy->resources->count_crtcs) { - _eglLog(_EGL_WARNING, - "failed to find a CRTC that supports the given %d connectors", - num_connectors); - return 0; - } - - return kdpy->resources->crtcs[idx]; -} - -/** - * Remember the original CRTC status and set the CRTC - */ -static boolean -kms_display_set_crtc(struct native_display *ndpy, int crtc_idx, - uint32_t buffer_id, uint32_t x, uint32_t y, - uint32_t *connectors, int num_connectors, - drmModeModeInfoPtr mode) -{ - struct kms_display *kdpy = kms_display(ndpy); - struct kms_crtc *kcrtc = &kdpy->saved_crtcs[crtc_idx]; - uint32_t crtc_id; - int err; - - if (kcrtc->crtc) { - crtc_id = kcrtc->crtc->crtc_id; - } - else { - int count = 0, i; - - /* - * Choose the CRTC once. It could be more dynamic, but let's keep it - * simple for now. - */ - crtc_id = kms_display_choose_crtc(&kdpy->base, - connectors, num_connectors); - - /* save the original CRTC status */ - kcrtc->crtc = drmModeGetCrtc(kdpy->fd, crtc_id); - if (!kcrtc->crtc) - return FALSE; - - for (i = 0; i < kdpy->num_connectors; i++) { - struct kms_connector *kconn = &kdpy->connectors[i]; - drmModeConnectorPtr connector = kconn->connector; - drmModeEncoderPtr encoder; - - encoder = drmModeGetEncoder(kdpy->fd, connector->encoder_id); - if (encoder) { - if (encoder->crtc_id == crtc_id) { - kcrtc->connectors[count++] = connector->connector_id; - if (count >= Elements(kcrtc->connectors)) - break; - } - drmModeFreeEncoder(encoder); - } - } - - kcrtc->num_connectors = count; - } - - err = drmModeSetCrtc(kdpy->fd, crtc_id, buffer_id, x, y, - connectors, num_connectors, mode); - if (err) { - drmModeFreeCrtc(kcrtc->crtc); - kcrtc->crtc = NULL; - kcrtc->num_connectors = 0; - - return FALSE; - } - - return TRUE; -} - -static boolean -kms_display_program(struct native_display *ndpy, int crtc_idx, - struct native_surface *nsurf, uint x, uint y, - const struct native_connector **nconns, int num_nconns, - const struct native_mode *nmode) -{ - struct kms_display *kdpy = kms_display(ndpy); - struct kms_surface *ksurf = kms_surface(nsurf); - const struct kms_mode *kmode = kms_mode(nmode); - uint32_t connector_ids[32]; - uint32_t buffer_id; - drmModeModeInfo mode_tmp, *mode; - int i; - - if (num_nconns > Elements(connector_ids)) { - _eglLog(_EGL_WARNING, "too many connectors (%d)", num_nconns); - num_nconns = Elements(connector_ids); - } - - if (ksurf) { - if (!kms_surface_init_framebuffers(&ksurf->base, FALSE)) - return FALSE; - - buffer_id = ksurf->front_fb.buffer_id; - /* the mode argument of drmModeSetCrtc is not constified */ - mode_tmp = kmode->mode; - mode = &mode_tmp; - } - else { - /* disable the CRTC */ - buffer_id = 0; - mode = NULL; - num_nconns = 0; - } - - for (i = 0; i < num_nconns; i++) { - struct kms_connector *kconn = kms_connector(nconns[i]); - connector_ids[i] = kconn->connector->connector_id; - } - - if (!kms_display_set_crtc(&kdpy->base, crtc_idx, buffer_id, x, y, - connector_ids, num_nconns, mode)) { - _eglLog(_EGL_WARNING, "failed to set CRTC %d", crtc_idx); - - return FALSE; - } - - if (kdpy->shown_surfaces[crtc_idx]) - kdpy->shown_surfaces[crtc_idx]->is_shown = FALSE; - kdpy->shown_surfaces[crtc_idx] = ksurf; - - /* remember the settings for buffer swapping */ - if (ksurf) { - uint32_t crtc_id = kdpy->saved_crtcs[crtc_idx].crtc->crtc_id; - struct kms_crtc *kcrtc = &ksurf->current_crtc; - - if (kcrtc->crtc) - drmModeFreeCrtc(kcrtc->crtc); - kcrtc->crtc = drmModeGetCrtc(kdpy->fd, crtc_id); - - assert(num_nconns < Elements(kcrtc->connectors)); - memcpy(kcrtc->connectors, connector_ids, - sizeof(*connector_ids) * num_nconns); - kcrtc->num_connectors = num_nconns; - - ksurf->is_shown = TRUE; - } - - return TRUE; -} - -static const struct native_mode ** -kms_display_get_modes(struct native_display *ndpy, - const struct native_connector *nconn, - int *num_modes) -{ - struct kms_display *kdpy = kms_display(ndpy); - struct kms_connector *kconn = kms_connector(nconn); - const struct native_mode **nmodes_return; - int count, i; - - /* delete old data */ - if (kconn->connector) { - drmModeFreeConnector(kconn->connector); - FREE(kconn->kms_modes); - - kconn->connector = NULL; - kconn->kms_modes = NULL; - kconn->num_modes = 0; - } - - /* detect again */ - kconn->connector = drmModeGetConnector(kdpy->fd, kconn->connector_id); - if (!kconn->connector) - return NULL; - - count = kconn->connector->count_modes; - kconn->kms_modes = CALLOC(count, sizeof(*kconn->kms_modes)); - if (!kconn->kms_modes) { - drmModeFreeConnector(kconn->connector); - kconn->connector = NULL; - - return NULL; - } - - for (i = 0; i < count; i++) { - struct kms_mode *kmode = &kconn->kms_modes[i]; - drmModeModeInfoPtr mode = &kconn->connector->modes[i]; - - kmode->mode = *mode; - - kmode->base.desc = kmode->mode.name; - kmode->base.width = kmode->mode.hdisplay; - kmode->base.height = kmode->mode.vdisplay; - kmode->base.refresh_rate = kmode->mode.vrefresh; - /* not all kernels have vrefresh = refresh_rate * 1000 */ - if (kmode->base.refresh_rate > 1000) - kmode->base.refresh_rate = (kmode->base.refresh_rate + 500) / 1000; - } - - nmodes_return = MALLOC(count * sizeof(*nmodes_return)); - if (nmodes_return) { - for (i = 0; i < count; i++) - nmodes_return[i] = &kconn->kms_modes[i].base; - if (num_modes) - *num_modes = count; - } - - return nmodes_return; -} - -static const struct native_connector ** -kms_display_get_connectors(struct native_display *ndpy, int *num_connectors, - int *num_crtc) -{ - struct kms_display *kdpy = kms_display(ndpy); - const struct native_connector **connectors; - int i; - - if (!kdpy->connectors) { - kdpy->connectors = - CALLOC(kdpy->resources->count_connectors, sizeof(*kdpy->connectors)); - if (!kdpy->connectors) - return NULL; - - for (i = 0; i < kdpy->resources->count_connectors; i++) { - struct kms_connector *kconn = &kdpy->connectors[i]; - - kconn->connector_id = kdpy->resources->connectors[i]; - /* kconn->connector is allocated when the modes are asked */ - } - - kdpy->num_connectors = kdpy->resources->count_connectors; - } - - connectors = MALLOC(kdpy->num_connectors * sizeof(*connectors)); - if (connectors) { - for (i = 0; i < kdpy->num_connectors; i++) - connectors[i] = &kdpy->connectors[i].base; - if (num_connectors) - *num_connectors = kdpy->num_connectors; - } - - if (num_crtc) - *num_crtc = kdpy->resources->count_crtcs; - - return connectors; -} - -static struct native_surface * -kms_display_create_scanout_surface(struct native_display *ndpy, - const struct native_config *nconf, - uint width, uint height) -{ - struct kms_surface *ksurf; - - ksurf = kms_display_create_surface(ndpy, nconf, width, height); - return &ksurf->base; -} - -static boolean -kms_display_is_format_supported(struct native_display *ndpy, - enum pipe_format fmt, boolean is_color) -{ - return ndpy->screen->is_format_supported(ndpy->screen, - fmt, PIPE_TEXTURE_2D, 0, - (is_color) ? PIPE_BIND_RENDER_TARGET : - PIPE_BIND_DEPTH_STENCIL, 0); -} - -static const struct native_config ** -kms_display_get_configs(struct native_display *ndpy, int *num_configs) -{ - struct kms_display *kdpy = kms_display(ndpy); - const struct native_config **configs; - - /* first time */ - if (!kdpy->config) { - struct native_config *nconf; - enum pipe_format format; - - kdpy->config = CALLOC(1, sizeof(*kdpy->config)); - if (!kdpy->config) - return NULL; - - nconf = &kdpy->config->base; - - nconf->buffer_mask = - (1 << NATIVE_ATTACHMENT_FRONT_LEFT) | - (1 << NATIVE_ATTACHMENT_BACK_LEFT); - - format = PIPE_FORMAT_B8G8R8A8_UNORM; - if (!kms_display_is_format_supported(&kdpy->base, format, TRUE)) { - format = PIPE_FORMAT_A8R8G8B8_UNORM; - if (!kms_display_is_format_supported(&kdpy->base, format, TRUE)) - format = PIPE_FORMAT_NONE; - } - if (format == PIPE_FORMAT_NONE) { - FREE(kdpy->config); - kdpy->config = NULL; - return NULL; - } - - nconf->color_format = format; - - /* support KMS */ - if (kdpy->resources) - nconf->scanout_bit = TRUE; - } - - configs = MALLOC(sizeof(*configs)); - if (configs) { - configs[0] = &kdpy->config->base; - if (num_configs) - *num_configs = 1; - } - - return configs; -} - -static int -kms_display_get_param(struct native_display *ndpy, - enum native_param_type param) -{ - int val; - - switch (param) { - default: - val = 0; - break; - } - - return val; -} - -static void -kms_display_destroy(struct native_display *ndpy) -{ - struct kms_display *kdpy = kms_display(ndpy); - int i; - - if (kdpy->config) - FREE(kdpy->config); - - if (kdpy->connectors) { - for (i = 0; i < kdpy->num_connectors; i++) { - struct kms_connector *kconn = &kdpy->connectors[i]; - if (kconn->connector) { - drmModeFreeConnector(kconn->connector); - FREE(kconn->kms_modes); - } - } - FREE(kdpy->connectors); - } - - if (kdpy->shown_surfaces) - FREE(kdpy->shown_surfaces); - - if (kdpy->saved_crtcs) { - for (i = 0; i < kdpy->resources->count_crtcs; i++) { - struct kms_crtc *kcrtc = &kdpy->saved_crtcs[i]; - - if (kcrtc->crtc) { - /* restore crtc */ - drmModeSetCrtc(kdpy->fd, kcrtc->crtc->crtc_id, - kcrtc->crtc->buffer_id, kcrtc->crtc->x, kcrtc->crtc->y, - kcrtc->connectors, kcrtc->num_connectors, - &kcrtc->crtc->mode); - - drmModeFreeCrtc(kcrtc->crtc); - } - } - FREE(kdpy->saved_crtcs); - } - - if (kdpy->resources) - drmModeFreeResources(kdpy->resources); - - if (kdpy->base.screen) - kdpy->base.screen->destroy(kdpy->base.screen); - - if (kdpy->fd >= 0) - close(kdpy->fd); - - FREE(kdpy); -} - -static const char * -get_drm_screen_name(int fd, drmVersionPtr version) -{ - const char *name = version->name; - - if (name && !strcmp(name, "radeon")) { - int chip_id; - struct drm_radeon_info info; - - memset(&info, 0, sizeof(info)); - info.request = RADEON_INFO_DEVICE_ID; - info.value = pointer_to_intptr(&chip_id); - if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) != 0) - return NULL; - - name = is_r3xx(chip_id) ? "r300" : "r600"; - } - - return name; -} - -/** - * Initialize KMS and pipe screen. - */ -static boolean -kms_display_init_screen(struct native_display *ndpy) -{ - struct kms_display *kdpy = kms_display(ndpy); - drmVersionPtr version; - const char *name; - - version = drmGetVersion(kdpy->fd); - if (!version) { - _eglLog(_EGL_WARNING, "invalid fd %d", kdpy->fd); - return FALSE; - } - - name = get_drm_screen_name(kdpy->fd, version); - if (name) { - kdpy->base.screen = - kdpy->event_handler->new_drm_screen(&kdpy->base, name, kdpy->fd); - } - drmFreeVersion(version); - - if (!kdpy->base.screen) { - _eglLog(_EGL_WARNING, "failed to create DRM screen"); - return FALSE; - } - - return TRUE; -} - -static struct native_display_modeset kms_display_modeset = { - .get_connectors = kms_display_get_connectors, - .get_modes = kms_display_get_modes, - .create_scanout_surface = kms_display_create_scanout_surface, - .program = kms_display_program -}; - -static struct native_display * -kms_create_display(int fd, struct native_event_handler *event_handler, - void *user_data) -{ - struct kms_display *kdpy; - - kdpy = CALLOC_STRUCT(kms_display); - if (!kdpy) - return NULL; - - kdpy->fd = fd; - kdpy->event_handler = event_handler; - kdpy->base.user_data = user_data; - - if (!kms_display_init_screen(&kdpy->base)) { - kms_display_destroy(&kdpy->base); - return NULL; - } - - kdpy->base.destroy = kms_display_destroy; - kdpy->base.get_param = kms_display_get_param; - kdpy->base.get_configs = kms_display_get_configs; - - /* resources are fixed, unlike crtc, connector, or encoder */ - kdpy->resources = drmModeGetResources(kdpy->fd); - if (kdpy->resources) { - kdpy->saved_crtcs = - CALLOC(kdpy->resources->count_crtcs, sizeof(*kdpy->saved_crtcs)); - if (!kdpy->saved_crtcs) { - kms_display_destroy(&kdpy->base); - return NULL; - } - - kdpy->shown_surfaces = - CALLOC(kdpy->resources->count_crtcs, sizeof(*kdpy->shown_surfaces)); - if (!kdpy->shown_surfaces) { - kms_display_destroy(&kdpy->base); - return NULL; - } - - kdpy->base.modeset = &kms_display_modeset; - } - else { - _eglLog(_EGL_DEBUG, "Failed to get KMS resources. Disable modeset."); - } - - return &kdpy->base; -} - -static struct native_display * -native_create_display(void *dpy, struct native_event_handler *event_handler, - void *user_data) -{ - int fd; - - if (dpy) { - fd = dup((int) pointer_to_intptr(dpy)); - } - else { - fd = open("/dev/dri/card0", O_RDWR); - } - if (fd < 0) - return NULL; - - return kms_create_display(fd, event_handler, user_data); -} - -static const struct native_platform kms_platform = { - "KMS", /* name */ - native_create_display -}; - -const struct native_platform * -native_get_kms_platform(void) -{ - return &kms_platform; -} diff --git a/src/gallium/state_trackers/egl/x11/glcore.h b/src/gallium/state_trackers/egl/x11/glcore.h new file mode 100644 index 0000000000..547b111370 --- /dev/null +++ b/src/gallium/state_trackers/egl/x11/glcore.h @@ -0,0 +1,181 @@ +#ifndef __gl_core_h_ +#define __gl_core_h_ + +/* + * SGI FREE SOFTWARE LICENSE B (Version 2.0, Sept. 18, 2008) + * Copyright (C) 1991-2000 Silicon Graphics, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice including the dates of first publication and + * either this permission notice or a reference to + * http://oss.sgi.com/projects/FreeB/ + * shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * SILICON GRAPHICS, INC. BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Except as contained in this notice, the name of Silicon Graphics, Inc. + * shall not be used in advertising or otherwise to promote the sale, use or + * other dealings in this Software without prior written authorization from + * Silicon Graphics, Inc. + */ + +#if !defined(_WIN32_WCE) +#include <sys/types.h> +#endif + +#define GL_CORE_SGI 1 +#define GL_CORE_MESA 2 +#define GL_CORE_APPLE 4 +#define GL_CORE_WINDOWS 8 + +typedef struct __GLcontextRec __GLcontext; + +/* +** This file defines the interface between the GL core and the surrounding +** "operating system" that supports it (currently the GLX or WGL extensions). +** +** Members (data and function pointers) are documented as imported or +** exported according to how they are used by the core rendering functions. +** Imported members are initialized by the "operating system" and used by +** the core functions. Exported members are initialized by the core functions +** and used by the "operating system". +*/ + +/** + * Mode and limit information for a context. This information is + * kept around in the context so that values can be used during + * command execution, and for returning information about the + * context to the application. + * + * Instances of this structure are shared by the driver and the loader. To + * maintain binary compatability, new fields \b must be added only to the + * end of the structure. + * + * \sa _gl_context_modes_create + */ +typedef struct __GLcontextModesRec { + struct __GLcontextModesRec * next; + + GLboolean rgbMode; + GLboolean floatMode; + GLboolean colorIndexMode; + GLuint doubleBufferMode; + GLuint stereoMode; + + GLboolean haveAccumBuffer; + GLboolean haveDepthBuffer; + GLboolean haveStencilBuffer; + + GLint redBits, greenBits, blueBits, alphaBits; /* bits per comp */ + GLuint redMask, greenMask, blueMask, alphaMask; + GLint rgbBits; /* total bits for rgb */ + GLint indexBits; /* total bits for colorindex */ + + GLint accumRedBits, accumGreenBits, accumBlueBits, accumAlphaBits; + GLint depthBits; + GLint stencilBits; + + GLint numAuxBuffers; + + GLint level; + + GLint pixmapMode; + + /* GLX */ + GLint visualID; + GLint visualType; /**< One of the GLX X visual types. (i.e., + * \c GLX_TRUE_COLOR, etc.) + */ + + /* EXT_visual_rating / GLX 1.2 */ + GLint visualRating; + + /* EXT_visual_info / GLX 1.2 */ + GLint transparentPixel; + /* colors are floats scaled to ints */ + GLint transparentRed, transparentGreen, transparentBlue, transparentAlpha; + GLint transparentIndex; + + /* ARB_multisample / SGIS_multisample */ + GLint sampleBuffers; + GLint samples; + + /* SGIX_fbconfig / GLX 1.3 */ + GLint drawableType; + GLint renderType; + GLint xRenderable; + GLint fbconfigID; + + /* SGIX_pbuffer / GLX 1.3 */ + GLint maxPbufferWidth; + GLint maxPbufferHeight; + GLint maxPbufferPixels; + GLint optimalPbufferWidth; /* Only for SGIX_pbuffer. */ + GLint optimalPbufferHeight; /* Only for SGIX_pbuffer. */ + + /* SGIX_visual_select_group */ + GLint visualSelectGroup; + + /* OML_swap_method */ + GLint swapMethod; + + GLint screen; + + /* EXT_texture_from_pixmap */ + GLint bindToTextureRgb; + GLint bindToTextureRgba; + GLint bindToMipmapTexture; + GLint bindToTextureTargets; + GLint yInverted; +} __GLcontextModes; + +/* Several fields of __GLcontextModes can take these as values. Since + * GLX header files may not be available everywhere they need to be used, + * redefine them here. + */ +#define GLX_NONE 0x8000 +#define GLX_SLOW_CONFIG 0x8001 +#define GLX_TRUE_COLOR 0x8002 +#define GLX_DIRECT_COLOR 0x8003 +#define GLX_PSEUDO_COLOR 0x8004 +#define GLX_STATIC_COLOR 0x8005 +#define GLX_GRAY_SCALE 0x8006 +#define GLX_STATIC_GRAY 0x8007 +#define GLX_TRANSPARENT_RGB 0x8008 +#define GLX_TRANSPARENT_INDEX 0x8009 +#define GLX_NON_CONFORMANT_CONFIG 0x800D +#define GLX_SWAP_EXCHANGE_OML 0x8061 +#define GLX_SWAP_COPY_OML 0x8062 +#define GLX_SWAP_UNDEFINED_OML 0x8063 + +#define GLX_DONT_CARE 0xFFFFFFFF + +#define GLX_RGBA_BIT 0x00000001 +#define GLX_COLOR_INDEX_BIT 0x00000002 +#define GLX_WINDOW_BIT 0x00000001 +#define GLX_PIXMAP_BIT 0x00000002 +#define GLX_PBUFFER_BIT 0x00000004 + +#define GLX_BIND_TO_TEXTURE_RGB_EXT 0x20D0 +#define GLX_BIND_TO_TEXTURE_RGBA_EXT 0x20D1 +#define GLX_BIND_TO_MIPMAP_TEXTURE_EXT 0x20D2 +#define GLX_BIND_TO_TEXTURE_TARGETS_EXT 0x20D3 +#define GLX_Y_INVERTED_EXT 0x20D4 + +#define GLX_TEXTURE_1D_BIT_EXT 0x00000001 +#define GLX_TEXTURE_2D_BIT_EXT 0x00000002 +#define GLX_TEXTURE_RECTANGLE_BIT_EXT 0x00000004 + +#endif /* __gl_core_h_ */ diff --git a/src/gallium/state_trackers/egl/x11/glxinit.c b/src/gallium/state_trackers/egl/x11/glxinit.c index 57c6aaff86..df8370f8d7 100644 --- a/src/gallium/state_trackers/egl/x11/glxinit.c +++ b/src/gallium/state_trackers/egl/x11/glxinit.c @@ -18,7 +18,7 @@ #include "GL/glxproto.h" #include "GL/glxtokens.h" #include "GL/gl.h" /* for GL types needed by __GLcontextModes */ -#include "GL/internal/glcore.h" /* for __GLcontextModes */ +#include "glcore.h" /* for __GLcontextModes */ #include "glxinit.h" diff --git a/src/gallium/state_trackers/egl/x11/native_dri2.c b/src/gallium/state_trackers/egl/x11/native_dri2.c index 1be1e42468..1169e273c3 100644 --- a/src/gallium/state_trackers/egl/x11/native_dri2.c +++ b/src/gallium/state_trackers/egl/x11/native_dri2.c @@ -518,10 +518,6 @@ dri2_display_convert_config(struct native_display *ndpy, if (!(mode->renderType & GLX_RGBA_BIT) || !mode->rgbMode) return FALSE; - /* skip single-buffered configs */ - if (!mode->doubleBufferMode) - return FALSE; - /* only interested in native renderable configs */ if (!mode->xRenderable || !mode->drawableType) return FALSE; diff --git a/src/gallium/state_trackers/egl/x11/x11_screen.c b/src/gallium/state_trackers/egl/x11/x11_screen.c index c07ebb7ef6..c919b79eac 100644 --- a/src/gallium/state_trackers/egl/x11/x11_screen.c +++ b/src/gallium/state_trackers/egl/x11/x11_screen.c @@ -432,4 +432,15 @@ dri2InvalidateBuffers(Display *dpy, XID drawable) xscr->dri_invalidate_buffers(xscr, drawable, xscr->dri_user_data); } +extern unsigned +dri2GetSwapEventType(Display *dpy, XID drawable); + +/** + * This is also called from src/glx/dri2.c. + */ +unsigned dri2GetSwapEventType(Display *dpy, XID drawable) +{ + return 0; +} + #endif /* GLX_DIRECT_RENDERING */ diff --git a/src/gallium/state_trackers/egl/x11/x11_screen.h b/src/gallium/state_trackers/egl/x11/x11_screen.h index bc0ef69ec6..2e313e0148 100644 --- a/src/gallium/state_trackers/egl/x11/x11_screen.h +++ b/src/gallium/state_trackers/egl/x11/x11_screen.h @@ -30,7 +30,7 @@ #include <X11/Xutil.h> #include <X11/extensions/dri2tokens.h> #include "GL/gl.h" /* for GL types needed by __GLcontextModes */ -#include "GL/internal/glcore.h" /* for __GLcontextModes */ +#include "glcore.h" /* for __GLcontextModes */ #include "pipe/p_compiler.h" #include "common/native.h" diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c index f950c8858b..8332633f01 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_api.c +++ b/src/gallium/state_trackers/glx/xlib/xm_api.c @@ -423,7 +423,7 @@ static XMesaBuffer XMesaBufferList = NULL; /** * Allocate a new XMesaBuffer object which corresponds to the given drawable. - * Note that XMesaBuffer is derived from GLframebuffer. + * Note that XMesaBuffer is derived from struct gl_framebuffer. * The new XMesaBuffer will not have any size (Width=Height=0). * * \param d the corresponding X drawable (window or pixmap) @@ -569,7 +569,7 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b, /* RGB WINDOW: * We support RGB rendering into almost any kind of visual. */ - const int xclass = v->mesa_visual.visualType; + const int xclass = v->visualType; if (xclass != GLX_TRUE_COLOR && xclass == !GLX_DIRECT_COLOR) { _mesa_warning(NULL, "XMesa: RGB mode rendering not supported in given visual.\n"); @@ -716,13 +716,13 @@ XMesaVisual XMesaCreateVisual( Display *display, v->mesa_visual.redMask = visinfo->red_mask; v->mesa_visual.greenMask = visinfo->green_mask; v->mesa_visual.blueMask = visinfo->blue_mask; - v->mesa_visual.visualID = visinfo->visualid; - v->mesa_visual.screen = visinfo->screen; + v->visualID = visinfo->visualid; + v->screen = visinfo->screen; #if !(defined(__cplusplus) || defined(c_plusplus)) - v->mesa_visual.visualType = xmesa_convert_from_x_visual_type(visinfo->class); + v->visualType = xmesa_convert_from_x_visual_type(visinfo->class); #else - v->mesa_visual.visualType = xmesa_convert_from_x_visual_type(visinfo->c_class); + v->visualType = xmesa_convert_from_x_visual_type(visinfo->c_class); #endif v->mesa_visual.visualRating = visualCaveat; @@ -733,7 +733,7 @@ XMesaVisual XMesaCreateVisual( Display *display, (void) initialize_visual_and_buffer( v, NULL, rgb_flag, 0, 0 ); { - const int xclass = v->mesa_visual.visualType; + const int xclass = v->visualType; if (xclass == GLX_TRUE_COLOR || xclass == GLX_DIRECT_COLOR) { red_bits = _mesa_bitcount(GET_REDMASK(v)); green_bits = _mesa_bitcount(GET_GREENMASK(v)); @@ -756,7 +756,7 @@ XMesaVisual XMesaCreateVisual( Display *display, /* initialize visual */ { - __GLcontextModes *vis = &v->mesa_visual; + struct gl_config *vis = &v->mesa_visual; vis->rgbMode = GL_TRUE; vis->doubleBufferMode = db_flag; @@ -783,7 +783,6 @@ XMesaVisual XMesaCreateVisual( Display *display, vis->numAuxBuffers = 0; vis->level = 0; - vis->pixmapMode = 0; vis->sampleBuffers = 0; vis->samples = 0; } @@ -855,7 +854,7 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) if (!xmdpy) return NULL; - /* Note: the XMesaContext contains a Mesa GLcontext struct (inheritance) */ + /* Note: the XMesaContext contains a Mesa struct gl_context struct (inheritance) */ c = (XMesaContext) CALLOC_STRUCT(xmesa_context); if (!c) return NULL; diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.h b/src/gallium/state_trackers/glx/xlib/xm_api.h index fedf2b2d5a..b8ac979edc 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_api.h +++ b/src/gallium/state_trackers/glx/xlib/xm_api.h @@ -280,7 +280,8 @@ XMesaCopyContext(XMesaContext src, XMesaContext dst, unsigned long mask); * Basically corresponds to an XVisualInfo. */ struct xmesa_visual { - GLvisual mesa_visual; /* Device independent visual parameters */ + struct gl_config mesa_visual;/* Device independent visual parameters */ + int screen, visualID, visualType; Display *display; /* The X11 display */ XVisualInfo * visinfo; /* X's visual info (pointer to private copy) */ XVisualInfo *vishandle; /* Only used in fakeglx.c */ diff --git a/src/gallium/state_trackers/glx/xlib/xm_st.c b/src/gallium/state_trackers/glx/xlib/xm_st.c index 4d0f5e6625..e7466bdbee 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_st.c +++ b/src/gallium/state_trackers/glx/xlib/xm_st.c @@ -196,7 +196,13 @@ xmesa_st_framebuffer_validate_textures(struct st_framebuffer_iface *stfbi, /** + * Check that a framebuffer's attachments match the window's size. + * * Called via st_framebuffer_iface::validate() + * + * \param statts array of framebuffer attachments + * \param count number of framebuffer attachments in statts[] + * \param out returns resources for each of the attachments */ static boolean xmesa_st_framebuffer_validate(struct st_framebuffer_iface *stfbi, @@ -209,9 +215,11 @@ xmesa_st_framebuffer_validate(struct st_framebuffer_iface *stfbi, boolean resized; boolean ret; + /* build mask of ST_ATTACHMENT bits */ statt_mask = 0x0; for (i = 0; i < count; i++) statt_mask |= 1 << statts[i]; + /* record newly allocated textures */ new_mask = statt_mask & ~xstfb->texture_mask; diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c index dce24bc17d..29813456b5 100644 --- a/src/gallium/state_trackers/python/st_device.c +++ b/src/gallium/state_trackers/python/st_device.c @@ -31,7 +31,6 @@ #include "pipe/p_shader_tokens.h" #include "util/u_inlines.h" #include "cso_cache/cso_context.h" -#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_sampler.h" diff --git a/src/gallium/state_trackers/vega/api_images.c b/src/gallium/state_trackers/vega/api_images.c index 547508f815..c36b3d2f3c 100644 --- a/src/gallium/state_trackers/vega/api_images.c +++ b/src/gallium/state_trackers/vega/api_images.c @@ -31,7 +31,6 @@ #include "vg_context.h" #include "vg_translate.h" #include "api_consts.h" -#include "image.h" #include "api.h" #include "pipe/p_context.h" diff --git a/src/gallium/state_trackers/vega/vg_manager.c b/src/gallium/state_trackers/vega/vg_manager.c index e7996741d1..232deefa16 100644 --- a/src/gallium/state_trackers/vega/vg_manager.c +++ b/src/gallium/state_trackers/vega/vg_manager.c @@ -352,7 +352,7 @@ vg_api_create_context(struct st_api *stapi, struct st_manager *smapi, return NULL; /* only 1.0 is supported */ - if (attribs->major != 1 || attribs->minor > 0) + if (attribs->major > 1 || (attribs->major == 1 && attribs->minor > 0)) return NULL; pipe = smapi->screen->context_create(smapi->screen, NULL); diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c index 26a907f205..c65da71cdb 100644 --- a/src/gallium/state_trackers/xorg/xorg_crtc.c +++ b/src/gallium/state_trackers/xorg/xorg_crtc.c @@ -234,6 +234,10 @@ crtc_load_cursor_argb_ga3d(xf86CrtcPtr crtc, CARD32 * image) 64, 64, (void*)image, 64 * 4, 0, 0); ms->ctx->transfer_unmap(ms->ctx, transfer); ms->ctx->transfer_destroy(ms->ctx, transfer); + + if (crtc->cursor_shown) + drmModeSetCursor(ms->fd, crtcp->drm_crtc->crtc_id, + crtcp->cursor_handle, 64, 64); } #if HAVE_LIBKMS @@ -271,6 +275,10 @@ crtc_load_cursor_argb_kms(xf86CrtcPtr crtc, CARD32 * image) memcpy(ptr, image, 64*64*4); kms_bo_unmap(crtcp->cursor_bo); + if (crtc->cursor_shown) + drmModeSetCursor(ms->fd, crtcp->drm_crtc->crtc_id, + crtcp->cursor_handle, 64, 64); + return; err_bo_destroy: diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c index e10ff2f950..3a5db9856d 100644 --- a/src/gallium/state_trackers/xorg/xorg_driver.c +++ b/src/gallium/state_trackers/xorg/xorg_driver.c @@ -369,6 +369,7 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) ms = modesettingPTR(pScrn); ms->pEnt = pEnt; ms->cust = cust; + ms->fb_id = -1; pScrn->displayWidth = 640; /* default it */ @@ -402,19 +403,6 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) if (!drv_init_drm(pScrn)) return FALSE; - use3D = cust ? !cust->no_3d : TRUE; - ms->from_3D = xf86GetOptValBool(ms->Options, OPTION_3D_ACCEL, - &use3D) ? - X_CONFIG : X_PROBED; - - ms->no3D = !use3D; - - if (!drv_init_resource_management(pScrn)) { - xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Could not init " - "Gallium3D or libKMS."); - return FALSE; - } - pScrn->monitor = pScrn->confScreen->monitor; pScrn->progClock = TRUE; pScrn->rgbBits = 8; @@ -449,6 +437,19 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) memcpy(ms->Options, drv_options, sizeof(drv_options)); xf86ProcessOptions(pScrn->scrnIndex, pScrn->options, ms->Options); + use3D = cust ? !cust->no_3d : TRUE; + ms->from_3D = xf86GetOptValBool(ms->Options, OPTION_3D_ACCEL, + &use3D) ? + X_CONFIG : X_PROBED; + + ms->no3D = !use3D; + + if (!drv_init_resource_management(pScrn)) { + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Could not init " + "Gallium3D or libKMS."); + return FALSE; + } + /* Allocate an xf86CrtcConfig */ xf86CrtcConfigInit(pScrn, &crtc_config_funcs); xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); @@ -791,7 +792,9 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) if (!ms->SWCursor) xf86_cursors_init(pScreen, 64, 64, HARDWARE_CURSOR_SOURCE_MASK_INTERLEAVE_64 | - HARDWARE_CURSOR_ARGB); + HARDWARE_CURSOR_ARGB | + ((cust && cust->unhidden_hw_cursor_update) ? + HARDWARE_CURSOR_UPDATE_UNHIDDEN : 0)); /* Must force it before EnterVT, so we are in control of VT and * later memory should be bound when allocating, e.g rotate_mem */ @@ -862,8 +865,10 @@ drv_leave_vt(int scrnIndex, int flags) } } - drmModeRmFB(ms->fd, ms->fb_id); - ms->fb_id = -1; + if (ms->fb_id != -1) { + drmModeRmFB(ms->fd, ms->fb_id); + ms->fb_id = -1; + } /* idle hardware */ if (!ms->kms) @@ -944,7 +949,6 @@ drv_close_screen(int scrnIndex, ScreenPtr pScreen) } #endif - drmModeRmFB(ms->fd, ms->fb_id); ms->destroy_front_buffer(pScrn); if (ms->exa) diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h index be1a9fda48..a3fb5e5dad 100644 --- a/src/gallium/state_trackers/xorg/xorg_tracker.h +++ b/src/gallium/state_trackers/xorg/xorg_tracker.h @@ -76,6 +76,7 @@ typedef struct _CustomizerRec Bool dirty_throttling; Bool swap_throttling; Bool no_3d; + Bool unhidden_hw_cursor_update; Bool (*winsys_pre_init) (struct _CustomizerRec *cust, int fd); Bool (*winsys_screen_init)(struct _CustomizerRec *cust); Bool (*winsys_screen_close)(struct _CustomizerRec *cust); diff --git a/src/gallium/targets/SConscript.dri b/src/gallium/targets/SConscript.dri index e5981c2461..bc8d179e3d 100644 --- a/src/gallium/targets/SConscript.dri +++ b/src/gallium/targets/SConscript.dri @@ -69,6 +69,7 @@ COMMON_DRI_DRM_OBJECTS = [ drienv.AppendUnique(LIBS = [ 'expat', + 'talloc', ]) Export([ diff --git a/src/gallium/targets/dri-i915/SConscript b/src/gallium/targets/dri-i915/SConscript index 6f9336b5ac..172f92d6b8 100644 --- a/src/gallium/targets/dri-i915/SConscript +++ b/src/gallium/targets/dri-i915/SConscript @@ -11,6 +11,7 @@ env.ParseConfig('pkg-config --cflags --libs libdrm_intel') env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE', 'GALLIUM_GALAHAD']) env.Prepend(LIBS = [ + ws_wrapper, st_dri, i915drm, i915, diff --git a/src/gallium/targets/dri-i915/target.c b/src/gallium/targets/dri-i915/target.c index 5ae6ca367d..a27b7bd6d8 100644 --- a/src/gallium/targets/dri-i915/target.c +++ b/src/gallium/targets/dri-i915/target.c @@ -19,8 +19,7 @@ create_screen(int fd) if (!screen) return NULL; - if (debug_get_bool_option("I915_SOFTWARE", FALSE)) - screen = sw_screen_wrap(screen); + screen = sw_screen_wrap(screen); screen = debug_screen_wrap(screen); diff --git a/src/gallium/targets/dri-i965/target.c b/src/gallium/targets/dri-i965/target.c index ce97f82027..0632b97bea 100644 --- a/src/gallium/targets/dri-i965/target.c +++ b/src/gallium/targets/dri-i965/target.c @@ -19,8 +19,7 @@ create_screen(int fd) if (!screen) return NULL; - if (debug_get_bool_option("BRW_SOFTPIPE", FALSE)) - screen = sw_screen_wrap(screen); + screen = sw_screen_wrap(screen); screen = debug_screen_wrap(screen); diff --git a/src/gallium/targets/dri-r600/target.c b/src/gallium/targets/dri-r600/target.c index eb268d5bc0..8753e2bab1 100644 --- a/src/gallium/targets/dri-r600/target.c +++ b/src/gallium/targets/dri-r600/target.c @@ -1,33 +1,9 @@ - #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" #include "r600/drm/r600_drm_public.h" #include "r600/r600_public.h" -#if 1 -static struct pipe_screen * -create_screen(int fd) -{ - struct radeon *rw; - struct pipe_screen *screen; - - rw = r600_drm_winsys_create(fd); - if (!rw) - return NULL; - - screen = r600_screen_create(rw); - if (!screen) - return NULL; - - screen = debug_screen_wrap(screen); - - return screen; -} -#else -struct radeon *r600_new(int fd, unsigned device); -struct pipe_screen *r600_screen_create2(struct radeon *radeon); -static struct pipe_screen * -create_screen(int fd) +static struct pipe_screen *create_screen(int fd) { struct radeon *radeon; struct pipe_screen *screen; @@ -36,7 +12,7 @@ create_screen(int fd) if (!radeon) return NULL; - screen = r600_screen_create2(radeon); + screen = r600_screen_create(radeon); if (!screen) return NULL; @@ -44,6 +20,5 @@ create_screen(int fd) return screen; } -#endif DRM_DRIVER_DESCRIPTOR("r600", "radeon", create_screen) diff --git a/src/gallium/targets/egl/Makefile b/src/gallium/targets/egl/Makefile index 47c24cefe5..38e60dbafb 100644 --- a/src/gallium/targets/egl/Makefile +++ b/src/gallium/targets/egl/Makefile @@ -24,7 +24,9 @@ common_CPPFLAGS := \ -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/winsys + -I$(TOP)/src/gallium/winsys \ + $(LIBDRM_CFLAGS) + common_SYS := common_LIBS := \ $(TOP)/src/gallium/drivers/identity/libidentity.a \ @@ -41,11 +43,11 @@ egl_SYS := -lm $(DLOPEN_LIBS) -L$(TOP)/$(LIB_DIR) -lEGL egl_LIBS := $(TOP)/src/gallium/state_trackers/egl/libegl.a ifneq ($(findstring x11, $(EGL_PLATFORMS)),) -egl_SYS += -lX11 -lXext -lXfixes +egl_SYS += -lX11 -lXext -lXfixes $(LIBDRM_LIB) egl_LIBS += $(TOP)/src/gallium/winsys/sw/xlib/libws_xlib.a endif -ifneq ($(findstring kms, $(EGL_PLATFORMS)),) -egl_SYS += -ldrm +ifneq ($(findstring drm, $(EGL_PLATFORMS)),) +egl_SYS += $(LIBDRM_LIB) endif ifneq ($(findstring fbdev, $(EGL_PLATFORMS)),) egl_LIBS += $(TOP)/src/gallium/winsys/sw/fbdev/libfbdev.a diff --git a/src/gallium/targets/egl/pipe_i915.c b/src/gallium/targets/egl/pipe_i915.c index 758a921b48..cd74044d8c 100644 --- a/src/gallium/targets/egl/pipe_i915.c +++ b/src/gallium/targets/egl/pipe_i915.c @@ -1,5 +1,4 @@ -#include "target-helpers/inline_wrapper_sw_helper.h" #include "target-helpers/inline_debug_helper.h" #include "state_tracker/drm_driver.h" #include "i915/drm/i915_drm_public.h" diff --git a/src/gallium/targets/egl/pipe_i965.c b/src/gallium/targets/egl/pipe_i965.c index 43bf646e82..6b886fb3f1 100644 --- a/src/gallium/targets/egl/pipe_i965.c +++ b/src/gallium/targets/egl/pipe_i965.c @@ -1,5 +1,4 @@ -#include "target-helpers/inline_wrapper_sw_helper.h" #include "target-helpers/inline_debug_helper.h" #include "state_tracker/drm_driver.h" #include "i965/drm/i965_drm_public.h" @@ -19,8 +18,7 @@ create_screen(int fd) if (!screen) return NULL; - if (debug_get_bool_option("BRW_SOFTPIPE", FALSE)) - screen = sw_screen_wrap(screen); + screen = sw_screen_wrap(screen); screen = debug_screen_wrap(screen); diff --git a/src/gallium/targets/xorg-i965/intel_target.c b/src/gallium/targets/xorg-i965/intel_target.c index ce97f82027..0632b97bea 100644 --- a/src/gallium/targets/xorg-i965/intel_target.c +++ b/src/gallium/targets/xorg-i965/intel_target.c @@ -19,8 +19,7 @@ create_screen(int fd) if (!screen) return NULL; - if (debug_get_bool_option("BRW_SOFTPIPE", FALSE)) - screen = sw_screen_wrap(screen); + screen = sw_screen_wrap(screen); screen = debug_screen_wrap(screen); diff --git a/src/gallium/targets/xorg-vmwgfx/vmw_ctrl.c b/src/gallium/targets/xorg-vmwgfx/vmw_ctrl.c index 237b308ae3..9c075b5597 100644 --- a/src/gallium/targets/xorg-vmwgfx/vmw_ctrl.c +++ b/src/gallium/targets/xorg-vmwgfx/vmw_ctrl.c @@ -32,6 +32,7 @@ * allows X clients to communicate with the driver. */ +#include <xorg-server.h> #include "dixstruct.h" #include "extnsionst.h" #include <X11/X.h> diff --git a/src/gallium/targets/xorg-vmwgfx/vmw_screen.c b/src/gallium/targets/xorg-vmwgfx/vmw_screen.c index 8173908f55..7662203165 100644 --- a/src/gallium/targets/xorg-vmwgfx/vmw_screen.c +++ b/src/gallium/targets/xorg-vmwgfx/vmw_screen.c @@ -245,6 +245,7 @@ vmw_screen_pre_init(ScrnInfoPtr pScrn, int flags) cust->winsys_enter_vt = vmw_screen_enter_vt; cust->winsys_leave_vt = vmw_screen_leave_vt; cust->no_3d = TRUE; + cust->unhidden_hw_cursor_update = TRUE; vmw->pScrn = pScrn; pScrn->driverPrivate = cust; diff --git a/src/gallium/tests/python/retrace/interpreter.py b/src/gallium/tests/python/retrace/interpreter.py index 37d7fd6415..954a701a53 100755 --- a/src/gallium/tests/python/retrace/interpreter.py +++ b/src/gallium/tests/python/retrace/interpreter.py @@ -111,6 +111,7 @@ struct_factories = { #"pipe_texture": gallium.Texture, 'pipe_subresource': gallium.pipe_subresource, 'pipe_box': gallium.pipe_box, + 'pipe_draw_info': gallium.pipe_draw_info, } @@ -533,30 +534,22 @@ class Context(Object): return minindex + ibias, maxindex + ibias - def draw_arrays(self, mode, start, count): - self.dump_vertices(start, count) - - self.real.draw_arrays(mode, start, count) - self._set_dirty() - - def draw_elements(self, indexBuffer, indexSize, indexBias, mode, start, count): - if self.interpreter.verbosity(2): - minindex, maxindex = self.dump_indices(indexBuffer, indexSize, indexBias, start, count) - self.dump_vertices(minindex, maxindex - minindex) + def set_index_buffer(self, ib): + if ib: + self.real.set_index_buffer(ib.index_size, ib.offset, ib.buffer) + else: + self.real.set_index_buffer(0, 0, None) - self.real.draw_elements(indexBuffer, indexSize, indexBias, mode, start, count) - self._set_dirty() - - def draw_range_elements(self, indexBuffer, indexSize, indexBias, minIndex, maxIndex, mode, start, count): + def draw_vbo(self, info): if self.interpreter.verbosity(2): - minindex, maxindex = self.dump_indices(indexBuffer, indexSize, indexBias, start, count) - minindex = min(minindex, minIndex) - maxindex = min(maxindex, maxIndex) - self.dump_vertices(minindex, maxindex - minindex) + if 0: + minindex, maxindex = self.dump_indices(indexBuffer, indexSize, indexBias, start, count) + + self.dump_vertices(info.minindex, info.maxindex + 1 - info.minindex) - self.real.draw_range_elements(indexBuffer, indexSize, indexBias, minIndex, maxIndex, mode, start, count) + self.real.draw_vbo(info) self._set_dirty() - + def resource_copy_region(self, dst, subdst, dstx, dsty, dstz, src, subsrc, srcx, srcy, srcz, width, height): if dst is not None and src is not None: if self.interpreter.options.all: @@ -617,6 +610,15 @@ class Context(Object): _rgba[i] = rgba[i] self.real.clear(buffers, _rgba, depth, stencil) + def clear_render_target(self, dst, rgba, dstx, dsty, width, height): + _rgba = gallium.FloatArray(4) + for i in range(4): + _rgba[i] = rgba[i] + self.real.clear_render_target(dst, _rgba, dstx, dsty, width, height) + + def clear_depth_stencil(self, dst, clear_flags, depth, stencil, dstx, dsty, width, height): + self.real.clear_depth_stencil(dst, clear_flags, depth, stencil, dstx, dsty, width, height) + def _present(self): self.real.flush() diff --git a/src/gallium/tests/python/samples/tri.py b/src/gallium/tests/python/samples/tri.py index fed929d420..6d17c88c05 100644 --- a/src/gallium/tests/python/samples/tri.py +++ b/src/gallium/tests/python/samples/tri.py @@ -88,8 +88,8 @@ def test(dev): # rasterizer rasterizer = Rasterizer() - rasterizer.front_winding = PIPE_WINDING_CW - rasterizer.cull_mode = PIPE_WINDING_NONE + rasterizer.front_ccw = False + rasterizer.cull_face = PIPE_FACE_NONE rasterizer.scissor = 1 ctx.set_rasterizer(rasterizer) @@ -161,8 +161,8 @@ def test(dev): # vertex shader vs = Shader(''' VERT - DCL IN[0], POSITION, CONSTANT - DCL IN[1], COLOR, CONSTANT + DCL IN[0] + DCL IN[1] DCL OUT[0], POSITION, CONSTANT DCL OUT[1], COLOR, CONSTANT 0:MOV OUT[0], IN[0] diff --git a/src/gallium/tests/unit/u_format_test.c b/src/gallium/tests/unit/u_format_test.c index cfde6af75e..ba0dd17957 100644 --- a/src/gallium/tests/unit/u_format_test.c +++ b/src/gallium/tests/unit/u_format_test.c @@ -67,6 +67,7 @@ print_packed(const struct util_format_description *format_desc, sep = " "; } printf("%s", suffix); + fflush(stdout); } @@ -88,6 +89,7 @@ print_unpacked_rgba_doubl(const struct util_format_description *format_desc, sep = ",\n"; } printf("%s", suffix); + fflush(stdout); } @@ -109,6 +111,7 @@ print_unpacked_rgba_float(const struct util_format_description *format_desc, sep = ",\n"; } printf("%s", suffix); + fflush(stdout); } @@ -129,6 +132,7 @@ print_unpacked_rgba_8unorm(const struct util_format_description *format_desc, } } printf("%s", suffix); + fflush(stdout); } @@ -150,6 +154,7 @@ print_unpacked_z_float(const struct util_format_description *format_desc, sep = ",\n"; } printf("%s", suffix); + fflush(stdout); } @@ -170,6 +175,7 @@ print_unpacked_z_32unorm(const struct util_format_description *format_desc, } } printf("%s", suffix); + fflush(stdout); } @@ -190,6 +196,7 @@ print_unpacked_s_8uscaled(const struct util_format_description *format_desc, } } printf("%s", suffix); + fflush(stdout); } @@ -635,6 +642,7 @@ test_one_func(const struct util_format_description *format_desc, printf("Testing util_format_%s_%s ...\n", format_desc->short_name, suffix); + fflush(stdout); for (i = 0; i < util_format_nr_test_cases; ++i) { const struct util_format_test_case *test = &util_format_test_cases[i]; diff --git a/src/gallium/winsys/r600/drm/Makefile b/src/gallium/winsys/r600/drm/Makefile index 9d8dc8dc59..a396205f89 100644 --- a/src/gallium/winsys/r600/drm/Makefile +++ b/src/gallium/winsys/r600/drm/Makefile @@ -6,18 +6,14 @@ LIBNAME = r600winsys C_SOURCES = \ bof.c \ - r600_state.c \ - r600_state2.c \ - r600.c \ - radeon_ctx.c \ - radeon_draw.c \ - radeon_state.c \ + evergreen_hw_context.c \ radeon_bo.c \ + radeon_bo_pb.c \ radeon_pciid.c \ - radeon.c \ + r600.c \ + r600_bo.c \ r600_drm.c \ - radeon_ws_bo.c \ - radeon_bo_pb.c + r600_hw_context.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/r600 \ $(shell pkg-config libdrm --cflags-only-I) diff --git a/src/gallium/winsys/r600/drm/SConscript b/src/gallium/winsys/r600/drm/SConscript index 2f20d9f895..cc053c06dd 100644 --- a/src/gallium/winsys/r600/drm/SConscript +++ b/src/gallium/winsys/r600/drm/SConscript @@ -4,14 +4,14 @@ env = env.Clone() r600_sources = [ 'bof.c', - 'r600_state.c', - 'radeon_ctx.c', - 'radeon_draw.c', - 'radeon_state.c', + 'evergreen_hw_context.c', 'radeon_bo.c', + 'radeon_bo_pb.c', 'radeon_pciid.c', - 'radeon.c', - 'r600_drm.c' + 'r600.c', + 'r600_bo.c', + 'r600_drm.c', + 'r600_hw_context.c', ] env.ParseConfig('pkg-config --cflags libdrm_radeon') diff --git a/src/gallium/winsys/r600/drm/eg_states.h b/src/gallium/winsys/r600/drm/eg_states.h deleted file mode 100644 index c26ba6c6cd..0000000000 --- a/src/gallium/winsys/r600/drm/eg_states.h +++ /dev/null @@ -1,449 +0,0 @@ -/* - * Copyright © 2009 Jerome Glisse <glisse@freedesktop.org> - * - * This file is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. - */ -#ifndef EG_STATES_H -#define EG_STATES_H - -static const struct radeon_register EG_names_CONFIG[] = { - {0x00008C00, 0, 0, "SQ_CONFIG"}, - {0x00009100, 0, 0, "SPI_CONFIG_CNTL"}, - {0x0000913C, 0, 0, "SPI_CONFIG_CNTL_1"}, - {0x00008C04, 0, 0, "SQ_GPR_RESOURCE_MGMT_1"}, - {0x00008C08, 0, 0, "SQ_GPR_RESOURCE_MGMT_2"}, - {0x00008C0C, 0, 0, "SQ_GPR_RESOURCE_MGMT_3"}, - {0x00008C18, 0, 0, "SQ_THREAD_RESOURCE_MGMT_1"}, - {0x00008C1C, 0, 0, "SQ_THREAD_RESOURCE_MGMT_2"}, - {0x00008C20, 0, 0, "SQ_STACK_RESOURCE_MGMT_1"}, - {0x00008C24, 0, 0, "SQ_STACK_RESOURCE_MGMT_2"}, - {0x00008C28, 0, 0, "SQ_STACK_RESOURCE_MGMT_3"}, - {0x00008D8C, 0, 0, "SQ_DYN_GPR_CNTL_PS_FLUSH_REQ"}, - {0x00008A14, 0, 0, "PA_CL_ENHANCE"}, - {0x00028838, 0, 0, "SQ_DYN_GPR_RESOURCE_LIMIT_1"}, - {0x000288EC, 0, 0, "SQ_LDS_ALLOC_PS"}, - {0x00028350, 0, 0, "SX_MISC"}, - {0x00028900, 0, 0, "SQ_ESGS_RING_ITEMSIZE"}, - {0x00028904, 0, 0, "SQ_GSVS_RING_ITEMSIZE"}, - {0x00028908, 0, 0, "SQ_ESTMP_RING_ITEMSIZE"}, - {0x0002890C, 0, 0, "SQ_GSTMP_RING_ITEMSIZE"}, - {0x00028910, 0, 0, "SQ_VSTMP_RING_ITEMSIZE"}, - {0x00028914, 0, 0, "SQ_PSTMP_RING_ITEMSIZE"}, - {0x0002891C, 0, 0, "SQ_GS_VERT_ITEMSIZE"}, - {0x00028920, 0, 0, "SQ_GS_VERT_ITEMSIZE_1"}, - {0x00028924, 0, 0, "SQ_GS_VERT_ITEMSIZE_2"}, - {0x00028928, 0, 0, "SQ_GS_VERT_ITEMSIZE_3"}, - {0x00028A10, 0, 0, "VGT_OUTPUT_PATH_CNTL"}, - {0x00028A14, 0, 0, "VGT_HOS_CNTL"}, - {0x00028A18, 0, 0, "VGT_HOS_MAX_TESS_LEVEL"}, - {0x00028A1C, 0, 0, "VGT_HOS_MIN_TESS_LEVEL"}, - {0x00028A20, 0, 0, "VGT_HOS_REUSE_DEPTH"}, - {0x00028A24, 0, 0, "VGT_GROUP_PRIM_TYPE"}, - {0x00028A28, 0, 0, "VGT_GROUP_FIRST_DECR"}, - {0x00028A2C, 0, 0, "VGT_GROUP_DECR"}, - {0x00028A30, 0, 0, "VGT_GROUP_VECT_0_CNTL"}, - {0x00028A34, 0, 0, "VGT_GROUP_VECT_1_CNTL"}, - {0x00028A38, 0, 0, "VGT_GROUP_VECT_0_FMT_CNTL"}, - {0x00028A3C, 0, 0, "VGT_GROUP_VECT_1_FMT_CNTL"}, - {0x00028A40, 0, 0, "VGT_GS_MODE"}, - {0x00028A48, 0, 0, "PA_SC_MODE_CNTL_0"}, - {0x00028A4C, 0, 0, "PA_SC_MODE_CNTL_1"}, - {0x00028AB4, 0, 0, "VGT_REUSE_OFF"}, - {0x00028AB8, 0, 0, "VGT_VTX_CNT_EN"}, - {0x00028B54, 0, 0, "VGT_SHADER_STAGES_EN"}, - {0x00028B94, 0, 0, "VGT_STRMOUT_CONFIG"}, - {0x00028B98, 0, 0, "VGT_STRMOUT_BUFFER_CONFIG"}, -}; - -static const struct radeon_register EG_names_CB_CNTL[] = { - {0x00028238, 0, 0, "CB_TARGET_MASK"}, - {0x0002823C, 0, 0, "CB_SHADER_MASK"}, - {0x00028808, 0, 0, "CB_COLOR_CONTROL"}, - {0x00028C04, 0, 0, "PA_SC_AA_CONFIG"}, - {0x00028C1C, 0, 0, "PA_SC_AA_SAMPLE_LOCS_MCTX"}, - {0x00028C3C, 0, 0, "PA_SC_AA_MASK"}, -}; - -static const struct radeon_register EG_names_RASTERIZER[] = { - {0x000286D4, 0, 0, "SPI_INTERP_CONTROL_0"}, - {0x00028810, 0, 0, "PA_CL_CLIP_CNTL"}, - {0x00028814, 0, 0, "PA_SU_SC_MODE_CNTL"}, - {0x0002881C, 0, 0, "PA_CL_VS_OUT_CNTL"}, - {0x00028820, 0, 0, "PA_CL_NANINF_CNTL"}, - {0x00028A00, 0, 0, "PA_SU_POINT_SIZE"}, - {0x00028A04, 0, 0, "PA_SU_POINT_MINMAX"}, - {0x00028A08, 0, 0, "PA_SU_LINE_CNTL"}, - {0x00028A48, 0, 0, "PA_SC_MPASS_PS_CNTL"}, - {0x00028C00, 0, 0, "PA_SC_LINE_CNTL"}, - {0x00028C08, 0, 0, "PA_SU_VTX_CNTL"}, - {0x00028C0C, 0, 0, "PA_CL_GB_VERT_CLIP_ADJ"}, - {0x00028C10, 0, 0, "PA_CL_GB_VERT_DISC_ADJ"}, - {0x00028C14, 0, 0, "PA_CL_GB_HORZ_CLIP_ADJ"}, - {0x00028C18, 0, 0, "PA_CL_GB_HORZ_DISC_ADJ"}, - {0x00028B78, 0, 0, "PA_SU_POLY_OFFSET_DB_FMT_CNTL"}, - {0x00028B7C, 0, 0, "PA_SU_POLY_OFFSET_CLAMP"}, - {0x00028B80, 0, 0, "PA_SU_POLY_OFFSET_FRONT_SCALE"}, - {0x00028B84, 0, 0, "PA_SU_POLY_OFFSET_FRONT_OFFSET"}, - {0x00028B88, 0, 0, "PA_SU_POLY_OFFSET_BACK_SCALE"}, - {0x00028B8C, 0, 0, "PA_SU_POLY_OFFSET_BACK_OFFSET"}, -}; - -/* Viewport states are same as r600 */ -static const struct radeon_register EG_names_VIEWPORT[] = { - {0x000282D0, 0, 0, "PA_SC_VPORT_ZMIN_0"}, - {0x000282D4, 0, 0, "PA_SC_VPORT_ZMAX_0"}, - {0x0002843C, 0, 0, "PA_CL_VPORT_XSCALE_0"}, - {0x00028444, 0, 0, "PA_CL_VPORT_YSCALE_0"}, - {0x0002844C, 0, 0, "PA_CL_VPORT_ZSCALE_0"}, - {0x00028440, 0, 0, "PA_CL_VPORT_XOFFSET_0"}, - {0x00028448, 0, 0, "PA_CL_VPORT_YOFFSET_0"}, - {0x00028450, 0, 0, "PA_CL_VPORT_ZOFFSET_0"}, - {0x00028818, 0, 0, "PA_CL_VTE_CNTL"}, -}; - -/* scissor is same as R600 */ -static const struct radeon_register EG_names_SCISSOR[] = { - {0x00028030, 0, 0, "PA_SC_SCREEN_SCISSOR_TL"}, - {0x00028034, 0, 0, "PA_SC_SCREEN_SCISSOR_BR"}, - {0x00028200, 0, 0, "PA_SC_WINDOW_OFFSET"}, - {0x00028204, 0, 0, "PA_SC_WINDOW_SCISSOR_TL"}, - {0x00028208, 0, 0, "PA_SC_WINDOW_SCISSOR_BR"}, - {0x0002820C, 0, 0, "PA_SC_CLIPRECT_RULE"}, - {0x00028210, 0, 0, "PA_SC_CLIPRECT_0_TL"}, - {0x00028214, 0, 0, "PA_SC_CLIPRECT_0_BR"}, - {0x00028218, 0, 0, "PA_SC_CLIPRECT_1_TL"}, - {0x0002821C, 0, 0, "PA_SC_CLIPRECT_1_BR"}, - {0x00028220, 0, 0, "PA_SC_CLIPRECT_2_TL"}, - {0x00028224, 0, 0, "PA_SC_CLIPRECT_2_BR"}, - {0x00028228, 0, 0, "PA_SC_CLIPRECT_3_TL"}, - {0x0002822C, 0, 0, "PA_SC_CLIPRECT_3_BR"}, - {0x00028230, 0, 0, "PA_SC_EDGERULE"}, - {0x00028240, 0, 0, "PA_SC_GENERIC_SCISSOR_TL"}, - {0x00028244, 0, 0, "PA_SC_GENERIC_SCISSOR_BR"}, - {0x00028250, 0, 0, "PA_SC_VPORT_SCISSOR_0_TL"}, - {0x00028254, 0, 0, "PA_SC_VPORT_SCISSOR_0_BR"}, - {0x00028234, 0, 0, "PA_SU_HARDWARE_SCREEN_OFFSET"}, -}; - -/* same as r700 i.e. no blend control */ -static const struct radeon_register EG_names_BLEND[] = { - {0x00028414, 0, 0, "CB_BLEND_RED"}, - {0x00028418, 0, 0, "CB_BLEND_GREEN"}, - {0x0002841C, 0, 0, "CB_BLEND_BLUE"}, - {0x00028420, 0, 0, "CB_BLEND_ALPHA"}, - {0x00028780, 0, 0, "CB_BLEND0_CONTROL"}, - {0x00028784, 0, 0, "CB_BLEND1_CONTROL"}, - {0x00028788, 0, 0, "CB_BLEND2_CONTROL"}, - {0x0002878C, 0, 0, "CB_BLEND3_CONTROL"}, - {0x00028790, 0, 0, "CB_BLEND4_CONTROL"}, - {0x00028794, 0, 0, "CB_BLEND5_CONTROL"}, - {0x00028798, 0, 0, "CB_BLEND6_CONTROL"}, - {0x0002879C, 0, 0, "CB_BLEND7_CONTROL"}, -}; - -/* different */ -static const struct radeon_register EG_names_DSA[] = { - {0x00028028, 0, 0, "DB_STENCIL_CLEAR"}, - {0x0002802C, 0, 0, "DB_DEPTH_CLEAR"}, - {0x00028410, 0, 0, "SX_ALPHA_TEST_CONTROL"}, - {0x00028430, 0, 0, "DB_STENCILREFMASK"}, - {0x00028434, 0, 0, "DB_STENCILREFMASK_BF"}, - {0x00028438, 0, 0, "SX_ALPHA_REF"}, - {0x000286DC, 0, 0, "SPI_FOG_CNTL"}, - {0x00028800, 0, 0, "DB_DEPTH_CONTROL"}, - {0x0002880C, 0, 0, "DB_SHADER_CONTROL"}, - {0x00028000, 0, 0, "DB_RENDER_CONTROL"}, - {0x0002800C, 0, 0, "DB_RENDER_OVERRIDE"}, - {0x00028010, 0, 0, "DB_RENDER_OVERRIDE2"}, - {0x00028AC0, 0, 0, "DB_SRESULTS_COMPARE_STATE0"}, - {0x00028AC4, 0, 0, "DB_SRESULTS_COMPARE_STATE1"}, - {0x00028AC8, 0, 0, "DB_PRELOAD_CONTROL"}, - {0x00028B70, 0, 0, "DB_ALPHA_TO_MASK"}, -}; - -/* different */ -static const struct radeon_register EG_names_VS_SHADER[] = { - {0x00028380, 0, 0, "SQ_VTX_SEMANTIC_0"}, - {0x00028384, 0, 0, "SQ_VTX_SEMANTIC_1"}, - {0x00028388, 0, 0, "SQ_VTX_SEMANTIC_2"}, - {0x0002838C, 0, 0, "SQ_VTX_SEMANTIC_3"}, - {0x00028390, 0, 0, "SQ_VTX_SEMANTIC_4"}, - {0x00028394, 0, 0, "SQ_VTX_SEMANTIC_5"}, - {0x00028398, 0, 0, "SQ_VTX_SEMANTIC_6"}, - {0x0002839C, 0, 0, "SQ_VTX_SEMANTIC_7"}, - {0x000283A0, 0, 0, "SQ_VTX_SEMANTIC_8"}, - {0x000283A4, 0, 0, "SQ_VTX_SEMANTIC_9"}, - {0x000283A8, 0, 0, "SQ_VTX_SEMANTIC_10"}, - {0x000283AC, 0, 0, "SQ_VTX_SEMANTIC_11"}, - {0x000283B0, 0, 0, "SQ_VTX_SEMANTIC_12"}, - {0x000283B4, 0, 0, "SQ_VTX_SEMANTIC_13"}, - {0x000283B8, 0, 0, "SQ_VTX_SEMANTIC_14"}, - {0x000283BC, 0, 0, "SQ_VTX_SEMANTIC_15"}, - {0x000283C0, 0, 0, "SQ_VTX_SEMANTIC_16"}, - {0x000283C4, 0, 0, "SQ_VTX_SEMANTIC_17"}, - {0x000283C8, 0, 0, "SQ_VTX_SEMANTIC_18"}, - {0x000283CC, 0, 0, "SQ_VTX_SEMANTIC_19"}, - {0x000283D0, 0, 0, "SQ_VTX_SEMANTIC_20"}, - {0x000283D4, 0, 0, "SQ_VTX_SEMANTIC_21"}, - {0x000283D8, 0, 0, "SQ_VTX_SEMANTIC_22"}, - {0x000283DC, 0, 0, "SQ_VTX_SEMANTIC_23"}, - {0x000283E0, 0, 0, "SQ_VTX_SEMANTIC_24"}, - {0x000283E4, 0, 0, "SQ_VTX_SEMANTIC_25"}, - {0x000283E8, 0, 0, "SQ_VTX_SEMANTIC_26"}, - {0x000283EC, 0, 0, "SQ_VTX_SEMANTIC_27"}, - {0x000283F0, 0, 0, "SQ_VTX_SEMANTIC_28"}, - {0x000283F4, 0, 0, "SQ_VTX_SEMANTIC_29"}, - {0x000283F8, 0, 0, "SQ_VTX_SEMANTIC_30"}, - {0x000283FC, 0, 0, "SQ_VTX_SEMANTIC_31"}, - {0x0002861C, 0, 0, "SPI_VS_OUT_ID_0"}, // all diff belwo - {0x00028620, 0, 0, "SPI_VS_OUT_ID_1"}, - {0x00028624, 0, 0, "SPI_VS_OUT_ID_2"}, - {0x00028628, 0, 0, "SPI_VS_OUT_ID_3"}, - {0x0002862C, 0, 0, "SPI_VS_OUT_ID_4"}, - {0x00028630, 0, 0, "SPI_VS_OUT_ID_5"}, - {0x00028634, 0, 0, "SPI_VS_OUT_ID_6"}, - {0x00028638, 0, 0, "SPI_VS_OUT_ID_7"}, - {0x0002863C, 0, 0, "SPI_VS_OUT_ID_8"}, - {0x00028640, 0, 0, "SPI_VS_OUT_ID_9"}, - {0x000286C4, 0, 0, "SPI_VS_OUT_CONFIG"}, - {0x0002885C, 1, 0, "SQ_PGM_START_VS"}, - {0x00028860, 0, 0, "SQ_PGM_RESOURCES_VS"}, - {0x00028864, 0, 0, "SQ_PGM_RESOURCES_2_VS"}, - {0x000288A4, 1, 1, "SQ_PGM_START_FS"}, - {0x000288A8, 0, 0, "SQ_PGM_RESOURCES_FS"}, -}; - -static const struct radeon_register EG_names_PS_SHADER[] = { - {0x00028644, 0, 0, "SPI_PS_INPUT_CNTL_0"}, - {0x00028648, 0, 0, "SPI_PS_INPUT_CNTL_1"}, - {0x0002864C, 0, 0, "SPI_PS_INPUT_CNTL_2"}, - {0x00028650, 0, 0, "SPI_PS_INPUT_CNTL_3"}, - {0x00028654, 0, 0, "SPI_PS_INPUT_CNTL_4"}, - {0x00028658, 0, 0, "SPI_PS_INPUT_CNTL_5"}, - {0x0002865C, 0, 0, "SPI_PS_INPUT_CNTL_6"}, - {0x00028660, 0, 0, "SPI_PS_INPUT_CNTL_7"}, - {0x00028664, 0, 0, "SPI_PS_INPUT_CNTL_8"}, - {0x00028668, 0, 0, "SPI_PS_INPUT_CNTL_9"}, - {0x0002866C, 0, 0, "SPI_PS_INPUT_CNTL_10"}, - {0x00028670, 0, 0, "SPI_PS_INPUT_CNTL_11"}, - {0x00028674, 0, 0, "SPI_PS_INPUT_CNTL_12"}, - {0x00028678, 0, 0, "SPI_PS_INPUT_CNTL_13"}, - {0x0002867C, 0, 0, "SPI_PS_INPUT_CNTL_14"}, - {0x00028680, 0, 0, "SPI_PS_INPUT_CNTL_15"}, - {0x00028684, 0, 0, "SPI_PS_INPUT_CNTL_16"}, - {0x00028688, 0, 0, "SPI_PS_INPUT_CNTL_17"}, - {0x0002868C, 0, 0, "SPI_PS_INPUT_CNTL_18"}, - {0x00028690, 0, 0, "SPI_PS_INPUT_CNTL_19"}, - {0x00028694, 0, 0, "SPI_PS_INPUT_CNTL_20"}, - {0x00028698, 0, 0, "SPI_PS_INPUT_CNTL_21"}, - {0x0002869C, 0, 0, "SPI_PS_INPUT_CNTL_22"}, - {0x000286A0, 0, 0, "SPI_PS_INPUT_CNTL_23"}, - {0x000286A4, 0, 0, "SPI_PS_INPUT_CNTL_24"}, - {0x000286A8, 0, 0, "SPI_PS_INPUT_CNTL_25"}, - {0x000286AC, 0, 0, "SPI_PS_INPUT_CNTL_26"}, - {0x000286B0, 0, 0, "SPI_PS_INPUT_CNTL_27"}, - {0x000286B4, 0, 0, "SPI_PS_INPUT_CNTL_28"}, - {0x000286B8, 0, 0, "SPI_PS_INPUT_CNTL_29"}, - {0x000286BC, 0, 0, "SPI_PS_INPUT_CNTL_30"}, - {0x000286C0, 0, 0, "SPI_PS_INPUT_CNTL_31"}, - {0x000286C8, 0, 0, "SPI_THREAD_GROUPING"}, - {0x000286CC, 0, 0, "SPI_PS_IN_CONTROL_0"}, - {0x000286D0, 0, 0, "SPI_PS_IN_CONTROL_1"}, - {0x000286D8, 0, 0, "SPI_INPUT_Z"}, - {0x000286E0, 0, 0, "SPI_BARYC_CNTL"}, - {0x000286E4, 0, 0, "SPI_PS_IN_CONTROL_2"}, - {0x000286E8, 0, 0, "SPI_COMPUTE_INPUT_CNTL"}, - {0x00028840, 1, 0, "SQ_PGM_START_PS"}, // diff - {0x00028844, 0, 0, "SQ_PGM_RESOURCES_PS"}, // diff - {0x00028848, 0, 0, "SQ_PGM_RESOURCES_2_PS"}, // diff - {0x0002884C, 0, 0, "SQ_PGM_EXPORTS_PS"}, // diff -}; - -/* different */ -static const struct radeon_register EG_names_UCP[] = { - {0x000285BC, 0, 0, "PA_CL_UCP0_X"}, - {0x000285C0, 0, 0, "PA_CL_UCP0_Y"}, - {0x000285C4, 0, 0, "PA_CL_UCP0_Z"}, - {0x000285C8, 0, 0, "PA_CL_UCP0_W"}, - {0x000285CC, 0, 0, "PA_CL_UCP1_X"}, - {0x000285D0, 0, 0, "PA_CL_UCP1_Y"}, - {0x000285D4, 0, 0, "PA_CL_UCP1_Z"}, - {0x000285D8, 0, 0, "PA_CL_UCP1_W"}, - {0x000285DC, 0, 0, "PA_CL_UCP2_X"}, - {0x000285E0, 0, 0, "PA_CL_UCP2_Y"}, - {0x000285E4, 0, 0, "PA_CL_UCP2_Z"}, - {0x000285E8, 0, 0, "PA_CL_UCP2_W"}, - {0x000285EC, 0, 0, "PA_CL_UCP3_X"}, - {0x000285F0, 0, 0, "PA_CL_UCP3_Y"}, - {0x000285F4, 0, 0, "PA_CL_UCP3_Z"}, - {0x000285F8, 0, 0, "PA_CL_UCP3_W"}, - {0x000285FC, 0, 0, "PA_CL_UCP4_X"}, - {0x00028600, 0, 0, "PA_CL_UCP4_Y"}, - {0x00028604, 0, 0, "PA_CL_UCP4_Z"}, - {0x00028608, 0, 0, "PA_CL_UCP4_W"}, - {0x0002860C, 0, 0, "PA_CL_UCP5_X"}, - {0x00028610, 0, 0, "PA_CL_UCP5_Y"}, - {0x00028614, 0, 0, "PA_CL_UCP5_Z"}, - {0x0002861C, 0, 0, "PA_CL_UCP5_W"}, -}; - -static const struct radeon_register EG_names_VS_CBUF[] = { - {0x00028180, 0, 0, "ALU_CONST_BUFFER_SIZE_VS_0"}, - {0x00028980, 1, 0, "ALU_CONST_CACHE_VS_0"}, -}; - -static const struct radeon_register EG_names_PS_CBUF[] = { - {0x00028140, 0, 0, "ALU_CONST_BUFFER_SIZE_PS_0"}, - {0x00028940, 1, 0, "ALU_CONST_CACHE_PS_0"}, -}; - -static const struct radeon_register EG_names_PS_RESOURCE[] = { - {0x00030000, 0, 0, "RESOURCE0_WORD0"}, - {0x00030004, 0, 0, "RESOURCE0_WORD1"}, - {0x00030008, 0, 0, "RESOURCE0_WORD2"}, - {0x0003000C, 0, 0, "RESOURCE0_WORD3"}, - {0x00030010, 0, 0, "RESOURCE0_WORD4"}, - {0x00030014, 0, 0, "RESOURCE0_WORD5"}, - {0x00030018, 0, 0, "RESOURCE0_WORD6"}, - {0x0003001c, 0, 0, "RESOURCE0_WORD7"}, -}; - -static const struct radeon_register EG_names_VS_RESOURCE[] = { - {0x00031600, 0, 0, "RESOURCE160_WORD0"}, - {0x00031604, 0, 0, "RESOURCE160_WORD1"}, - {0x00031608, 0, 0, "RESOURCE160_WORD2"}, - {0x0003160C, 0, 0, "RESOURCE160_WORD3"}, - {0x00031610, 0, 0, "RESOURCE160_WORD4"}, - {0x00031614, 0, 0, "RESOURCE160_WORD5"}, - {0x00031618, 0, 0, "RESOURCE160_WORD6"}, - {0x0003161c, 0, 0, "RESOURCE160_WORD7"}, -}; - -static const struct radeon_register EG_names_FS_RESOURCE[] = { - {0x0003A300, 0, 0, "RESOURCE320_WORD0"}, - {0x0003A304, 0, 0, "RESOURCE320_WORD1"}, - {0x0003A308, 0, 0, "RESOURCE320_WORD2"}, - {0x0003A30C, 0, 0, "RESOURCE320_WORD3"}, - {0x0003A310, 0, 0, "RESOURCE320_WORD4"}, - {0x0003A314, 0, 0, "RESOURCE320_WORD5"}, - {0x0003A318, 0, 0, "RESOURCE320_WORD6"}, - {0x0003A31C, 0, 0, "RESOURCE320_WORD7"}, -}; - -static const struct radeon_register EG_names_GS_RESOURCE[] = { - {0x0003A4C0, 0, 0, "RESOURCE336_WORD0"}, - {0x0003A4C4, 0, 0, "RESOURCE336_WORD1"}, - {0x0003A4C8, 0, 0, "RESOURCE336_WORD2"}, - {0x0003A4CC, 0, 0, "RESOURCE336_WORD3"}, - {0x0003A4D0, 0, 0, "RESOURCE336_WORD4"}, - {0x0003A4D4, 0, 0, "RESOURCE336_WORD5"}, - {0x0003A4D8, 0, 0, "RESOURCE336_WORD6"}, - {0x0003A4DC, 0, 0, "RESOURCE336_WORD7"}, -}; - -static const struct radeon_register EG_names_PS_SAMPLER[] = { - {0x0003C000, 0, 0, "SQ_TEX_SAMPLER_WORD0_0"}, - {0x0003C004, 0, 0, "SQ_TEX_SAMPLER_WORD1_0"}, - {0x0003C008, 0, 0, "SQ_TEX_SAMPLER_WORD2_0"}, -}; - -static const struct radeon_register EG_names_VS_SAMPLER[] = { - {0x0003C0D8, 0, 0, "SQ_TEX_SAMPLER_WORD0_18"}, - {0x0003C0DC, 0, 0, "SQ_TEX_SAMPLER_WORD1_18"}, - {0x0003C0E0, 0, 0, "SQ_TEX_SAMPLER_WORD2_18"}, -}; - -static const struct radeon_register EG_names_GS_SAMPLER[] = { - {0x0003C1B0, 0, 0, "SQ_TEX_SAMPLER_WORD0_36"}, - {0x0003C1B4, 0, 0, "SQ_TEX_SAMPLER_WORD1_36"}, - {0x0003C1B8, 0, 0, "SQ_TEX_SAMPLER_WORD2_36"}, -}; - -static const struct radeon_register EG_names_PS_SAMPLER_BORDER[] = { - {0x0000A400, 0, 0, "TD_PS_SAMPLER0_BORDER_RED"}, - {0x0000A404, 0, 0, "TD_PS_SAMPLER0_BORDER_GREEN"}, - {0x0000A408, 0, 0, "TD_PS_SAMPLER0_BORDER_BLUE"}, - {0x0000A40C, 0, 0, "TD_PS_SAMPLER0_BORDER_ALPHA"}, -}; - -static const struct radeon_register EG_names_VS_SAMPLER_BORDER[] = { - {0x0000A600, 0, 0, "TD_VS_SAMPLER0_BORDER_RED"}, - {0x0000A604, 0, 0, "TD_VS_SAMPLER0_BORDER_GREEN"}, - {0x0000A608, 0, 0, "TD_VS_SAMPLER0_BORDER_BLUE"}, - {0x0000A60C, 0, 0, "TD_VS_SAMPLER0_BORDER_ALPHA"}, -}; - -static const struct radeon_register EG_names_GS_SAMPLER_BORDER[] = { - {0x0000A800, 0, 0, "TD_GS_SAMPLER0_BORDER_RED"}, - {0x0000A804, 0, 0, "TD_GS_SAMPLER0_BORDER_GREEN"}, - {0x0000A808, 0, 0, "TD_GS_SAMPLER0_BORDER_BLUE"}, - {0x0000A80C, 0, 0, "TD_GS_SAMPLER0_BORDER_ALPHA"}, -}; - -static const struct radeon_register EG_names_CB[] = { - {0x00028C60, 1, 0, "CB_COLOR0_BASE"}, - {0x00028C64, 0, 0, "CB_COLOR0_PITCH"}, - {0x00028C68, 0, 0, "CB_COLOR0_SLICE"}, - {0x00028C6C, 0, 0, "CB_COLOR0_VIEW"}, - {0x00028C70, 1, 0, "CB_COLOR0_INFO"}, - {0x00028C74, 0, 0, "CB_COLOR0_ATTRIB"}, - {0x00028C78, 0, 0, "CB_COLOR0_DIM"}, -}; - -/* different - TODO */ -static const struct radeon_register EG_names_DB[] = { - {0x00028014, 1, 0, "DB_HTILE_DATA_BASE"}, - {0x00028040, 1, 0, "DB_Z_INFO"}, - {0x00028044, 0, 0, "DB_STENCIL_INFO"}, - {0x00028058, 0, 0, "DB_DEPTH_SIZE"}, - {0x0002805C, 0, 0, "DB_DEPTH_SLICE"}, - {0x00028008, 0, 0, "DB_DEPTH_VIEW"}, - {0x00028ABC, 0, 0, "DB_HTILE_SURFACE"}, - {0x00028048, 1, 0, "DB_Z_READ_BASE"}, - {0x0002804C, 1, 0, "DB_STENCIL_READ_BASE"}, - {0x00028050, 1, 0, "DB_Z_WRITE_BASE"}, - {0x00028054, 1, 0, "DB_STENCIL_WRITE_BASE"}, -}; - -static const struct radeon_register EG_names_VGT[] = { - {0x00008958, 0, 0, "VGT_PRIMITIVE_TYPE"}, //s - {0x00028400, 0, 0, "VGT_MAX_VTX_INDX"}, //s - {0x00028404, 0, 0, "VGT_MIN_VTX_INDX"}, //s - {0x00028408, 0, 0, "VGT_INDX_OFFSET"}, //s - {0x00028A7C, 0, 0, "VGT_DMA_INDEX_TYPE"}, //s - {0x00028A84, 0, 0, "VGT_PRIMITIVEID_EN"}, //s - {0x00028A88, 0, 0, "VGT_DMA_NUM_INSTANCES"}, //s - {0x00028A94, 0, 0, "VGT_MULTI_PRIM_IB_RESET_EN"}, //s - {0x00028AA0, 0, 0, "VGT_INSTANCE_STEP_RATE_0"}, //s - {0x00028AA4, 0, 0, "VGT_INSTANCE_STEP_RATE_1"}, //s -}; - -static const struct radeon_register EG_names_DRAW[] = { - {0x00008970, 0, 0, "VGT_NUM_INDICES"}, - {0x000287E4, 0, 0, "VGT_DMA_BASE_HI"}, //same - {0x000287E8, 1, 0, "VGT_DMA_BASE"}, //same - {0x000287F0, 0, 0, "VGT_DRAW_INITIATOR"}, //same -}; - -static const struct radeon_register EG_names_VGT_EVENT[] = { - {0x00028A90, 1, 0, "VGT_EVENT_INITIATOR"}, //done -}; - -static const struct radeon_register EG_names_CB_FLUSH[] = { -}; - -static const struct radeon_register EG_names_DB_FLUSH[] = { -}; - -#endif diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c new file mode 100644 index 0000000000..7f21b53ace --- /dev/null +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -0,0 +1,919 @@ +/* + * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + */ +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> +#include "xf86drm.h" +#include "r600.h" +#include "evergreend.h" +#include "radeon_drm.h" +#include "bof.h" +#include "pipe/p_compiler.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include <pipebuffer/pb_bufmgr.h> +#include "r600_priv.h" + +#define GROUP_FORCE_NEW_BLOCK 0 + +static const struct r600_reg evergreen_config_reg_list[] = { + {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008A14_PA_CL_ENHANCE, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C00_SQ_CONFIG, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C08_SQ_GPR_RESOURCE_MGMT_2, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C0C_SQ_THREAD_RESOURCE_MGMT, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C20_SQ_STACK_RESOURCE_MGMT_1, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C24_SQ_STACK_RESOURCE_MGMT_2, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C28_SQ_STACK_RESOURCE_MGMT_3, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_009100_SPI_CONFIG_CNTL, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_00913C_SPI_CONFIG_CNTL_1, 0, 0, 0}, +}; + +static const struct r600_reg evergreen_ctl_const_list[] = { + {PKT3_SET_CTL_CONST, EVERGREEN_CTL_CONST_OFFSET, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0, 0}, + {PKT3_SET_CTL_CONST, EVERGREEN_CTL_CONST_OFFSET, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0, 0}, +}; + +static const struct r600_reg evergreen_context_reg_list[] = { + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028000_DB_RENDER_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028004_DB_COUNT_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028008_DB_DEPTH_VIEW, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02800C_DB_RENDER_OVERRIDE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028010_DB_RENDER_OVERRIDE2, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028014_DB_HTILE_DATA_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028028_DB_STENCIL_CLEAR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02802C_DB_DEPTH_CLEAR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028034_PA_SC_SCREEN_SCISSOR_BR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028040_DB_Z_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028044_DB_STENCIL_INFO, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028048_DB_Z_READ_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02804C_DB_STENCIL_READ_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028050_DB_Z_WRITE_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028054_DB_STENCIL_WRITE_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028058_DB_DEPTH_SIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02805C_DB_DEPTH_SLICE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028140_ALU_CONST_BUFFER_SIZE_PS_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028180_ALU_CONST_BUFFER_SIZE_VS_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028200_PA_SC_WINDOW_OFFSET, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02820C_PA_SC_CLIPRECT_RULE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028210_PA_SC_CLIPRECT_0_TL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028214_PA_SC_CLIPRECT_0_BR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028218_PA_SC_CLIPRECT_1_TL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02821C_PA_SC_CLIPRECT_1_BR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028220_PA_SC_CLIPRECT_2_TL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028224_PA_SC_CLIPRECT_2_BR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028228_PA_SC_CLIPRECT_3_TL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02822C_PA_SC_CLIPRECT_3_BR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028230_PA_SC_EDGERULE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028238_CB_TARGET_MASK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02823C_CB_SHADER_MASK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028240_PA_SC_GENERIC_SCISSOR_TL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028244_PA_SC_GENERIC_SCISSOR_BR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028254_PA_SC_VPORT_SCISSOR_0_BR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028350_SX_MISC, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028380_SQ_VTX_SEMANTIC_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028384_SQ_VTX_SEMANTIC_1, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028388_SQ_VTX_SEMANTIC_2, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02838C_SQ_VTX_SEMANTIC_3, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028390_SQ_VTX_SEMANTIC_4, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028394_SQ_VTX_SEMANTIC_5, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028398_SQ_VTX_SEMANTIC_6, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02839C_SQ_VTX_SEMANTIC_7, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283A0_SQ_VTX_SEMANTIC_8, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283A4_SQ_VTX_SEMANTIC_9, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283A8_SQ_VTX_SEMANTIC_10, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283AC_SQ_VTX_SEMANTIC_11, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283B0_SQ_VTX_SEMANTIC_12, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283B4_SQ_VTX_SEMANTIC_13, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283B8_SQ_VTX_SEMANTIC_14, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283BC_SQ_VTX_SEMANTIC_15, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283C0_SQ_VTX_SEMANTIC_16, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283C4_SQ_VTX_SEMANTIC_17, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283C8_SQ_VTX_SEMANTIC_18, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283CC_SQ_VTX_SEMANTIC_19, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283D0_SQ_VTX_SEMANTIC_20, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283D4_SQ_VTX_SEMANTIC_21, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283D8_SQ_VTX_SEMANTIC_22, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283DC_SQ_VTX_SEMANTIC_23, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283E0_SQ_VTX_SEMANTIC_24, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283E4_SQ_VTX_SEMANTIC_25, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283E8_SQ_VTX_SEMANTIC_26, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283EC_SQ_VTX_SEMANTIC_27, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283F0_SQ_VTX_SEMANTIC_28, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283F4_SQ_VTX_SEMANTIC_29, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283F8_SQ_VTX_SEMANTIC_30, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283FC_SQ_VTX_SEMANTIC_31, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0282D0_PA_SC_VPORT_ZMIN_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0282D4_PA_SC_VPORT_ZMAX_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028400_VGT_MAX_VTX_INDX, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028404_VGT_MIN_VTX_INDX, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028408_VGT_INDX_OFFSET, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028414_CB_BLEND_RED, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028418_CB_BLEND_GREEN, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02841C_CB_BLEND_BLUE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028420_CB_BLEND_ALPHA, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028430_DB_STENCILREFMASK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028434_DB_STENCILREFMASK_BF, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028438_SX_ALPHA_REF, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02843C_PA_CL_VPORT_XSCALE_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028440_PA_CL_VPORT_XOFFSET_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028444_PA_CL_VPORT_YSCALE_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028448_PA_CL_VPORT_YOFFSET_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02844C_PA_CL_VPORT_ZSCALE_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028450_PA_CL_VPORT_ZOFFSET_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285BC_PA_CL_UCP0_X, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285C0_PA_CL_UCP0_Y, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285C4_PA_CL_UCP0_Z, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285C8_PA_CL_UCP0_W, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285CC_PA_CL_UCP1_X, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285D0_PA_CL_UCP1_Y, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285D4_PA_CL_UCP1_Z, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285D8_PA_CL_UCP1_W, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285DC_PA_CL_UCP2_X, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285E0_PA_CL_UCP2_Y, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285E4_PA_CL_UCP2_Z, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285E8_PA_CL_UCP2_W, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285EC_PA_CL_UCP3_X, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285F0_PA_CL_UCP3_Y, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285F4_PA_CL_UCP3_Z, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285F8_PA_CL_UCP3_W, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285FC_PA_CL_UCP4_X, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028600_PA_CL_UCP4_Y, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028604_PA_CL_UCP4_Z, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028608_PA_CL_UCP4_W, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02860C_PA_CL_UCP5_X, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028610_PA_CL_UCP5_Y, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028614_PA_CL_UCP5_Z, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028618_PA_CL_UCP5_W, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02861C_SPI_VS_OUT_ID_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028620_SPI_VS_OUT_ID_1, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028624_SPI_VS_OUT_ID_2, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028628_SPI_VS_OUT_ID_3, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02862C_SPI_VS_OUT_ID_4, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028630_SPI_VS_OUT_ID_5, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028634_SPI_VS_OUT_ID_6, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028638_SPI_VS_OUT_ID_7, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02863C_SPI_VS_OUT_ID_8, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028640_SPI_VS_OUT_ID_9, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028644_SPI_PS_INPUT_CNTL_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028648_SPI_PS_INPUT_CNTL_1, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028650_SPI_PS_INPUT_CNTL_3, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028654_SPI_PS_INPUT_CNTL_4, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028658_SPI_PS_INPUT_CNTL_5, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028660_SPI_PS_INPUT_CNTL_7, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028664_SPI_PS_INPUT_CNTL_8, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028668_SPI_PS_INPUT_CNTL_9, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028670_SPI_PS_INPUT_CNTL_11, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028674_SPI_PS_INPUT_CNTL_12, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028678_SPI_PS_INPUT_CNTL_13, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028680_SPI_PS_INPUT_CNTL_15, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028684_SPI_PS_INPUT_CNTL_16, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028688_SPI_PS_INPUT_CNTL_17, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028690_SPI_PS_INPUT_CNTL_19, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028694_SPI_PS_INPUT_CNTL_20, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028698_SPI_PS_INPUT_CNTL_21, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286C4_SPI_VS_OUT_CONFIG, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286C8_SPI_THREAD_GROUPING, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286D4_SPI_INTERP_CONTROL_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286D8_SPI_INPUT_Z, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286DC_SPI_FOG_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286E0_SPI_BARYC_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286E4_SPI_PS_IN_CONTROL_2, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286E8_SPI_COMPUTE_INPUT_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028780_CB_BLEND0_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028784_CB_BLEND1_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028788_CB_BLEND2_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02878C_CB_BLEND3_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028790_CB_BLEND4_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028794_CB_BLEND5_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028798_CB_BLEND6_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02879C_CB_BLEND7_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028800_DB_DEPTH_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02880C_DB_SHADER_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028808_CB_COLOR_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028810_PA_CL_CLIP_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028814_PA_SU_SC_MODE_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028818_PA_CL_VTE_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02881C_PA_CL_VS_OUT_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028820_PA_CL_NANINF_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028840_SQ_PGM_START_PS, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028844_SQ_PGM_RESOURCES_PS, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028848_SQ_PGM_RESOURCES_2_PS, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02884C_SQ_PGM_EXPORTS_PS, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02885C_SQ_PGM_START_VS, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028860_SQ_PGM_RESOURCES_VS, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028864_SQ_PGM_RESOURCES_2_VS, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0288A4_SQ_PGM_START_FS, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0288A8_SQ_PGM_RESOURCES_FS, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0288EC_SQ_LDS_ALLOC_PS, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028900_SQ_ESGS_RING_ITEMSIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028904_SQ_GSVS_RING_ITEMSIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02891C_SQ_GS_VERT_ITEMSIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028940_ALU_CONST_CACHE_PS_0, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028980_ALU_CONST_CACHE_VS_0, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A00_PA_SU_POINT_SIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A04_PA_SU_POINT_MINMAX, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A08_PA_SU_LINE_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A10_VGT_OUTPUT_PATH_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A14_VGT_HOS_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A20_VGT_HOS_REUSE_DEPTH, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A24_VGT_GROUP_PRIM_TYPE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A28_VGT_GROUP_FIRST_DECR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A2C_VGT_GROUP_DECR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A30_VGT_GROUP_VECT_0_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A34_VGT_GROUP_VECT_1_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A40_VGT_GS_MODE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A48_PA_SC_MODE_CNTL_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A4C_PA_SC_MODE_CNTL_1, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028AB4_VGT_REUSE_OFF, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028AB8_VGT_VTX_CNT_EN, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028ABC_DB_HTILE_SURFACE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028AC8_DB_PRELOAD_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B54_VGT_SHADER_STAGES_EN, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B70_DB_ALPHA_TO_MASK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B94_VGT_STRMOUT_CONFIG, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C00_PA_SC_LINE_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C04_PA_SC_AA_CONFIG, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C08_PA_SU_VTX_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C3C_PA_SC_AA_MASK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C60_CB_COLOR0_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C64_CB_COLOR0_PITCH, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C68_CB_COLOR0_SLICE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C6C_CB_COLOR0_VIEW, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C70_CB_COLOR0_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C74_CB_COLOR0_ATTRIB, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C78_CB_COLOR0_DIM, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C9C_CB_COLOR1_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CA0_CB_COLOR1_PITCH, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CA4_CB_COLOR1_SLICE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CA8_CB_COLOR1_VIEW, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CAC_CB_COLOR1_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CB0_CB_COLOR1_ATTRIB, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CB4_CB_COLOR1_DIM, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CD8_CB_COLOR2_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CDC_CB_COLOR2_PITCH, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CE0_CB_COLOR2_SLICE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CE4_CB_COLOR2_VIEW, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CE8_CB_COLOR2_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CEC_CB_COLOR2_ATTRIB, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CF0_CB_COLOR2_DIM, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D14_CB_COLOR3_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D18_CB_COLOR3_PITCH, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D1C_CB_COLOR3_SLICE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D20_CB_COLOR3_VIEW, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D24_CB_COLOR3_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D28_CB_COLOR3_ATTRIB, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D2C_CB_COLOR3_DIM, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D50_CB_COLOR4_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D54_CB_COLOR4_PITCH, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D58_CB_COLOR4_SLICE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D5C_CB_COLOR4_VIEW, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D60_CB_COLOR4_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D64_CB_COLOR4_ATTRIB, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D68_CB_COLOR4_DIM, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D8C_CB_COLOR5_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D90_CB_COLOR5_PITCH, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D94_CB_COLOR5_SLICE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D98_CB_COLOR5_VIEW, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D9C_CB_COLOR5_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DA0_CB_COLOR5_ATTRIB, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DA4_CB_COLOR5_DIM, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DC8_CB_COLOR6_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DCC_CB_COLOR6_PITCH, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DD0_CB_COLOR6_SLICE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DD4_CB_COLOR6_VIEW, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DD8_CB_COLOR6_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DDC_CB_COLOR6_ATTRIB, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DE0_CB_COLOR6_DIM, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E04_CB_COLOR7_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E08_CB_COLOR7_PITCH, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E0C_CB_COLOR7_SLICE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E10_CB_COLOR7_VIEW, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E14_CB_COLOR7_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E18_CB_COLOR7_ATTRIB, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E1C_CB_COLOR7_DIM, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E40_CB_COLOR8_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E44_CB_COLOR8_PITCH, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E48_CB_COLOR8_SLICE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E4C_CB_COLOR8_VIEW, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E50_CB_COLOR8_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E54_CB_COLOR8_ATTRIB, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E58_CB_COLOR8_DIM, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E5C_CB_COLOR9_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E60_CB_COLOR9_PITCH, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E64_CB_COLOR9_SLICE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E68_CB_COLOR9_VIEW, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E6C_CB_COLOR9_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E70_CB_COLOR9_ATTRIB, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E74_CB_COLOR9_DIM, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E78_CB_COLOR10_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E7C_CB_COLOR10_PITCH, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E80_CB_COLOR10_SLICE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E84_CB_COLOR10_VIEW, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E88_CB_COLOR10_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E8C_CB_COLOR10_ATTRIB, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E90_CB_COLOR10_DIM, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E94_CB_COLOR11_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E98_CB_COLOR11_PITCH, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E9C_CB_COLOR11_SLICE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028EA0_CB_COLOR11_VIEW, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028EA4_CB_COLOR11_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028EA8_CB_COLOR11_ATTRIB, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028EAC_CB_COLOR11_DIM, 0, 0, 0}, +}; + +/* SHADER RESOURCE R600/R700 */ +static int evergreen_state_resource_init(struct r600_context *ctx, u32 offset) +{ + struct r600_reg r600_shader_resource[] = { + {PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET, R_030000_RESOURCE0_WORD0, 0, 0, 0}, + {PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET, R_030004_RESOURCE0_WORD1, 0, 0, 0}, + {PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET, R_030008_RESOURCE0_WORD2, 1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, + {PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET, R_03000C_RESOURCE0_WORD3, 1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, + {PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET, R_030010_RESOURCE0_WORD4, 0, 0, 0}, + {PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET, R_030014_RESOURCE0_WORD5, 0, 0, 0}, + {PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET, R_030018_RESOURCE0_WORD6, 0, 0, 0}, + {PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET, R_03001C_RESOURCE0_WORD7, 0, 0, 0}, + }; + unsigned nreg = Elements(r600_shader_resource); + + for (int i = 0; i < nreg; i++) { + r600_shader_resource[i].offset += offset; + } + return r600_context_add_block(ctx, r600_shader_resource, nreg); +} + +/* SHADER SAMPLER R600/R700 */ +static int r600_state_sampler_init(struct r600_context *ctx, u32 offset) +{ + struct r600_reg r600_shader_sampler[] = { + {PKT3_SET_SAMPLER, EVERGREEN_SAMPLER_OFFSET, R_03C000_SQ_TEX_SAMPLER_WORD0_0, 0, 0, 0}, + {PKT3_SET_SAMPLER, EVERGREEN_SAMPLER_OFFSET, R_03C004_SQ_TEX_SAMPLER_WORD1_0, 0, 0, 0}, + {PKT3_SET_SAMPLER, EVERGREEN_SAMPLER_OFFSET, R_03C008_SQ_TEX_SAMPLER_WORD2_0, 0, 0, 0}, + }; + unsigned nreg = Elements(r600_shader_sampler); + + for (int i = 0; i < nreg; i++) { + r600_shader_sampler[i].offset += offset; + } + return r600_context_add_block(ctx, r600_shader_sampler, nreg); +} + +/* SHADER SAMPLER BORDER R600/R700 */ +static int evergreen_state_sampler_border_init(struct r600_context *ctx, u32 offset, unsigned id) +{ + struct r600_reg r600_shader_sampler_border[] = { + {PKT3_SET_CONFIG_REG, 0, R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, 0, R_00A404_TD_PS_SAMPLER0_BORDER_RED, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, 0, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, 0, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, 0, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, 0, 0, 0}, + }; + unsigned nreg = Elements(r600_shader_sampler_border); + unsigned fake_offset = (offset - R_00A400_TD_PS_SAMPLER0_BORDER_INDEX) * 0x100 + 0x40000 + id * 0x1C; + struct r600_range *range; + struct r600_block *block; + int r; + + for (int i = 0; i < nreg; i++) { + r600_shader_sampler_border[i].offset -= R_00A400_TD_PS_SAMPLER0_BORDER_INDEX; + r600_shader_sampler_border[i].offset += fake_offset; + } + r = r600_context_add_block(ctx, r600_shader_sampler_border, nreg); + if (r) { + return r; + } + /* set proper offset */ + range = &ctx->range[CTX_RANGE_ID(ctx, r600_shader_sampler_border[0].offset)]; + block = range->blocks[CTX_BLOCK_ID(ctx, r600_shader_sampler_border[0].offset)]; + block->pm4[1] = (offset - EVERGREEN_CONFIG_REG_OFFSET) >> 2; + return 0; +} + +static int evergreen_loop_const_init(struct r600_context *ctx, u32 offset) +{ + unsigned nreg = 32; + struct r600_reg r600_loop_consts[32]; + int i; + + for (i = 0; i < nreg; i++) { + r600_loop_consts[i].opcode = PKT3_SET_LOOP_CONST; + r600_loop_consts[i].offset_base = EVERGREEN_LOOP_CONST_OFFSET; + r600_loop_consts[i].offset = EVERGREEN_LOOP_CONST_OFFSET + ((offset + i) * 4); + r600_loop_consts[i].need_bo = 0; + r600_loop_consts[i].flush_flags = 0; + } + return r600_context_add_block(ctx, r600_loop_consts, nreg); +} + +int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) +{ + int r; + + memset(ctx, 0, sizeof(struct r600_context)); + ctx->radeon = radeon; + LIST_INITHEAD(&ctx->query_list); + + /* initialize hash */ + ctx->hash_size = 19; + ctx->hash_shift = 11; + for (int i = 0; i < 256; i++) { + ctx->range[i].start_offset = i << ctx->hash_shift; + ctx->range[i].end_offset = ((i + 1) << ctx->hash_shift) - 1; + ctx->range[i].blocks = calloc(1 << ctx->hash_shift, sizeof(void*)); + if (ctx->range[i].blocks == NULL) { + return -ENOMEM; + } + } + + /* add blocks */ + r = r600_context_add_block(ctx, evergreen_config_reg_list, + Elements(evergreen_config_reg_list)); + if (r) + goto out_err; + r = r600_context_add_block(ctx, evergreen_context_reg_list, + Elements(evergreen_context_reg_list)); + if (r) + goto out_err; + r = r600_context_add_block(ctx, evergreen_ctl_const_list, + Elements(evergreen_ctl_const_list)); + if (r) + goto out_err; + + + /* PS SAMPLER */ + for (int j = 0, offset = 0; j < 18; j++, offset += 0xC) { + r = r600_state_sampler_init(ctx, offset); + if (r) + goto out_err; + } + /* VS SAMPLER */ + for (int j = 0, offset = 0xD8; j < 18; j++, offset += 0xC) { + r = r600_state_sampler_init(ctx, offset); + if (r) + goto out_err; + } + /* PS SAMPLER BORDER */ + for (int j = 0; j < 18; j++) { + r = evergreen_state_sampler_border_init(ctx, R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, j); + if (r) + goto out_err; + } + /* VS SAMPLER BORDER */ + for (int j = 0; j < 18; j++) { + r = evergreen_state_sampler_border_init(ctx, R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, j); + if (r) + goto out_err; + } + /* PS RESOURCE */ + for (int j = 0, offset = 0; j < 176; j++, offset += 0x20) { + r = evergreen_state_resource_init(ctx, offset); + if (r) + goto out_err; + } + /* VS RESOURCE */ + for (int j = 0, offset = 0x1600; j < 160; j++, offset += 0x20) { + r = evergreen_state_resource_init(ctx, offset); + if (r) + goto out_err; + } + + /* PS loop const */ + evergreen_loop_const_init(ctx, 0); + /* VS loop const */ + evergreen_loop_const_init(ctx, 32); + + /* setup block table */ + ctx->blocks = calloc(ctx->nblocks, sizeof(void*)); + for (int i = 0, c = 0; i < 256; i++) { + for (int j = 0; j < (1 << ctx->hash_shift); j++) { + if (ctx->range[i].blocks[j]) { + assert(c < ctx->nblocks); + ctx->blocks[c++] = ctx->range[i].blocks[j]; + j += (ctx->range[i].blocks[j]->nreg << 2) - 1; + } + } + } + + /* allocate cs variables */ + ctx->nreloc = RADEON_CTX_MAX_PM4; + ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc)); + if (ctx->reloc == NULL) { + r = -ENOMEM; + goto out_err; + } + ctx->bo = calloc(ctx->nreloc, sizeof(void *)); + if (ctx->bo == NULL) { + r = -ENOMEM; + goto out_err; + } + ctx->pm4_ndwords = RADEON_CTX_MAX_PM4; + ctx->pm4 = calloc(ctx->pm4_ndwords, 4); + if (ctx->pm4 == NULL) { + r = -ENOMEM; + goto out_err; + } + /* save 16dwords space for fence mecanism */ + ctx->pm4_ndwords -= 16; + + r = r600_context_init_fence(ctx); + if (r) { + goto out_err; + } + + /* init dirty list */ + LIST_INITHEAD(&ctx->dirty); + return 0; +out_err: + r600_context_fini(ctx); + return r; +} + +static inline void evergreen_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) +{ + struct r600_range *range; + struct r600_block *block; + + range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; + block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; + if (state == NULL) { + block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); + r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); + r600_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL); + LIST_DELINIT(&block->list); + return; + } + block->reg[0] = state->regs[0].value; + block->reg[1] = state->regs[1].value; + block->reg[2] = state->regs[2].value; + block->reg[3] = state->regs[3].value; + block->reg[4] = state->regs[4].value; + block->reg[5] = state->regs[5].value; + block->reg[6] = state->regs[6].value; + block->reg[7] = state->regs[7].value; + r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); + r600_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL); + if (state->regs[0].bo) { + /* VERTEX RESOURCE, we preted there is 2 bo to relocate so + * we have single case btw VERTEX & TEXTURE resource + */ + r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo); + r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo); + } else { + /* TEXTURE RESOURCE */ + r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo); + r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo); + } + if (!(block->status & R600_BLOCK_STATUS_DIRTY)) { + block->status |= R600_BLOCK_STATUS_ENABLED; + block->status |= R600_BLOCK_STATUS_DIRTY; + ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; + LIST_ADDTAIL(&block->list,&ctx->dirty); + } +} + +void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +{ + unsigned offset = R_030000_SQ_TEX_RESOURCE_WORD0_0 + 0x20 * rid; + + evergreen_context_pipe_state_set_resource(ctx, state, offset); +} + +void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +{ + unsigned offset = R_030000_SQ_TEX_RESOURCE_WORD0_0 + 0x1600 + 0x20 * rid; + + evergreen_context_pipe_state_set_resource(ctx, state, offset); +} + +static inline void evergreen_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) +{ + struct r600_range *range; + struct r600_block *block; + + range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; + block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; + if (state == NULL) { + block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); + LIST_DELINIT(&block->list); + return; + } + block->reg[0] = state->regs[0].value; + block->reg[1] = state->regs[1].value; + block->reg[2] = state->regs[2].value; + if (!(block->status & R600_BLOCK_STATUS_DIRTY)) { + block->status |= R600_BLOCK_STATUS_ENABLED; + block->status |= R600_BLOCK_STATUS_DIRTY; + ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; + LIST_ADDTAIL(&block->list,&ctx->dirty); + } +} + +static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset, unsigned id) +{ + unsigned fake_offset = (offset - R_00A400_TD_PS_SAMPLER0_BORDER_INDEX) * 0x100 + 0x40000 + id * 0x1C; + struct r600_range *range; + struct r600_block *block; + + range = &ctx->range[CTX_RANGE_ID(ctx, fake_offset)]; + block = range->blocks[CTX_BLOCK_ID(ctx, fake_offset)]; + if (state == NULL) { + block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); + LIST_DELINIT(&block->list); + return; + } + if (state->nregs <= 3) { + return; + } + block->reg[0] = id; + block->reg[1] = state->regs[3].value; + block->reg[2] = state->regs[4].value; + block->reg[3] = state->regs[5].value; + block->reg[4] = state->regs[6].value; + if (!(block->status & R600_BLOCK_STATUS_DIRTY)) { + block->status |= R600_BLOCK_STATUS_ENABLED; + block->status |= R600_BLOCK_STATUS_DIRTY; + ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; + LIST_ADDTAIL(&block->list,&ctx->dirty); + } +} + +void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) +{ + unsigned offset; + + offset = 0x0003C000 + id * 0xc; + evergreen_context_pipe_state_set_sampler(ctx, state, offset); + evergreen_context_pipe_state_set_sampler_border(ctx, state, R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, id); +} + +void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) +{ + unsigned offset; + + offset = 0x0003C0D8 + id * 0xc; + evergreen_context_pipe_state_set_sampler(ctx, state, offset); + evergreen_context_pipe_state_set_sampler_border(ctx, state, R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, id); +} + + +void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw) +{ + struct r600_bo *cb[12]; + struct r600_bo *db; + unsigned ndwords = 9, flush; + struct r600_block *dirty_block = NULL; + struct r600_block *next_block; + + if (draw->indices) { + ndwords = 13; + /* make sure there is enough relocation space before scheduling draw */ + if (ctx->creloc >= (ctx->nreloc - 1)) { + r600_context_flush(ctx); + } + } + + /* find number of color buffer */ + db = r600_context_reg_bo(ctx, R_028048_DB_Z_READ_BASE); + cb[0] = r600_context_reg_bo(ctx, R_028C60_CB_COLOR0_BASE); + cb[1] = r600_context_reg_bo(ctx, R_028C9C_CB_COLOR1_BASE); + cb[2] = r600_context_reg_bo(ctx, R_028CD8_CB_COLOR2_BASE); + cb[3] = r600_context_reg_bo(ctx, R_028D14_CB_COLOR3_BASE); + cb[4] = r600_context_reg_bo(ctx, R_028D50_CB_COLOR4_BASE); + cb[5] = r600_context_reg_bo(ctx, R_028D8C_CB_COLOR5_BASE); + cb[6] = r600_context_reg_bo(ctx, R_028DC8_CB_COLOR6_BASE); + cb[7] = r600_context_reg_bo(ctx, R_028E04_CB_COLOR7_BASE); + cb[8] = r600_context_reg_bo(ctx, R_028E40_CB_COLOR8_BASE); + cb[9] = r600_context_reg_bo(ctx, R_028E5C_CB_COLOR9_BASE); + cb[10] = r600_context_reg_bo(ctx, R_028E78_CB_COLOR10_BASE); + cb[11] = r600_context_reg_bo(ctx, R_028E94_CB_COLOR11_BASE); + for (int i = 0; i < 12; i++) { + if (cb[i]) { + ndwords += 7; + } + } + if (db) + ndwords += 7; + + /* queries need some special values */ + if (ctx->num_query_running) { + r600_context_reg(ctx, + R_028004_DB_COUNT_CONTROL, + S_028004_PERFECT_ZPASS_COUNTS(1), + S_028004_PERFECT_ZPASS_COUNTS(1)); + r600_context_reg(ctx, + R_02800C_DB_RENDER_OVERRIDE, + S_02800C_NOOP_CULL_DISABLE(1), + S_02800C_NOOP_CULL_DISABLE(1)); + } + + if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { + /* need to flush */ + r600_context_flush(ctx); + } + /* at that point everythings is flushed and ctx->pm4_cdwords = 0 */ + if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) { + R600_ERR("context is too big to be scheduled\n"); + return; + } + + /* enough room to copy packet */ + LIST_FOR_EACH_ENTRY_SAFE(dirty_block, next_block, &ctx->dirty,list) { + r600_context_block_emit_dirty(ctx, dirty_block); + } + + /* draw packet */ + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0); + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type; + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0); + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances; + if (draw->indices) { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3); + ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset + r600_bo_offset(draw->indices); + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = 0; + r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices); + } else { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1); + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; + } + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT; + + /* flush color buffer */ + for (int i = 0; i < 12; i++) { + if (cb[i]) { + if (i > 7) { + flush = (S_0085F0_CB8_DEST_BASE_ENA(1) << (i - 8)) | + S_0085F0_CB_ACTION_ENA(1); + } else { + flush = (S_0085F0_CB0_DEST_BASE_ENA(1) << i) | + S_0085F0_CB_ACTION_ENA(1); + } + r600_context_bo_flush(ctx, flush, 0, cb[i]); + } + } + if (db) { + r600_context_bo_flush(ctx, + S_0085F0_DB_ACTION_ENA(1) | + S_0085F0_DB_DEST_BASE_ENA(1), + 0, db); + } + + /* all dirty state have been scheduled in current cs */ + ctx->pm4_dirty_cdwords = 0; +} + +static inline void evergreen_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) +{ + struct r600_range *range; + struct r600_block *block; + + range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; + block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; + block->reg[0] = state->regs[0].value; + block->reg[1] = state->regs[1].value; + block->reg[2] = state->regs[2].value; + block->reg[3] = state->regs[3].value; + block->reg[4] = state->regs[4].value; + block->reg[5] = state->regs[5].value; + block->reg[6] = state->regs[6].value; + block->reg[7] = state->regs[7].value; + r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); + r600_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL); + if (state->regs[0].bo) { + /* VERTEX RESOURCE, we preted there is 2 bo to relocate so + * we have single case btw VERTEX & TEXTURE resource + */ + r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo); + r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo); + } else { + /* TEXTURE RESOURCE */ + r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo); + r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo); + } + if (!(block->status & R600_BLOCK_STATUS_DIRTY)) { + block->status |= R600_BLOCK_STATUS_ENABLED; + block->status |= R600_BLOCK_STATUS_DIRTY; + ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; + LIST_ADDTAIL(&block->list,&ctx->dirty); + } +} + +void evergreen_ps_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +{ + unsigned offset = R_030000_RESOURCE0_WORD0 + 0x20 * rid; + + evergreen_resource_set(ctx, state, offset); +} + +void evergreen_vs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +{ + unsigned offset = R_030000_RESOURCE0_WORD0 + 0x1600 + 0x20 * rid; + + evergreen_resource_set(ctx, state, offset); +} diff --git a/src/gallium/winsys/r600/drm/gen_eg_states.py b/src/gallium/winsys/r600/drm/gen_eg_states.py deleted file mode 100644 index b2e5b2203a..0000000000 --- a/src/gallium/winsys/r600/drm/gen_eg_states.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -import re - -def main(): - fileIN = open('eg_states.h', 'r') - line = fileIN.readline() - next_is_reg = False - count = 0 - - print "/* This file is autogenerated from eg_states.h - do not edit directly */" - print "/* autogenerating script is gen_eg_states.py */" - print "" - while line: - if line[0:2] == "};": - if next_is_reg == True: - print "#define " + name + "_SIZE\t\t", count - print "#define " + name + "_PM4 128\t\t" - next_is_reg = False - count = 0 - print "" - - if line[0:6] == "static": - name = line.rstrip("\n") - cline = name.split() - name = cline[4].split('[') - name = name[0].replace("_names", "") - print "/* " + name + " */" - next_is_reg = True - elif next_is_reg == True: - reg = line.split(); - reg = reg[3].replace("},", "") - reg = reg.replace("\"", "") - print "#define " + name + "__" + reg + "\t\t", count - count = count + 1 - - line = fileIN.readline() - -if __name__ == "__main__": - main() diff --git a/src/gallium/winsys/r600/drm/gen_r600_states.py b/src/gallium/winsys/r600/drm/gen_r600_states.py deleted file mode 100644 index 9bd5ab2082..0000000000 --- a/src/gallium/winsys/r600/drm/gen_r600_states.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -import re - -def main(): - fileIN = open('r600_states.h', 'r') - line = fileIN.readline() - next_is_reg = False - count = 0 - - print "/* This file is autogenerated from r600_states.h - do not edit directly */" - print "/* autogenerating script is gen_r600_states.py */" - print "" - while line: - if line[0:2] == "};": - if next_is_reg == True: - print "#define " + name + "_SIZE\t\t", count - print "#define " + name + "_PM4 128\t\t" - next_is_reg = False - count = 0 - print "" - - if line[0:6] == "static": - name = line.rstrip("\n") - cline = name.split() - name = cline[4].split('[') - name = name[0].replace("_names", "") - print "/* " + name + " */" - next_is_reg = True - elif next_is_reg == True: - reg = line.split(); - reg = reg[3].replace("},", "") - reg = reg.replace("\"", "") - print "#define " + name + "__" + reg + "\t\t", count - count = count + 1 - - line = fileIN.readline() - -if __name__ == "__main__": - main() diff --git a/src/gallium/winsys/r600/drm/r600.c b/src/gallium/winsys/r600/drm/r600.c index af9b9187ab..0a4d2e791d 100644 --- a/src/gallium/winsys/r600/drm/r600.c +++ b/src/gallium/winsys/r600/drm/r600.c @@ -25,6 +25,9 @@ */ #include "xf86drm.h" #include "radeon_drm.h" +#include "pipe/p_compiler.h" +#include "util/u_inlines.h" +#include <pipebuffer/pb_bufmgr.h> #include "r600_priv.h" enum radeon_family r600_get_family(struct radeon *r600) @@ -32,6 +35,16 @@ enum radeon_family r600_get_family(struct radeon *r600) return r600->family; } +enum chip_class r600_get_family_class(struct radeon *radeon) +{ + return radeon->chip_class; +} + +struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon) +{ + return &radeon->tiling_info; +} + static int r600_get_device(struct radeon *r600) { struct drm_radeon_info info; @@ -117,6 +130,37 @@ struct radeon *r600_new(int fd, unsigned device) R600_ERR("unknown or unsupported chipset 0x%04X\n", r600->device); break; } + + /* setup class */ + switch (r600->family) { + case CHIP_R600: + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV670: + case CHIP_RV620: + case CHIP_RV635: + case CHIP_RS780: + case CHIP_RS880: + r600->chip_class = R600; + break; + case CHIP_RV770: + case CHIP_RV730: + case CHIP_RV710: + case CHIP_RV740: + r600->chip_class = R700; + break; + case CHIP_CEDAR: + case CHIP_REDWOOD: + case CHIP_JUNIPER: + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + r600->chip_class = EVERGREEN; + break; + default: + R600_ERR("unknown or unsupported chipset 0x%04X\n", r600->device); + break; + } + return r600; } diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c new file mode 100644 index 0000000000..9498f3a82e --- /dev/null +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -0,0 +1,122 @@ +/* + * Copyright 2010 Dave Airlie + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Dave Airlie + */ +#include <pipe/p_compiler.h> +#include <pipe/p_screen.h> +#include <pipebuffer/pb_bufmgr.h> +#include "r600_priv.h" + +struct r600_bo *r600_bo(struct radeon *radeon, + unsigned size, unsigned alignment, unsigned usage) +{ + struct r600_bo *ws_bo = calloc(1, sizeof(struct r600_bo)); + struct pb_desc desc; + struct pb_manager *man; + + desc.alignment = alignment; + desc.usage = usage; + ws_bo->size = size; + + if (usage & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) + man = radeon->cman; + else + man = radeon->kman; + + ws_bo->pb = man->create_buffer(man, size, &desc); + if (ws_bo->pb == NULL) { + free(ws_bo); + return NULL; + } + + pipe_reference_init(&ws_bo->reference, 1); + return ws_bo; +} + +struct r600_bo *r600_bo_handle(struct radeon *radeon, + unsigned handle) +{ + struct r600_bo *ws_bo = calloc(1, sizeof(struct r600_bo)); + struct radeon_bo *bo; + + ws_bo->pb = radeon_bo_pb_create_buffer_from_handle(radeon->kman, handle); + if (!ws_bo->pb) { + free(ws_bo); + return NULL; + } + bo = radeon_bo_pb_get_bo(ws_bo->pb); + ws_bo->size = bo->size; + pipe_reference_init(&ws_bo->reference, 1); + return ws_bo; +} + +void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx) +{ + return pb_map(bo->pb, usage, ctx); +} + +void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo) +{ + pb_unmap(bo->pb); +} + +static void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo) +{ + if (bo->pb) + pb_reference(&bo->pb, NULL); + free(bo); +} + +void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, + struct r600_bo *src) +{ + struct r600_bo *old = *dst; + + if (pipe_reference(&(*dst)->reference, &src->reference)) { + r600_bo_destroy(radeon, old); + } + *dst = src; +} + +unsigned r600_bo_get_handle(struct r600_bo *pb_bo) +{ + struct radeon_bo *bo; + + bo = radeon_bo_pb_get_bo(pb_bo->pb); + if (!bo) + return 0; + + return bo->handle; +} + +unsigned r600_bo_get_size(struct r600_bo *pb_bo) +{ + struct radeon_bo *bo; + + bo = radeon_bo_pb_get_bo(pb_bo->pb); + if (!bo) + return 0; + + return bo->size; +} diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 7a1a762f54..c9de95ffc0 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -25,14 +25,231 @@ * Corbin Simpson <MostAwesomeDude@gmail.com> * Joakim Sindholt <opensource@zhasha.com> */ +#include <stdio.h> +#include <errno.h> #include <sys/ioctl.h> #include "util/u_inlines.h" #include "util/u_debug.h" -#include "radeon_priv.h" +#include <pipebuffer/pb_bufmgr.h> +#include "r600.h" +#include "r600_priv.h" #include "r600_drm_public.h" +#include "xf86drm.h" +#include "radeon_drm.h" + +#ifndef RADEON_INFO_TILING_CONFIG +#define RADEON_INFO_TILING_CONFIG 0x6 +#endif +static int radeon_get_device(struct radeon *radeon) +{ + struct drm_radeon_info info; + int r; + + radeon->device = 0; + info.request = RADEON_INFO_DEVICE_ID; + info.value = (uintptr_t)&radeon->device; + r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, + sizeof(struct drm_radeon_info)); + return r; +} + +static int radeon_drm_get_tiling(struct radeon *radeon) +{ + struct drm_radeon_info info; + int r; + uint32_t tiling_config; + + info.request = RADEON_INFO_TILING_CONFIG; + info.value = (uintptr_t)&tiling_config; + r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, + sizeof(struct drm_radeon_info)); + + if (r) + return r; + + switch ((tiling_config & 0xe) >> 1) { + case 0: + radeon->tiling_info.num_channels = 1; + break; + case 1: + radeon->tiling_info.num_channels = 2; + break; + case 2: + radeon->tiling_info.num_channels = 4; + break; + case 3: + radeon->tiling_info.num_channels = 8; + break; + default: + return -EINVAL; + } + + switch ((tiling_config & 0x30) >> 4) { + case 0: + radeon->tiling_info.num_banks = 4; + break; + case 1: + radeon->tiling_info.num_banks = 8; + break; + default: + return -EINVAL; + + } + switch ((tiling_config & 0xc0) >> 6) { + case 0: + radeon->tiling_info.group_bytes = 256; + break; + case 1: + radeon->tiling_info.group_bytes = 512; + break; + default: + return -EINVAL; + } + return 0; +} + +struct radeon *radeon_new(int fd, unsigned device) +{ + struct radeon *radeon; + int r; + + radeon = calloc(1, sizeof(*radeon)); + if (radeon == NULL) { + return NULL; + } + radeon->fd = fd; + radeon->device = device; + radeon->refcount = 1; + if (fd >= 0) { + r = radeon_get_device(radeon); + if (r) { + fprintf(stderr, "Failed to get device id\n"); + return radeon_decref(radeon); + } + } + radeon->family = radeon_family_from_device(radeon->device); + if (radeon->family == CHIP_UNKNOWN) { + fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->device); + return radeon_decref(radeon); + } + switch (radeon->family) { + case CHIP_R600: + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV670: + case CHIP_RV620: + case CHIP_RV635: + case CHIP_RS780: + case CHIP_RS880: + case CHIP_RV770: + case CHIP_RV730: + case CHIP_RV710: + case CHIP_RV740: + case CHIP_CEDAR: + case CHIP_REDWOOD: + case CHIP_JUNIPER: + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + break; + case CHIP_R100: + case CHIP_RV100: + case CHIP_RS100: + case CHIP_RV200: + case CHIP_RS200: + case CHIP_R200: + case CHIP_RV250: + case CHIP_RS300: + case CHIP_RV280: + case CHIP_R300: + case CHIP_R350: + case CHIP_RV350: + case CHIP_RV380: + case CHIP_R420: + case CHIP_R423: + case CHIP_RV410: + case CHIP_RS400: + case CHIP_RS480: + case CHIP_RS600: + case CHIP_RS690: + case CHIP_RS740: + case CHIP_RV515: + case CHIP_R520: + case CHIP_RV530: + case CHIP_RV560: + case CHIP_RV570: + case CHIP_R580: + default: + fprintf(stderr, "%s unknown or unsupported chipset 0x%04X\n", + __func__, radeon->device); + break; + } + + /* setup class */ + switch (radeon->family) { + case CHIP_R600: + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV670: + case CHIP_RV620: + case CHIP_RV635: + case CHIP_RS780: + case CHIP_RS880: + radeon->chip_class = R600; + break; + case CHIP_RV770: + case CHIP_RV730: + case CHIP_RV710: + case CHIP_RV740: + radeon->chip_class = R700; + break; + case CHIP_CEDAR: + case CHIP_REDWOOD: + case CHIP_JUNIPER: + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + radeon->chip_class = EVERGREEN; + break; + default: + fprintf(stderr, "%s unknown or unsupported chipset 0x%04X\n", + __func__, radeon->device); + break; + } + + if (radeon->chip_class == R600 || radeon->chip_class == R700) { + if (radeon_drm_get_tiling(radeon)) + return NULL; + } + radeon->kman = radeon_bo_pbmgr_create(radeon); + if (!radeon->kman) + return NULL; + radeon->cman = pb_cache_manager_create(radeon->kman, 100000); + if (!radeon->cman) + return NULL; + return radeon; +} struct radeon *r600_drm_winsys_create(int drmfd) { return radeon_new(drmfd, 0); } +struct radeon *radeon_decref(struct radeon *radeon) +{ + if (radeon == NULL) + return NULL; + if (--radeon->refcount > 0) { + return NULL; + } + + if (radeon->cman) + radeon->cman->destroy(radeon->cman); + + if (radeon->kman) + radeon->kman->destroy(radeon->kman); + + if (radeon->fd >= 0) + drmClose(radeon->fd); + + free(radeon); + return NULL; +} diff --git a/src/gallium/winsys/r600/drm/r600_drm_public.h b/src/gallium/winsys/r600/drm/r600_drm_public.h index 84f2dce437..cfce8df9c2 100644 --- a/src/gallium/winsys/r600/drm/r600_drm_public.h +++ b/src/gallium/winsys/r600/drm/r600_drm_public.h @@ -1,4 +1,28 @@ - +/* + * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + */ #ifndef R600_DRM_PUBLIC_H #define R600_DRM_PUBLIC_H diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c new file mode 100644 index 0000000000..2521ff9647 --- /dev/null +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -0,0 +1,1362 @@ +/* + * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + */ +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> +#include "xf86drm.h" +#include "r600.h" +#include "r600d.h" +#include "radeon_drm.h" +#include "bof.h" +#include "pipe/p_compiler.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include <pipebuffer/pb_bufmgr.h> +#include "r600_priv.h" + +#define GROUP_FORCE_NEW_BLOCK 0 + +int r600_context_init_fence(struct r600_context *ctx) +{ + ctx->fence = 1; + ctx->fence_bo = r600_bo(ctx->radeon, 4096, 0, 0); + if (ctx->fence_bo == NULL) { + return -ENOMEM; + } + ctx->cfence = r600_bo_map(ctx->radeon, ctx->fence_bo, PB_USAGE_UNSYNCHRONIZED, NULL); + *ctx->cfence = 0; + LIST_INITHEAD(&ctx->fenced_bo); + return 0; +} + +static void INLINE r600_context_update_fenced_list(struct r600_context *ctx) +{ + for (int i = 0; i < ctx->creloc; i++) { + if (!LIST_IS_EMPTY(&ctx->bo[i]->fencedlist)) + LIST_DELINIT(&ctx->bo[i]->fencedlist); + LIST_ADDTAIL(&ctx->bo[i]->fencedlist, &ctx->fenced_bo); + ctx->bo[i]->fence = ctx->fence; + ctx->bo[i]->ctx = ctx; + } +} + +static void INLINE r600_context_fence_wraparound(struct r600_context *ctx, unsigned fence) +{ + struct radeon_bo *bo = NULL; + struct radeon_bo *tmp; + + LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &ctx->fenced_bo, fencedlist) { + if (bo->fence <= *ctx->cfence) { + LIST_DELINIT(&bo->fencedlist); + bo->fence = 0; + } else { + bo->fence = fence; + } + } +} + +int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg) +{ + struct r600_block *block; + struct r600_range *range; + int offset; + + for (unsigned i = 0, n = 0; i < nreg; i += n) { + u32 j; + + /* ignore new block balise */ + if (reg[i].offset == GROUP_FORCE_NEW_BLOCK) { + n = 1; + continue; + } + + /* register that need relocation are in their own group */ + /* find number of consecutive registers */ + n = 0; + offset = reg[i].offset; + while (reg[i + n].offset == offset) { + n++; + offset += 4; + if ((n + i) >= nreg) + break; + if (n >= (R600_BLOCK_MAX_REG - 2)) + break; + } + + /* allocate new block */ + block = calloc(1, sizeof(struct r600_block)); + if (block == NULL) { + return -ENOMEM; + } + ctx->nblocks++; + for (int j = 0; j < n; j++) { + range = &ctx->range[CTX_RANGE_ID(ctx, reg[i + j].offset)]; + range->blocks[CTX_BLOCK_ID(ctx, reg[i + j].offset)] = block; + } + + /* initialize block */ + block->start_offset = reg[i].offset; + block->pm4[block->pm4_ndwords++] = PKT3(reg[i].opcode, n); + block->pm4[block->pm4_ndwords++] = (block->start_offset - reg[i].offset_base) >> 2; + block->reg = &block->pm4[block->pm4_ndwords]; + block->pm4_ndwords += n; + block->nreg = n; + LIST_INITHEAD(&block->list); + + for (j = 0; j < n; j++) { + if (reg[i+j].need_bo) { + block->nbo++; + assert(block->nbo < R600_BLOCK_MAX_BO); + block->pm4_bo_index[j] = block->nbo; + block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0); + block->pm4[block->pm4_ndwords++] = 0x00000000; + block->reloc[block->nbo].flush_flags = reg[i+j].flush_flags; + block->reloc[block->nbo].flush_mask = reg[i+j].flush_mask; + block->reloc[block->nbo].bo_pm4_index = block->pm4_ndwords - 1; + } + } + for (j = 0; j < n; j++) { + if (reg[i+j].flush_flags) { + block->pm4_flush_ndwords += 7; + } + } + /* check that we stay in limit */ + assert(block->pm4_ndwords < R600_BLOCK_MAX_REG); + } + return 0; +} + +/* R600/R700 configuration */ +static const struct r600_reg r600_config_reg_list[] = { + {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_008C00_SQ_CONFIG, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_008C08_SQ_GPR_RESOURCE_MGMT_2, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_008C0C_SQ_THREAD_RESOURCE_MGMT, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_008C10_SQ_STACK_RESOURCE_MGMT_1, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_008C14_SQ_STACK_RESOURCE_MGMT_2, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_009508_TA_CNTL_AUX, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_009714_VC_ENHANCE, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_009830_DB_DEBUG, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_009838_DB_WATERMARKS, 0, 0, 0}, +}; + +static const struct r600_reg r600_ctl_const_list[] = { + {PKT3_SET_CTL_CONST, R600_CTL_CONST_OFFSET, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0, 0}, + {PKT3_SET_CTL_CONST, R600_CTL_CONST_OFFSET, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0, 0}, +}; + +static const struct r600_reg r600_context_reg_list[] = { + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028350_SX_MISC, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286C8_SPI_THREAD_GROUPING, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A10_VGT_OUTPUT_PATH_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A14_VGT_HOS_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A20_VGT_HOS_REUSE_DEPTH, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A24_VGT_GROUP_PRIM_TYPE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A28_VGT_GROUP_FIRST_DECR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A2C_VGT_GROUP_DECR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A30_VGT_GROUP_VECT_0_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A34_VGT_GROUP_VECT_1_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A40_VGT_GS_MODE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A4C_PA_SC_MODE_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028AB0_VGT_STRMOUT_EN, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028AB4_VGT_REUSE_OFF, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028AB8_VGT_VTX_CNT_EN, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028B20_VGT_STRMOUT_BUFFER_EN, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028028_DB_STENCIL_CLEAR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02802C_DB_DEPTH_CLEAR, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028040_CB_COLOR0_BASE, 1, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A0_CB_COLOR0_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028060_CB_COLOR0_SIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028080_CB_COLOR0_VIEW, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280E0_CB_COLOR0_FRAG, 1, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280C0_CB_COLOR0_TILE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028100_CB_COLOR0_MASK, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028044_CB_COLOR1_BASE, 1, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A4_CB_COLOR1_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028064_CB_COLOR1_SIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028084_CB_COLOR1_VIEW, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280E4_CB_COLOR1_FRAG, 1, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280C4_CB_COLOR1_TILE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028104_CB_COLOR1_MASK, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028048_CB_COLOR2_BASE, 1, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A8_CB_COLOR2_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028068_CB_COLOR2_SIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028088_CB_COLOR2_VIEW, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280E8_CB_COLOR2_FRAG, 1, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280C8_CB_COLOR2_TILE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028108_CB_COLOR2_MASK, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02804C_CB_COLOR3_BASE, 1, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280AC_CB_COLOR3_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02806C_CB_COLOR3_SIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02808C_CB_COLOR3_VIEW, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280EC_CB_COLOR3_FRAG, 1, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280CC_CB_COLOR3_TILE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02810C_CB_COLOR3_MASK, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028050_CB_COLOR4_BASE, 1, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B0_CB_COLOR4_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028070_CB_COLOR4_SIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028090_CB_COLOR4_VIEW, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280F0_CB_COLOR4_FRAG, 1, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280D0_CB_COLOR4_TILE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028110_CB_COLOR4_MASK, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028054_CB_COLOR5_BASE, 1, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B4_CB_COLOR5_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028074_CB_COLOR5_SIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028094_CB_COLOR5_VIEW, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280F4_CB_COLOR5_FRAG, 1, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280D4_CB_COLOR5_TILE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028114_CB_COLOR5_MASK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028058_CB_COLOR6_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B8_CB_COLOR6_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028078_CB_COLOR6_SIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028098_CB_COLOR6_VIEW, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280F8_CB_COLOR6_FRAG, 1, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280D8_CB_COLOR6_TILE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028118_CB_COLOR6_MASK, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02805C_CB_COLOR7_BASE, 1, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280BC_CB_COLOR7_INFO, 1, 0, 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02807C_CB_COLOR7_SIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02809C_CB_COLOR7_VIEW, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280FC_CB_COLOR7_FRAG, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280DC_CB_COLOR7_TILE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02811C_CB_COLOR7_MASK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028120_CB_CLEAR_RED, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028124_CB_CLEAR_GREEN, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028128_CB_CLEAR_BLUE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02812C_CB_CLEAR_ALPHA, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028140_ALU_CONST_BUFFER_SIZE_PS_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028180_ALU_CONST_BUFFER_SIZE_VS_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028940_ALU_CONST_CACHE_PS_0, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028980_ALU_CONST_CACHE_VS_0, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02823C_CB_SHADER_MASK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028238_CB_TARGET_MASK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028414_CB_BLEND_RED, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028418_CB_BLEND_GREEN, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02841C_CB_BLEND_BLUE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028420_CB_BLEND_ALPHA, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028424_CB_FOG_RED, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028428_CB_FOG_GREEN, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02842C_CB_FOG_BLUE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028430_DB_STENCILREFMASK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028434_DB_STENCILREFMASK_BF, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028438_SX_ALPHA_REF, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286DC_SPI_FOG_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286E0_SPI_FOG_FUNC_SCALE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286E4_SPI_FOG_FUNC_BIAS, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028780_CB_BLEND0_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028784_CB_BLEND1_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028788_CB_BLEND2_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02878C_CB_BLEND3_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028790_CB_BLEND4_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028794_CB_BLEND5_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028798_CB_BLEND6_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02879C_CB_BLEND7_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0287A0_CB_SHADER_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028800_DB_DEPTH_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028804_CB_BLEND_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028808_CB_COLOR_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02880C_DB_SHADER_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C04_PA_SC_AA_CONFIG, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C30_CB_CLRCMP_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C34_CB_CLRCMP_SRC, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C38_CB_CLRCMP_DST, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C3C_CB_CLRCMP_MSK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C48_PA_SC_AA_MASK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028D44_DB_ALPHA_TO_MASK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02800C_DB_DEPTH_BASE, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028000_DB_DEPTH_SIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028004_DB_DEPTH_VIEW, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028010_DB_DEPTH_INFO, 1, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028D0C_DB_RENDER_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028D10_DB_RENDER_OVERRIDE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028D24_DB_HTILE_SURFACE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028D30_DB_PRELOAD_CONTROL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028D34_DB_PREFETCH_LIMIT, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028034_PA_SC_SCREEN_SCISSOR_BR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028200_PA_SC_WINDOW_OFFSET, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02820C_PA_SC_CLIPRECT_RULE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028210_PA_SC_CLIPRECT_0_TL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028214_PA_SC_CLIPRECT_0_BR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028218_PA_SC_CLIPRECT_1_TL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02821C_PA_SC_CLIPRECT_1_BR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028220_PA_SC_CLIPRECT_2_TL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028224_PA_SC_CLIPRECT_2_BR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028228_PA_SC_CLIPRECT_3_TL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02822C_PA_SC_CLIPRECT_3_BR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028230_PA_SC_EDGERULE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028240_PA_SC_GENERIC_SCISSOR_TL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028244_PA_SC_GENERIC_SCISSOR_BR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028254_PA_SC_VPORT_SCISSOR_0_BR, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0282D0_PA_SC_VPORT_ZMIN_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0282D4_PA_SC_VPORT_ZMAX_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02843C_PA_CL_VPORT_XSCALE_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028440_PA_CL_VPORT_XOFFSET_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028444_PA_CL_VPORT_YSCALE_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028448_PA_CL_VPORT_YOFFSET_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02844C_PA_CL_VPORT_ZSCALE_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028450_PA_CL_VPORT_ZOFFSET_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286D4_SPI_INTERP_CONTROL_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028810_PA_CL_CLIP_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028814_PA_SU_SC_MODE_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028818_PA_CL_VTE_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02881C_PA_CL_VS_OUT_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028820_PA_CL_NANINF_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A00_PA_SU_POINT_SIZE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A04_PA_SU_POINT_MINMAX, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A08_PA_SU_LINE_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A0C_PA_SC_LINE_STIPPLE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A48_PA_SC_MPASS_PS_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C00_PA_SC_LINE_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E20_PA_CL_UCP0_X, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E24_PA_CL_UCP0_Y, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E28_PA_CL_UCP0_Z, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E2C_PA_CL_UCP0_W, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E30_PA_CL_UCP1_X, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E34_PA_CL_UCP1_Y, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E38_PA_CL_UCP1_Z, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E3C_PA_CL_UCP1_W, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E40_PA_CL_UCP2_X, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E44_PA_CL_UCP2_Y, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E48_PA_CL_UCP2_Z, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E4C_PA_CL_UCP2_W, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E50_PA_CL_UCP3_X, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E54_PA_CL_UCP3_Y, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E58_PA_CL_UCP3_Z, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E5C_PA_CL_UCP3_W, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E60_PA_CL_UCP4_X, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E64_PA_CL_UCP4_Y, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E68_PA_CL_UCP4_Z, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E6C_PA_CL_UCP4_W, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E70_PA_CL_UCP5_X, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E74_PA_CL_UCP5_Y, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E78_PA_CL_UCP5_Z, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E7C_PA_CL_UCP5_W, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028380_SQ_VTX_SEMANTIC_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028384_SQ_VTX_SEMANTIC_1, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028388_SQ_VTX_SEMANTIC_2, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02838C_SQ_VTX_SEMANTIC_3, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028390_SQ_VTX_SEMANTIC_4, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028394_SQ_VTX_SEMANTIC_5, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028398_SQ_VTX_SEMANTIC_6, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02839C_SQ_VTX_SEMANTIC_7, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283A0_SQ_VTX_SEMANTIC_8, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283A4_SQ_VTX_SEMANTIC_9, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283A8_SQ_VTX_SEMANTIC_10, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283AC_SQ_VTX_SEMANTIC_11, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283B0_SQ_VTX_SEMANTIC_12, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283B4_SQ_VTX_SEMANTIC_13, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283B8_SQ_VTX_SEMANTIC_14, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283BC_SQ_VTX_SEMANTIC_15, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283C0_SQ_VTX_SEMANTIC_16, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283C4_SQ_VTX_SEMANTIC_17, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283C8_SQ_VTX_SEMANTIC_18, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283CC_SQ_VTX_SEMANTIC_19, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283D0_SQ_VTX_SEMANTIC_20, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283D4_SQ_VTX_SEMANTIC_21, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283D8_SQ_VTX_SEMANTIC_22, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283DC_SQ_VTX_SEMANTIC_23, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283E0_SQ_VTX_SEMANTIC_24, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283E4_SQ_VTX_SEMANTIC_25, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283E8_SQ_VTX_SEMANTIC_26, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283EC_SQ_VTX_SEMANTIC_27, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283F0_SQ_VTX_SEMANTIC_28, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283F4_SQ_VTX_SEMANTIC_29, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283F8_SQ_VTX_SEMANTIC_30, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283FC_SQ_VTX_SEMANTIC_31, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028614_SPI_VS_OUT_ID_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028618_SPI_VS_OUT_ID_1, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02861C_SPI_VS_OUT_ID_2, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028620_SPI_VS_OUT_ID_3, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028624_SPI_VS_OUT_ID_4, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028628_SPI_VS_OUT_ID_5, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02862C_SPI_VS_OUT_ID_6, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028630_SPI_VS_OUT_ID_7, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028634_SPI_VS_OUT_ID_8, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028638_SPI_VS_OUT_ID_9, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286C4_SPI_VS_OUT_CONFIG, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028858_SQ_PGM_START_VS, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028868_SQ_PGM_RESOURCES_VS, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028894_SQ_PGM_START_FS, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288A4_SQ_PGM_RESOURCES_FS, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288D0_SQ_PGM_CF_OFFSET_VS, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288DC_SQ_PGM_CF_OFFSET_FS, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028644_SPI_PS_INPUT_CNTL_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028648_SPI_PS_INPUT_CNTL_1, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028650_SPI_PS_INPUT_CNTL_3, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028654_SPI_PS_INPUT_CNTL_4, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028658_SPI_PS_INPUT_CNTL_5, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028660_SPI_PS_INPUT_CNTL_7, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028664_SPI_PS_INPUT_CNTL_8, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028668_SPI_PS_INPUT_CNTL_9, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028670_SPI_PS_INPUT_CNTL_11, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028674_SPI_PS_INPUT_CNTL_12, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028678_SPI_PS_INPUT_CNTL_13, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028680_SPI_PS_INPUT_CNTL_15, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028684_SPI_PS_INPUT_CNTL_16, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028688_SPI_PS_INPUT_CNTL_17, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028690_SPI_PS_INPUT_CNTL_19, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028694_SPI_PS_INPUT_CNTL_20, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028698_SPI_PS_INPUT_CNTL_21, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286D8_SPI_INPUT_Z, 0, 0, 0}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028840_SQ_PGM_START_PS, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028850_SQ_PGM_RESOURCES_PS, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028854_SQ_PGM_EXPORTS_PS, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288CC_SQ_PGM_CF_OFFSET_PS, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028400_VGT_MAX_VTX_INDX, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028404_VGT_MIN_VTX_INDX, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028408_VGT_INDX_OFFSET, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A84_VGT_PRIMITIVEID_EN, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0, 0, 0}, + {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0, 0, 0}, +}; + +/* SHADER RESOURCE R600/R700 */ +static int r600_state_resource_init(struct r600_context *ctx, u32 offset) +{ + struct r600_reg r600_shader_resource[] = { + {PKT3_SET_RESOURCE, R600_RESOURCE_OFFSET, R_038000_RESOURCE0_WORD0, 0, 0, 0}, + {PKT3_SET_RESOURCE, R600_RESOURCE_OFFSET, R_038004_RESOURCE0_WORD1, 0, 0, 0}, + {PKT3_SET_RESOURCE, R600_RESOURCE_OFFSET, R_038008_RESOURCE0_WORD2, 1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, + {PKT3_SET_RESOURCE, R600_RESOURCE_OFFSET, R_03800C_RESOURCE0_WORD3, 1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, + {PKT3_SET_RESOURCE, R600_RESOURCE_OFFSET, R_038010_RESOURCE0_WORD4, 0, 0, 0}, + {PKT3_SET_RESOURCE, R600_RESOURCE_OFFSET, R_038014_RESOURCE0_WORD5, 0, 0, 0}, + {PKT3_SET_RESOURCE, R600_RESOURCE_OFFSET, R_038018_RESOURCE0_WORD6, 0, 0, 0}, + }; + unsigned nreg = Elements(r600_shader_resource); + + for (int i = 0; i < nreg; i++) { + r600_shader_resource[i].offset += offset; + } + return r600_context_add_block(ctx, r600_shader_resource, nreg); +} + +/* SHADER SAMPLER R600/R700 */ +static int r600_state_sampler_init(struct r600_context *ctx, u32 offset) +{ + struct r600_reg r600_shader_sampler[] = { + {PKT3_SET_SAMPLER, R600_SAMPLER_OFFSET, R_03C000_SQ_TEX_SAMPLER_WORD0_0, 0, 0, 0}, + {PKT3_SET_SAMPLER, R600_SAMPLER_OFFSET, R_03C004_SQ_TEX_SAMPLER_WORD1_0, 0, 0, 0}, + {PKT3_SET_SAMPLER, R600_SAMPLER_OFFSET, R_03C008_SQ_TEX_SAMPLER_WORD2_0, 0, 0, 0}, + }; + unsigned nreg = Elements(r600_shader_sampler); + + for (int i = 0; i < nreg; i++) { + r600_shader_sampler[i].offset += offset; + } + return r600_context_add_block(ctx, r600_shader_sampler, nreg); +} + +/* SHADER SAMPLER BORDER R600/R700 */ +static int r600_state_sampler_border_init(struct r600_context *ctx, u32 offset) +{ + struct r600_reg r600_shader_sampler_border[] = { + {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_00A400_TD_PS_SAMPLER0_BORDER_RED, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, 0, 0, 0}, + {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, 0, 0, 0}, + }; + unsigned nreg = Elements(r600_shader_sampler_border); + + for (int i = 0; i < nreg; i++) { + r600_shader_sampler_border[i].offset += offset; + } + return r600_context_add_block(ctx, r600_shader_sampler_border, nreg); +} + +static int r600_loop_const_init(struct r600_context *ctx, u32 offset) +{ + unsigned nreg = 32; + struct r600_reg r600_loop_consts[32]; + int i; + + for (i = 0; i < nreg; i++) { + r600_loop_consts[i].opcode = PKT3_SET_LOOP_CONST; + r600_loop_consts[i].offset_base = R600_LOOP_CONST_OFFSET; + r600_loop_consts[i].offset = R600_LOOP_CONST_OFFSET + ((offset + i) * 4); + r600_loop_consts[i].need_bo = 0; + r600_loop_consts[i].flush_flags = 0; + r600_loop_consts[i].flush_mask = 0; + } + return r600_context_add_block(ctx, r600_loop_consts, nreg); +} + +/* initialize */ +void r600_context_fini(struct r600_context *ctx) +{ + struct r600_block *block; + struct r600_range *range; + + for (int i = 0; i < 256; i++) { + for (int j = 0; j < (1 << ctx->hash_shift); j++) { + block = ctx->range[i].blocks[j]; + if (block) { + for (int k = 0, offset = block->start_offset; k < block->nreg; k++, offset += 4) { + range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; + range->blocks[CTX_BLOCK_ID(ctx, offset)] = NULL; + } + free(block); + } + } + free(ctx->range[i].blocks); + } + free(ctx->reloc); + free(ctx->pm4); + if (ctx->fence_bo) { + r600_bo_reference(ctx->radeon, &ctx->fence_bo, NULL); + } + memset(ctx, 0, sizeof(struct r600_context)); +} + +int r600_context_init(struct r600_context *ctx, struct radeon *radeon) +{ + int r; + + memset(ctx, 0, sizeof(struct r600_context)); + ctx->radeon = radeon; + LIST_INITHEAD(&ctx->query_list); + + /* initialize hash */ + ctx->hash_size = 19; + ctx->hash_shift = 11; + for (int i = 0; i < 256; i++) { + ctx->range[i].start_offset = i << ctx->hash_shift; + ctx->range[i].end_offset = ((i + 1) << ctx->hash_shift) - 1; + ctx->range[i].blocks = calloc(1 << ctx->hash_shift, sizeof(void*)); + if (ctx->range[i].blocks == NULL) { + return -ENOMEM; + } + } + + /* add blocks */ + r = r600_context_add_block(ctx, r600_config_reg_list, + Elements(r600_config_reg_list)); + if (r) + goto out_err; + r = r600_context_add_block(ctx, r600_context_reg_list, + Elements(r600_context_reg_list)); + if (r) + goto out_err; + r = r600_context_add_block(ctx, r600_ctl_const_list, + Elements(r600_ctl_const_list)); + if (r) + goto out_err; + + /* PS SAMPLER BORDER */ + for (int j = 0, offset = 0; j < 18; j++, offset += 0x10) { + r = r600_state_sampler_border_init(ctx, offset); + if (r) + goto out_err; + } + + /* VS SAMPLER BORDER */ + for (int j = 0, offset = 0x200; j < 18; j++, offset += 0x10) { + r = r600_state_sampler_border_init(ctx, offset); + if (r) + goto out_err; + } + /* PS SAMPLER */ + for (int j = 0, offset = 0; j < 18; j++, offset += 0xC) { + r = r600_state_sampler_init(ctx, offset); + if (r) + goto out_err; + } + /* VS SAMPLER */ + for (int j = 0, offset = 0xD8; j < 18; j++, offset += 0xC) { + r = r600_state_sampler_init(ctx, offset); + if (r) + goto out_err; + } + /* PS RESOURCE */ + for (int j = 0, offset = 0; j < 160; j++, offset += 0x1C) { + r = r600_state_resource_init(ctx, offset); + if (r) + goto out_err; + } + /* VS RESOURCE */ + for (int j = 0, offset = 0x1180; j < 160; j++, offset += 0x1C) { + r = r600_state_resource_init(ctx, offset); + if (r) + goto out_err; + } + + /* PS loop const */ + r600_loop_const_init(ctx, 0); + /* VS loop const */ + r600_loop_const_init(ctx, 32); + + /* setup block table */ + ctx->blocks = calloc(ctx->nblocks, sizeof(void*)); + for (int i = 0, c = 0; i < 256; i++) { + for (int j = 0, add; j < (1 << ctx->hash_shift); j++) { + if (ctx->range[i].blocks[j]) { + add = 1; + for (int k = 0; k < c; k++) { + if (ctx->blocks[k] == ctx->range[i].blocks[j]) { + add = 0; + break; + } + } + if (add) { + assert(c < ctx->nblocks); + ctx->blocks[c++] = ctx->range[i].blocks[j]; + j += (ctx->range[i].blocks[j]->nreg << 2) - 1; + } + } + } + } + + /* allocate cs variables */ + ctx->nreloc = RADEON_CTX_MAX_PM4; + ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc)); + if (ctx->reloc == NULL) { + r = -ENOMEM; + goto out_err; + } + ctx->bo = calloc(ctx->nreloc, sizeof(void *)); + if (ctx->bo == NULL) { + r = -ENOMEM; + goto out_err; + } + ctx->pm4_ndwords = RADEON_CTX_MAX_PM4; + ctx->pm4 = calloc(ctx->pm4_ndwords, 4); + if (ctx->pm4 == NULL) { + r = -ENOMEM; + goto out_err; + } + /* save 16dwords space for fence mecanism */ + ctx->pm4_ndwords -= 16; + + r = r600_context_init_fence(ctx); + if (r) { + goto out_err; + } + + /* init dirty list */ + LIST_INITHEAD(&ctx->dirty); + return 0; +out_err: + r600_context_fini(ctx); + return r; +} + +void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, + unsigned flush_mask, struct r600_bo *rbo) +{ + struct radeon_bo *bo; + + bo = r600_bo_get_bo(rbo); + /* if bo has already been flush */ + if (!(bo->last_flush ^ flush_flags)) { + bo->last_flush &= flush_mask; + return; + } + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3); + ctx->pm4[ctx->pm4_cdwords++] = flush_flags; + ctx->pm4[ctx->pm4_cdwords++] = (bo->size + 255) >> 8; + ctx->pm4[ctx->pm4_cdwords++] = 0x00000000; + ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A; + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = bo->reloc_id; + bo->last_flush = (bo->last_flush | flush_flags) & flush_mask; +} + +void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo) +{ + struct radeon_bo *bo; + + bo = r600_bo_get_bo(rbo); + assert(bo != NULL); + if (bo->reloc) { + *pm4 = bo->reloc_id; + return; + } + bo->reloc = &ctx->reloc[ctx->creloc]; + bo->reloc_id = ctx->creloc * sizeof(struct r600_reloc) / 4; + ctx->reloc[ctx->creloc].handle = bo->handle; + ctx->reloc[ctx->creloc].read_domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM; + ctx->reloc[ctx->creloc].write_domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM; + ctx->reloc[ctx->creloc].flags = 0; + radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo); + ctx->creloc++; + /* set PKT3 to point to proper reloc */ + *pm4 = bo->reloc_id; +} + +void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state) +{ + struct r600_range *range; + struct r600_block *block; + + for (int i = 0; i < state->nregs; i++) { + unsigned id; + + range = &ctx->range[CTX_RANGE_ID(ctx, state->regs[i].offset)]; + block = range->blocks[CTX_BLOCK_ID(ctx, state->regs[i].offset)]; + id = (state->regs[i].offset - block->start_offset) >> 2; + block->reg[id] &= ~state->regs[i].mask; + block->reg[id] |= state->regs[i].value; + if (block->pm4_bo_index[id]) { + /* find relocation */ + id = block->pm4_bo_index[id]; + r600_bo_reference(ctx->radeon, &block->reloc[id].bo, state->regs[i].bo); + } + if (!(block->status & R600_BLOCK_STATUS_DIRTY)) { + block->status |= R600_BLOCK_STATUS_ENABLED; + block->status |= R600_BLOCK_STATUS_DIRTY; + ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; + LIST_ADDTAIL(&block->list,&ctx->dirty); + } + } +} + +static inline void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) +{ + struct r600_range *range; + struct r600_block *block; + + range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; + block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; + if (state == NULL) { + block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); + r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); + r600_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL); + LIST_DELINIT(&block->list); + return; + } + block->reg[0] = state->regs[0].value; + block->reg[1] = state->regs[1].value; + block->reg[2] = state->regs[2].value; + block->reg[3] = state->regs[3].value; + block->reg[4] = state->regs[4].value; + block->reg[5] = state->regs[5].value; + block->reg[6] = state->regs[6].value; + r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); + r600_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL); + if (state->regs[0].bo) { + /* VERTEX RESOURCE, we preted there is 2 bo to relocate so + * we have single case btw VERTEX & TEXTURE resource + */ + r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo); + r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo); + } else { + /* TEXTURE RESOURCE */ + r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo); + r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo); + } + if (!(block->status & R600_BLOCK_STATUS_DIRTY)) { + block->status |= R600_BLOCK_STATUS_ENABLED; + block->status |= R600_BLOCK_STATUS_DIRTY; + ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; + LIST_ADDTAIL(&block->list,&ctx->dirty); + } +} + +void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +{ + unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1C * rid; + + r600_context_pipe_state_set_resource(ctx, state, offset); +} + +void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +{ + unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1180 + 0x1C * rid; + + r600_context_pipe_state_set_resource(ctx, state, offset); +} + +static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) +{ + struct r600_range *range; + struct r600_block *block; + + range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; + block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; + if (state == NULL) { + block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); + LIST_DELINIT(&block->list); + return; + } + block->reg[0] = state->regs[0].value; + block->reg[1] = state->regs[1].value; + block->reg[2] = state->regs[2].value; + if (!(block->status & R600_BLOCK_STATUS_DIRTY)) { + block->status |= R600_BLOCK_STATUS_ENABLED; + block->status |= R600_BLOCK_STATUS_DIRTY; + ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; + LIST_ADDTAIL(&block->list,&ctx->dirty); + } +} + +static inline void r600_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) +{ + struct r600_range *range; + struct r600_block *block; + + range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; + block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; + if (state == NULL) { + block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); + LIST_DELINIT(&block->list); + return; + } + if (state->nregs <= 3) { + return; + } + block->reg[0] = state->regs[3].value; + block->reg[1] = state->regs[4].value; + block->reg[2] = state->regs[5].value; + block->reg[3] = state->regs[6].value; + if (!(block->status & R600_BLOCK_STATUS_DIRTY)) { + block->status |= R600_BLOCK_STATUS_ENABLED; + block->status |= R600_BLOCK_STATUS_DIRTY; + ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; + LIST_ADDTAIL(&block->list,&ctx->dirty); + } +} + +void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) +{ + unsigned offset; + + offset = 0x0003C000 + id * 0xc; + r600_context_pipe_state_set_sampler(ctx, state, offset); + offset = 0x0000A400 + id * 0x10; + r600_context_pipe_state_set_sampler_border(ctx, state, offset); +} + +void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) +{ + unsigned offset; + + offset = 0x0003C0D8 + id * 0xc; + r600_context_pipe_state_set_sampler(ctx, state, offset); + offset = 0x0000A600 + id * 0x10; + r600_context_pipe_state_set_sampler_border(ctx, state, offset); +} + +struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset) +{ + struct r600_range *range; + struct r600_block *block; + unsigned id; + + range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; + block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; + offset -= block->start_offset; + id = block->pm4_bo_index[offset >> 2]; + if (block->reloc[id].bo) { + return block->reloc[id].bo; + } + return NULL; +} + +void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) +{ + struct r600_bo *cb[8]; + struct r600_bo *db; + unsigned ndwords = 9; + struct r600_block *dirty_block = NULL; + struct r600_block *next_block; + + if (draw->indices) { + ndwords = 13; + /* make sure there is enough relocation space before scheduling draw */ + if (ctx->creloc >= (ctx->nreloc - 1)) { + r600_context_flush(ctx); + } + } + + /* find number of color buffer */ + db = r600_context_reg_bo(ctx, R_02800C_DB_DEPTH_BASE); + cb[0] = r600_context_reg_bo(ctx, R_028040_CB_COLOR0_BASE); + cb[1] = r600_context_reg_bo(ctx, R_028044_CB_COLOR1_BASE); + cb[2] = r600_context_reg_bo(ctx, R_028048_CB_COLOR2_BASE); + cb[3] = r600_context_reg_bo(ctx, R_02804C_CB_COLOR3_BASE); + cb[4] = r600_context_reg_bo(ctx, R_028050_CB_COLOR4_BASE); + cb[5] = r600_context_reg_bo(ctx, R_028054_CB_COLOR5_BASE); + cb[6] = r600_context_reg_bo(ctx, R_028058_CB_COLOR6_BASE); + cb[7] = r600_context_reg_bo(ctx, R_02805C_CB_COLOR7_BASE); + for (int i = 0; i < 8; i++) { + if (cb[i]) { + ndwords += 7; + } + } + if (db) + ndwords += 7; + + /* queries need some special values */ + if (ctx->num_query_running) { + if (ctx->radeon->family >= CHIP_RV770) { + r600_context_reg(ctx, + R_028D0C_DB_RENDER_CONTROL, + S_028D0C_R700_PERFECT_ZPASS_COUNTS(1), + S_028D0C_R700_PERFECT_ZPASS_COUNTS(1)); + } + r600_context_reg(ctx, + R_028D10_DB_RENDER_OVERRIDE, + S_028D10_NOOP_CULL_DISABLE(1), + S_028D10_NOOP_CULL_DISABLE(1)); + } + + if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { + /* need to flush */ + r600_context_flush(ctx); + } + /* at that point everythings is flushed and ctx->pm4_cdwords = 0 */ + if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) { + R600_ERR("context is too big to be scheduled\n"); + return; + } + + /* enough room to copy packet */ + LIST_FOR_EACH_ENTRY_SAFE(dirty_block, next_block, &ctx->dirty,list) { + r600_context_block_emit_dirty(ctx, dirty_block); + } + + /* draw packet */ + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0); + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type; + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0); + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances; + if (draw->indices) { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3); + ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset + r600_bo_offset(draw->indices); + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = 0; + r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices); + } else { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1); + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; + } + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT; + + /* flush color buffer */ + for (int i = 0; i < 8; i++) { + if (cb[i]) { + r600_context_bo_flush(ctx, + (S_0085F0_CB0_DEST_BASE_ENA(1) << i) | + S_0085F0_CB_ACTION_ENA(1), + 0, cb[i]); + } + } + if (db) { + r600_context_bo_flush(ctx, S_0085F0_DB_ACTION_ENA(1), 0, db); + } + + /* all dirty state have been scheduled in current cs */ + ctx->pm4_dirty_cdwords = 0; +} + +void r600_context_flush(struct r600_context *ctx) +{ + struct drm_radeon_cs drmib; + struct drm_radeon_cs_chunk chunks[2]; + uint64_t chunk_array[2]; + unsigned fence; + int r; + + if (!ctx->pm4_cdwords) + return; + + /* suspend queries */ + r600_context_queries_suspend(ctx); + + radeon_bo_pbmgr_flush_maps(ctx->radeon->kman); + + /* emit fence */ + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT | (5 << 8); + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24); + ctx->pm4[ctx->pm4_cdwords++] = ctx->fence; + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = 0; + r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], ctx->fence_bo); + +#if 1 + /* emit cs */ + drmib.num_chunks = 2; + drmib.chunks = (uint64_t)(uintptr_t)chunk_array; + chunks[0].chunk_id = RADEON_CHUNK_ID_IB; + chunks[0].length_dw = ctx->pm4_cdwords; + chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4; + chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; + chunks[1].length_dw = ctx->creloc * sizeof(struct r600_reloc) / 4; + chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc; + chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0]; + chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1]; + r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib, + sizeof(struct drm_radeon_cs)); +#endif + + r600_context_update_fenced_list(ctx); + + fence = ctx->fence + 1; + if (fence < ctx->fence) { + /* wrap around */ + fence = 1; + r600_context_fence_wraparound(ctx, fence); + } + ctx->fence = fence; + + /* restart */ + for (int i = 0; i < ctx->creloc; i++) { + ctx->bo[i]->reloc = NULL; + ctx->bo[i]->last_flush = 0; + radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL); + } + ctx->creloc = 0; + ctx->pm4_dirty_cdwords = 0; + ctx->pm4_cdwords = 0; + + /* resume queries */ + r600_context_queries_resume(ctx); + + /* set all valid group as dirty so they get reemited on + * next draw command + */ + for (int i = 0; i < ctx->nblocks; i++) { + if (ctx->blocks[i]->status & R600_BLOCK_STATUS_ENABLED) { + if(!(ctx->blocks[i]->status & R600_BLOCK_STATUS_DIRTY)) { + LIST_ADDTAIL(&ctx->blocks[i]->list,&ctx->dirty); + } + ctx->pm4_dirty_cdwords += ctx->blocks[i]->pm4_ndwords + ctx->blocks[i]->pm4_flush_ndwords; + ctx->blocks[i]->status |= R600_BLOCK_STATUS_DIRTY; + } + } +} + +void r600_context_dump_bof(struct r600_context *ctx, const char *file) +{ + bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root; + unsigned i; + + root = device_id = bcs = blob = array = bo = size = handle = NULL; + root = bof_object(); + if (root == NULL) + goto out_err; + device_id = bof_int32(ctx->radeon->device); + if (device_id == NULL) + goto out_err; + if (bof_object_set(root, "device_id", device_id)) + goto out_err; + bof_decref(device_id); + device_id = NULL; + /* dump relocs */ + blob = bof_blob(ctx->creloc * 16, ctx->reloc); + if (blob == NULL) + goto out_err; + if (bof_object_set(root, "reloc", blob)) + goto out_err; + bof_decref(blob); + blob = NULL; + /* dump cs */ + blob = bof_blob(ctx->pm4_cdwords * 4, ctx->pm4); + if (blob == NULL) + goto out_err; + if (bof_object_set(root, "pm4", blob)) + goto out_err; + bof_decref(blob); + blob = NULL; + /* dump bo */ + array = bof_array(); + if (array == NULL) + goto out_err; + for (i = 0; i < ctx->creloc; i++) { + struct radeon_bo *rbo = ctx->bo[i]; + bo = bof_object(); + if (bo == NULL) + goto out_err; + size = bof_int32(rbo->size); + if (size == NULL) + goto out_err; + if (bof_object_set(bo, "size", size)) + goto out_err; + bof_decref(size); + size = NULL; + handle = bof_int32(rbo->handle); + if (handle == NULL) + goto out_err; + if (bof_object_set(bo, "handle", handle)) + goto out_err; + bof_decref(handle); + handle = NULL; + radeon_bo_map(ctx->radeon, rbo); + blob = bof_blob(rbo->size, rbo->data); + radeon_bo_unmap(ctx->radeon, rbo); + if (blob == NULL) + goto out_err; + if (bof_object_set(bo, "data", blob)) + goto out_err; + bof_decref(blob); + blob = NULL; + if (bof_array_append(array, bo)) + goto out_err; + bof_decref(bo); + bo = NULL; + } + if (bof_object_set(root, "bo", array)) + goto out_err; + bof_dump_file(root, file); +out_err: + bof_decref(blob); + bof_decref(array); + bof_decref(bo); + bof_decref(size); + bof_decref(handle); + bof_decref(device_id); + bof_decref(root); +} + +static void r600_query_result(struct r600_context *ctx, struct r600_query *query) +{ + u64 start, end; + u32 *results; + int i; + + results = r600_bo_map(ctx->radeon, query->buffer, 0, NULL); + for (i = 0; i < query->num_results; i += 4) { + start = (u64)results[i] | (u64)results[i + 1] << 32; + end = (u64)results[i + 2] | (u64)results[i + 3] << 32; + if ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) { + query->result += end - start; + } + } + r600_bo_unmap(ctx->radeon, query->buffer); + query->num_results = 0; +} + +void r600_query_begin(struct r600_context *ctx, struct r600_query *query) +{ + /* query request needs 6 dwords for begin + 6 dwords for end */ + if ((12 + ctx->pm4_cdwords) > ctx->pm4_ndwords) { + /* need to flush */ + r600_context_flush(ctx); + } + + /* if query buffer is full force a flush */ + if (query->num_results >= ((query->buffer_size >> 2) - 2)) { + r600_context_flush(ctx); + r600_query_result(ctx, query); + } + + /* emit begin query */ + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_ZPASS_DONE; + ctx->pm4[ctx->pm4_cdwords++] = query->num_results + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = 0; + r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); + + query->state |= R600_QUERY_STATE_STARTED; + query->state ^= R600_QUERY_STATE_ENDED; + ctx->num_query_running++; +} + +void r600_query_end(struct r600_context *ctx, struct r600_query *query) +{ + /* emit begin query */ + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_ZPASS_DONE; + ctx->pm4[ctx->pm4_cdwords++] = query->num_results + 8 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = 0; + r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); + + query->num_results += 16; + query->state ^= R600_QUERY_STATE_STARTED; + query->state |= R600_QUERY_STATE_ENDED; + ctx->num_query_running--; +} + +struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type) +{ + struct r600_query *query; + + if (query_type != PIPE_QUERY_OCCLUSION_COUNTER) + return NULL; + + query = calloc(1, sizeof(struct r600_query)); + if (query == NULL) + return NULL; + + query->type = query_type; + query->buffer_size = 4096; + + query->buffer = r600_bo(ctx->radeon, query->buffer_size, 1, 0); + if (!query->buffer) { + free(query); + return NULL; + } + + LIST_ADDTAIL(&query->list, &ctx->query_list); + + return query; +} + +void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query) +{ + r600_bo_reference(ctx->radeon, &query->buffer, NULL); + LIST_DELINIT(&query->list); + free(query); +} + +boolean r600_context_query_result(struct r600_context *ctx, + struct r600_query *query, + boolean wait, void *vresult) +{ + uint64_t *result = (uint64_t*)vresult; + + if (query->num_results) { + r600_context_flush(ctx); + } + r600_query_result(ctx, query); + *result = query->result; + query->result = 0; + return TRUE; +} + +void r600_context_queries_suspend(struct r600_context *ctx) +{ + struct r600_query *query; + + LIST_FOR_EACH_ENTRY(query, &ctx->query_list, list) { + if (query->state & R600_QUERY_STATE_STARTED) { + r600_query_end(ctx, query); + query->state |= R600_QUERY_STATE_SUSPENDED; + } + } +} + +void r600_context_queries_resume(struct r600_context *ctx) +{ + struct r600_query *query; + + LIST_FOR_EACH_ENTRY(query, &ctx->query_list, list) { + if (query->state & R600_QUERY_STATE_SUSPENDED) { + r600_query_begin(ctx, query); + query->state ^= R600_QUERY_STATE_SUSPENDED; + } + } +} diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 7a9025ad3c..08e243b00d 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -30,27 +30,152 @@ #include <stdint.h> #include <stdlib.h> #include <assert.h> +#include <pipebuffer/pb_bufmgr.h> +#include "util/u_double_list.h" #include "r600.h" - struct radeon { int fd; int refcount; unsigned device; unsigned family; + enum chip_class chip_class; + struct pb_manager *kman; /* kernel bo manager */ + struct pb_manager *cman; /* cached bo manager */ + struct r600_tiling_info tiling_info; }; struct radeon *r600_new(int fd, unsigned device); void r600_delete(struct radeon *r600); struct r600_reg { - unsigned need_bo; - unsigned flush_flags; - unsigned offset; + unsigned opcode; + unsigned offset_base; + unsigned offset; + unsigned need_bo; + unsigned flush_flags; + unsigned flush_mask; +}; + +struct radeon_bo { + struct pipe_reference reference; + unsigned handle; + unsigned size; + unsigned alignment; + unsigned map_count; + void *data; + struct list_head fencedlist; + unsigned fence; + struct r600_context *ctx; + boolean shared; + struct r600_reloc *reloc; + unsigned reloc_id; + unsigned last_flush; +}; + +struct r600_bo { + struct pipe_reference reference; + struct pb_buffer *pb; + unsigned size; }; /* radeon_pciid.c */ unsigned radeon_family_from_device(unsigned device); +/* r600_drm.c */ +struct radeon *radeon_decref(struct radeon *radeon); + +/* radeon_bo.c */ +struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, + unsigned size, unsigned alignment, void *ptr); +void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst, + struct radeon_bo *src); +int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); +int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain); +void radeon_bo_pbmgr_flush_maps(struct pb_manager *_mgr); +int radeon_bo_fencelist(struct radeon *radeon, struct radeon_bo **bolist, uint32_t num_bo); + + +/* radeon_bo_pb.c */ +struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf); +struct pb_manager *radeon_bo_pbmgr_create(struct radeon *radeon); +struct pb_buffer *radeon_bo_pb_create_buffer_from_handle(struct pb_manager *_mgr, + uint32_t handle); + +/* r600_hw_context.c */ +int r600_context_init_fence(struct r600_context *ctx); +void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo); +void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, + unsigned flush_mask, struct r600_bo *rbo); +struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset); +int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg); + +/* r600_bo.c */ +unsigned r600_bo_get_handle(struct r600_bo *bo); +unsigned r600_bo_get_size(struct r600_bo *bo); +static INLINE struct radeon_bo *r600_bo_get_bo(struct r600_bo *bo) +{ + return radeon_bo_pb_get_bo(bo->pb); +} + +#define CTX_RANGE_ID(ctx, offset) (((offset) >> (ctx)->hash_shift) & 255) +#define CTX_BLOCK_ID(ctx, offset) ((offset) & ((1 << (ctx)->hash_shift) - 1)) + +static void inline r600_context_reg(struct r600_context *ctx, + unsigned offset, unsigned value, + unsigned mask) +{ + struct r600_range *range; + struct r600_block *block; + unsigned id; + + range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; + block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; + id = (offset - block->start_offset) >> 2; + block->reg[id] &= ~mask; + block->reg[id] |= value; + if (!(block->status & R600_BLOCK_STATUS_DIRTY)) { + ctx->pm4_dirty_cdwords += block->pm4_ndwords; + block->status |= R600_BLOCK_STATUS_ENABLED; + block->status |= R600_BLOCK_STATUS_DIRTY; + LIST_ADDTAIL(&block->list,&ctx->dirty); + } +} + +static inline void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block) +{ + int id; + + for (int j = 0; j < block->nreg; j++) { + if (block->pm4_bo_index[j]) { + /* find relocation */ + id = block->pm4_bo_index[j]; + r600_context_bo_reloc(ctx, + &block->pm4[block->reloc[id].bo_pm4_index], + block->reloc[id].bo); + r600_context_bo_flush(ctx, + block->reloc[id].flush_flags, + block->reloc[id].flush_mask, + block->reloc[id].bo); + } + } + memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, block->pm4_ndwords * 4); + ctx->pm4_cdwords += block->pm4_ndwords; + block->status ^= R600_BLOCK_STATUS_DIRTY; + LIST_DELINIT(&block->list); +} + +static inline int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo) +{ + bo->map_count++; + return 0; +} + +static inline void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo) +{ + bo->map_count--; + assert(bo->map_count >= 0); +} + #endif diff --git a/src/gallium/winsys/r600/drm/r600_state.c b/src/gallium/winsys/r600/drm/r600_state.c deleted file mode 100644 index b04885a85f..0000000000 --- a/src/gallium/winsys/r600/drm/r600_state.c +++ /dev/null @@ -1,658 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include "radeon_priv.h" -#include "r600d.h" - -#include "util/u_memory.h" - -static int r600_state_pm4_resource(struct radeon_state *state); -static int r600_state_pm4_cb0(struct radeon_state *state); -static int r600_state_pm4_vgt(struct radeon_state *state); -static int r600_state_pm4_db(struct radeon_state *state); -static int r600_state_pm4_shader(struct radeon_state *state); -static int r600_state_pm4_draw(struct radeon_state *state); -static int r600_state_pm4_config(struct radeon_state *state); -static int r600_state_pm4_generic(struct radeon_state *state); -static int r600_state_pm4_query_begin(struct radeon_state *state); -static int r600_state_pm4_query_end(struct radeon_state *state); -static int r700_state_pm4_config(struct radeon_state *state); -static int r600_state_pm4_db_flush(struct radeon_state *state); -static int r600_state_pm4_cb_flush(struct radeon_state *state); - -static int eg_state_pm4_vgt(struct radeon_state *state); - -#include "r600_states.h" -#include "eg_states.h" - - -#define SUB_NONE(param) { { 0, R600_names_##param, (sizeof(R600_names_##param)/sizeof(struct radeon_register)) } } -#define SUB_PS(param) { R600_SHADER_PS, R600_names_##param, (sizeof(R600_names_##param)/sizeof(struct radeon_register)) } -#define SUB_VS(param) { R600_SHADER_VS, R600_names_##param, (sizeof(R600_names_##param)/sizeof(struct radeon_register)) } -#define SUB_GS(param) { R600_SHADER_GS, R600_names_##param, (sizeof(R600_names_##param)/sizeof(struct radeon_register)) } -#define SUB_FS(param) { R600_SHADER_FS, R600_names_##param, (sizeof(R600_names_##param)/sizeof(struct radeon_register)) } - -#define EG_SUB_NONE(param) { { 0, EG_names_##param, (sizeof(EG_names_##param)/sizeof(struct radeon_register)) } } -#define EG_SUB_PS(param) { R600_SHADER_PS, EG_names_##param, (sizeof(EG_names_##param)/sizeof(struct radeon_register)) } -#define EG_SUB_VS(param) { R600_SHADER_VS, EG_names_##param, (sizeof(EG_names_##param)/sizeof(struct radeon_register)) } -#define EG_SUB_GS(param) { R600_SHADER_GS, EG_names_##param, (sizeof(EG_names_##param)/sizeof(struct radeon_register)) } -#define EG_SUB_FS(param) { R600_SHADER_FS, EG_names_##param, (sizeof(EG_names_##param)/sizeof(struct radeon_register)) } - -/* some of these are overriden at runtime for R700 */ -struct radeon_stype_info r600_stypes[] = { - { R600_STATE_CONFIG, 1, 0, r600_state_pm4_config, SUB_NONE(CONFIG), }, - { R600_STATE_CB_CNTL, 1, 0, r600_state_pm4_generic, SUB_NONE(CB_CNTL) }, - { R600_STATE_RASTERIZER, 1, 0, r600_state_pm4_generic, SUB_NONE(RASTERIZER) }, - { R600_STATE_VIEWPORT, 1, 0, r600_state_pm4_generic, SUB_NONE(VIEWPORT) }, - { R600_STATE_SCISSOR, 1, 0, r600_state_pm4_generic, SUB_NONE(SCISSOR) }, - { R600_STATE_BLEND, 1, 0, r600_state_pm4_generic, SUB_NONE(BLEND), }, - { R600_STATE_DSA, 1, 0, r600_state_pm4_generic, SUB_NONE(DSA), }, - { R600_STATE_SHADER, 1, 0, r600_state_pm4_shader, { SUB_PS(PS_SHADER), SUB_VS(VS_SHADER) } }, - { R600_STATE_CBUF, 1, 0, r600_state_pm4_shader, { SUB_PS(PS_CBUF), SUB_VS(VS_CBUF) } }, - { R600_STATE_CONSTANT, 256, 0x10, r600_state_pm4_generic, { SUB_PS(PS_CONSTANT), SUB_VS(VS_CONSTANT) } }, - { R600_STATE_RESOURCE, 160, 0x1c, r600_state_pm4_resource, { SUB_PS(PS_RESOURCE), SUB_VS(VS_RESOURCE), SUB_GS(GS_RESOURCE), SUB_FS(FS_RESOURCE)} }, - { R600_STATE_SAMPLER, 18, 0xc, r600_state_pm4_generic, { SUB_PS(PS_SAMPLER), SUB_VS(VS_SAMPLER), SUB_GS(GS_SAMPLER) } }, - { R600_STATE_SAMPLER_BORDER, 18, 0x10, r600_state_pm4_generic, { SUB_PS(PS_SAMPLER_BORDER), SUB_VS(VS_SAMPLER_BORDER), SUB_GS(GS_SAMPLER_BORDER) } }, - { R600_STATE_CB0, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB0) }, - { R600_STATE_CB1, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB1) }, - { R600_STATE_CB2, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB2) }, - { R600_STATE_CB3, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB3) }, - { R600_STATE_CB4, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB4) }, - { R600_STATE_CB5, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB5) }, - { R600_STATE_CB6, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB6) }, - { R600_STATE_CB7, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB7) }, - { R600_STATE_QUERY_BEGIN, 1, 0, r600_state_pm4_query_begin, SUB_NONE(VGT_EVENT) }, - { R600_STATE_QUERY_END, 1, 0, r600_state_pm4_query_end, SUB_NONE(VGT_EVENT) }, - { R600_STATE_DB, 1, 0, r600_state_pm4_db, SUB_NONE(DB) }, - { R600_STATE_UCP, 1, 0, r600_state_pm4_generic, SUB_NONE(UCP) }, - { R600_STATE_VGT, 1, 0, r600_state_pm4_vgt, SUB_NONE(VGT) }, - { R600_STATE_DRAW, 1, 0, r600_state_pm4_draw, SUB_NONE(DRAW) }, - { R600_STATE_CB_FLUSH, 1, 0, r600_state_pm4_cb_flush, SUB_NONE(CB_FLUSH) }, - { R600_STATE_DB_FLUSH, 1, 0, r600_state_pm4_db_flush, SUB_NONE(DB_FLUSH) }, -}; -#define STYPES_SIZE Elements(r600_stypes) - -struct radeon_stype_info eg_stypes[] = { - { R600_STATE_CONFIG, 1, 0, r700_state_pm4_config, EG_SUB_NONE(CONFIG), }, - { R600_STATE_CB_CNTL, 1, 0, r600_state_pm4_generic, EG_SUB_NONE(CB_CNTL) }, - { R600_STATE_RASTERIZER, 1, 0, r600_state_pm4_generic, EG_SUB_NONE(RASTERIZER) }, - { R600_STATE_VIEWPORT, 1, 0, r600_state_pm4_generic, EG_SUB_NONE(VIEWPORT) }, - { R600_STATE_SCISSOR, 1, 0, r600_state_pm4_generic, EG_SUB_NONE(SCISSOR) }, - { R600_STATE_BLEND, 1, 0, r600_state_pm4_generic, EG_SUB_NONE(BLEND), }, - { R600_STATE_DSA, 1, 0, r600_state_pm4_generic, EG_SUB_NONE(DSA), }, - { R600_STATE_SHADER, 1, 0, r600_state_pm4_shader, { EG_SUB_PS(PS_SHADER), EG_SUB_VS(VS_SHADER) } }, - { R600_STATE_CBUF, 1, 0, r600_state_pm4_shader, { EG_SUB_PS(PS_CBUF), EG_SUB_VS(VS_CBUF) } }, - { R600_STATE_RESOURCE, 176, 0x20, r600_state_pm4_resource, { EG_SUB_PS(PS_RESOURCE), EG_SUB_VS(VS_RESOURCE), EG_SUB_GS(GS_RESOURCE), EG_SUB_FS(FS_RESOURCE)} }, - { R600_STATE_SAMPLER, 18, 0xc, r600_state_pm4_generic, { EG_SUB_PS(PS_SAMPLER), EG_SUB_VS(VS_SAMPLER), EG_SUB_GS(GS_SAMPLER) } }, - { R600_STATE_SAMPLER_BORDER, 18, 0x10, r600_state_pm4_generic, { EG_SUB_PS(PS_SAMPLER_BORDER), EG_SUB_VS(VS_SAMPLER_BORDER), EG_SUB_GS(GS_SAMPLER_BORDER) } }, - { R600_STATE_CB0, 11, 0x3c, r600_state_pm4_generic, EG_SUB_NONE(CB) }, - { R600_STATE_QUERY_BEGIN, 1, 0, r600_state_pm4_query_begin, EG_SUB_NONE(VGT_EVENT) }, - { R600_STATE_QUERY_END, 1, 0, r600_state_pm4_query_end, EG_SUB_NONE(VGT_EVENT) }, - { R600_STATE_DB, 1, 0, r600_state_pm4_generic, EG_SUB_NONE(DB) }, - { R600_STATE_UCP, 1, 0, r600_state_pm4_generic, EG_SUB_NONE(UCP) }, - { R600_STATE_VGT, 1, 0, eg_state_pm4_vgt, EG_SUB_NONE(VGT) }, - { R600_STATE_DRAW, 1, 0, r600_state_pm4_draw, EG_SUB_NONE(DRAW) }, - { R600_STATE_CB_FLUSH, 1, 0, r600_state_pm4_cb_flush, EG_SUB_NONE(CB_FLUSH) }, - { R600_STATE_DB_FLUSH, 1, 0, r600_state_pm4_db_flush, EG_SUB_NONE(DB_FLUSH) }, - -}; -#define EG_STYPES_SIZE Elements(eg_stypes) - -static const struct radeon_register *get_regs(struct radeon_state *state) -{ - return state->stype->reginfo[state->shader_index].regs; -} - -/* - * r600/r700 state functions - */ -static int r600_state_pm4_bytecode(struct radeon_state *state, unsigned offset, unsigned id, unsigned nreg) -{ - const struct radeon_register *regs = get_regs(state); - unsigned i; - int r; - - if (!offset) { - fprintf(stderr, "%s invalid register for state %d %d\n", - __func__, state->stype->stype, id); - return -EINVAL; - } - if (offset >= R600_CONFIG_REG_OFFSET && offset < R600_CONFIG_REG_END) { - state->pm4[state->cpm4++] = PKT3(PKT3_SET_CONFIG_REG, nreg); - state->pm4[state->cpm4++] = (offset - R600_CONFIG_REG_OFFSET) >> 2; - for (i = 0; i < nreg; i++) { - state->pm4[state->cpm4++] = state->states[id + i]; - } - for (i = 0; i < nreg; i++) { - if (regs[id + i].need_reloc) { - state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0); - r = radeon_state_reloc(state, state->cpm4, regs[id + i].bo_id); - if (r) - return r; - state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[regs[id + i].bo_id]); - } - } - return 0; - } - if (offset >= R600_CONTEXT_REG_OFFSET && offset < R600_CONTEXT_REG_END) { - state->pm4[state->cpm4++] = PKT3(PKT3_SET_CONTEXT_REG, nreg); - state->pm4[state->cpm4++] = (offset - R600_CONTEXT_REG_OFFSET) >> 2; - for (i = 0; i < nreg; i++) { - state->pm4[state->cpm4++] = state->states[id + i]; - } - for (i = 0; i < nreg; i++) { - if (regs[id + i].need_reloc) { - state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0); - r = radeon_state_reloc(state, state->cpm4, regs[id + i].bo_id); - if (r) - return r; - state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[regs[id + i].bo_id]); - } - } - return 0; - } - if (offset >= R600_ALU_CONST_OFFSET && offset < R600_ALU_CONST_END) { - state->pm4[state->cpm4++] = PKT3(PKT3_SET_ALU_CONST, nreg); - state->pm4[state->cpm4++] = (offset - R600_ALU_CONST_OFFSET) >> 2; - for (i = 0; i < nreg; i++) { - state->pm4[state->cpm4++] = state->states[id + i]; - } - return 0; - } - if (offset >= R600_SAMPLER_OFFSET && offset < R600_SAMPLER_END) { - state->pm4[state->cpm4++] = PKT3(PKT3_SET_SAMPLER, nreg); - state->pm4[state->cpm4++] = (offset - R600_SAMPLER_OFFSET) >> 2; - for (i = 0; i < nreg; i++) { - state->pm4[state->cpm4++] = state->states[id + i]; - } - return 0; - } - fprintf(stderr, "%s unsupported offset 0x%08X\n", __func__, offset); - return -EINVAL; -} - -static int eg_state_pm4_bytecode(struct radeon_state *state, unsigned offset, unsigned id, unsigned nreg) -{ - const struct radeon_register *regs = get_regs(state); - unsigned i; - int r; - - if (!offset) { - fprintf(stderr, "%s invalid register for state %d %d\n", - __func__, state->stype->stype, id); - return -EINVAL; - } - if (offset >= R600_CONFIG_REG_OFFSET && offset < R600_CONFIG_REG_END) { - state->pm4[state->cpm4++] = PKT3(PKT3_SET_CONFIG_REG, nreg); - state->pm4[state->cpm4++] = (offset - R600_CONFIG_REG_OFFSET) >> 2; - for (i = 0; i < nreg; i++) { - state->pm4[state->cpm4++] = state->states[id + i]; - } - for (i = 0; i < nreg; i++) { - if (regs[id + i].need_reloc) { - state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0); - r = radeon_state_reloc(state, state->cpm4, regs[id + i].bo_id); - if (r) - return r; - state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[regs[id + i].bo_id]); - } - } - return 0; - } - if (offset >= R600_CONTEXT_REG_OFFSET && offset < R600_CONTEXT_REG_END) { - state->pm4[state->cpm4++] = PKT3(PKT3_SET_CONTEXT_REG, nreg); - state->pm4[state->cpm4++] = (offset - R600_CONTEXT_REG_OFFSET) >> 2; - for (i = 0; i < nreg; i++) { - state->pm4[state->cpm4++] = state->states[id + i]; - } - for (i = 0; i < nreg; i++) { - if (regs[id + i].need_reloc) { - state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0); - r = radeon_state_reloc(state, state->cpm4, regs[id + i].bo_id); - if (r) - return r; - state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[regs[id + i].bo_id]); - } - } - return 0; - } - if (offset >= EG_RESOURCE_OFFSET && offset < EG_RESOURCE_END) { - state->pm4[state->cpm4++] = PKT3(PKT3_SET_RESOURCE, nreg); - state->pm4[state->cpm4++] = (offset - EG_RESOURCE_OFFSET) >> 2; - for (i = 0; i < nreg; i++) { - state->pm4[state->cpm4++] = state->states[id + i]; - } - return 0; - } - if (offset >= R600_SAMPLER_OFFSET && offset < R600_SAMPLER_END) { - state->pm4[state->cpm4++] = PKT3(PKT3_SET_SAMPLER, nreg); - state->pm4[state->cpm4++] = (offset - R600_SAMPLER_OFFSET) >> 2; - for (i = 0; i < nreg; i++) { - state->pm4[state->cpm4++] = state->states[id + i]; - } - return 0; - } - fprintf(stderr, "%s unsupported offset 0x%08X\n", __func__, offset); - return -EINVAL; -} - - -static int r600_state_pm4_generic(struct radeon_state *state) -{ - const struct radeon_register *regs = get_regs(state); - unsigned i, offset, nreg, coffset, loffset, soffset; - unsigned start; - int r; - - if (!state->nstates) - return 0; - soffset = state->id * state->stype->stride; - offset = loffset = regs[0].offset + soffset; - start = 0; - for (i = 1, nreg = 1; i < state->nstates; i++) { - coffset = regs[i].offset + soffset; - if (coffset == (loffset + 4)) { - nreg++; - loffset = coffset; - } else { - if (state->radeon->family >= CHIP_CEDAR) - r = eg_state_pm4_bytecode(state, offset, start, nreg); - else - r = r600_state_pm4_bytecode(state, offset, start, nreg); - if (r) { - fprintf(stderr, "%s invalid 0x%08X %d\n", __func__, start, nreg); - return r; - } - offset = loffset = coffset; - nreg = 1; - start = i; - } - } - if (state->radeon->family >= CHIP_CEDAR) - r = eg_state_pm4_bytecode(state, offset, start, nreg); - else - r = r600_state_pm4_bytecode(state, offset, start, nreg); - return r; -} - -static void r600_state_pm4_with_flush(struct radeon_state *state, u32 flags, int bufs_are_cbs) -{ - unsigned i, j, add, size; - - state->nreloc = 0; - for (i = 0; i < state->nbo; i++) { - for (j = 0, add = 1; j < state->nreloc; j++) { - if (state->bo[state->reloc_bo_id[j]] == state->bo[i]) { - add = 0; - break; - } - } - if (add) { - state->reloc_bo_id[state->nreloc++] = i; - } - } - for (i = 0; i < state->nreloc; i++) { - size = (radeon_ws_bo_get_size(state->bo[state->reloc_bo_id[i]]) + 255) >> 8; - state->pm4[state->cpm4++] = PKT3(PKT3_SURFACE_SYNC, 3); - if (bufs_are_cbs) - flags |= S_0085F0_CB0_DEST_BASE_ENA(1 << i); - state->pm4[state->cpm4++] = flags; - state->pm4[state->cpm4++] = size; - state->pm4[state->cpm4++] = 0x00000000; - state->pm4[state->cpm4++] = 0x0000000A; - state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0); - state->reloc_pm4_id[i] = state->cpm4; - state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[state->reloc_bo_id[i]]); - } -} - -static int r600_state_pm4_cb0(struct radeon_state *state) -{ - int r; - - r = r600_state_pm4_generic(state); - if (r) - return r; - state->pm4[state->cpm4++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0); - state->pm4[state->cpm4++] = 0x00000002; - return 0; -} - -static int r600_state_pm4_db(struct radeon_state *state) -{ - int r; - - r = r600_state_pm4_generic(state); - if (r) - return r; - state->pm4[state->cpm4++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0); - state->pm4[state->cpm4++] = 0x00000001; - return 0; -} - -static int r600_state_pm4_config(struct radeon_state *state) -{ - state->pm4[state->cpm4++] = PKT3(PKT3_START_3D_CMDBUF, 0); - state->pm4[state->cpm4++] = 0x00000000; - state->pm4[state->cpm4++] = PKT3(PKT3_CONTEXT_CONTROL, 1); - state->pm4[state->cpm4++] = 0x80000000; - state->pm4[state->cpm4++] = 0x80000000; - state->pm4[state->cpm4++] = PKT3(PKT3_EVENT_WRITE, 0); - state->pm4[state->cpm4++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT; - state->pm4[state->cpm4++] = PKT3(PKT3_SET_CONFIG_REG, 1); - state->pm4[state->cpm4++] = 0x00000010; - state->pm4[state->cpm4++] = 0x00028000; - return r600_state_pm4_generic(state); -} - -static int r600_state_pm4_query_begin(struct radeon_state *state) -{ - int r; - - state->cpm4 = 0; - state->pm4[state->cpm4++] = PKT3(PKT3_EVENT_WRITE, 2); - state->pm4[state->cpm4++] = EVENT_TYPE_ZPASS_DONE; - state->pm4[state->cpm4++] = state->states[0]; - state->pm4[state->cpm4++] = 0x0; - state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0); - r = radeon_state_reloc(state, state->cpm4, 0); - if (r) - return r; - state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[0]); - return 0; -} - -static int r600_state_pm4_query_end(struct radeon_state *state) -{ - int r; - - state->cpm4 = 0; - state->pm4[state->cpm4++] = PKT3(PKT3_EVENT_WRITE, 2); - state->pm4[state->cpm4++] = EVENT_TYPE_ZPASS_DONE; - state->pm4[state->cpm4++] = state->states[0]; - state->pm4[state->cpm4++] = 0x0; - state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0); - r = radeon_state_reloc(state, state->cpm4, 0); - if (r) - return r; - state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[0]); - return 0; -} - -static int r700_state_pm4_config(struct radeon_state *state) -{ - state->pm4[state->cpm4++] = PKT3(PKT3_CONTEXT_CONTROL, 1); - state->pm4[state->cpm4++] = 0x80000000; - state->pm4[state->cpm4++] = 0x80000000; - state->pm4[state->cpm4++] = PKT3(PKT3_EVENT_WRITE, 0); - state->pm4[state->cpm4++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT; - state->pm4[state->cpm4++] = PKT3(PKT3_SET_CONFIG_REG, 1); - state->pm4[state->cpm4++] = 0x00000010; - state->pm4[state->cpm4++] = 0x00028000; - return r600_state_pm4_generic(state); -} - -static int r600_state_pm4_shader(struct radeon_state *state) -{ - r600_state_pm4_with_flush(state, S_0085F0_SH_ACTION_ENA(1), 0); - return r600_state_pm4_generic(state); -} - -static int eg_state_pm4_vgt(struct radeon_state *state) -{ - int r; - r = eg_state_pm4_bytecode(state, R_028400_VGT_MAX_VTX_INDX, EG_VGT__VGT_MAX_VTX_INDX, 1); - if (r) - return r; - r = eg_state_pm4_bytecode(state, R_028404_VGT_MIN_VTX_INDX, EG_VGT__VGT_MIN_VTX_INDX, 1); - if (r) - return r; - r = eg_state_pm4_bytecode(state, R_028408_VGT_INDX_OFFSET, EG_VGT__VGT_INDX_OFFSET, 1); - if (r) - return r; - r = eg_state_pm4_bytecode(state, R_008958_VGT_PRIMITIVE_TYPE, EG_VGT__VGT_PRIMITIVE_TYPE, 1); - if (r) - return r; - state->pm4[state->cpm4++] = PKT3(PKT3_INDEX_TYPE, 0); - state->pm4[state->cpm4++] = state->states[EG_VGT__VGT_DMA_INDEX_TYPE]; - state->pm4[state->cpm4++] = PKT3(PKT3_NUM_INSTANCES, 0); - state->pm4[state->cpm4++] = state->states[EG_VGT__VGT_DMA_NUM_INSTANCES]; - return 0; -} - -static int r600_state_pm4_vgt(struct radeon_state *state) -{ - int r; - - r = r600_state_pm4_bytecode(state, R_028400_VGT_MAX_VTX_INDX, R600_VGT__VGT_MAX_VTX_INDX, 1); - if (r) - return r; - r = r600_state_pm4_bytecode(state, R_028404_VGT_MIN_VTX_INDX, R600_VGT__VGT_MIN_VTX_INDX, 1); - if (r) - return r; - r = r600_state_pm4_bytecode(state, R_028408_VGT_INDX_OFFSET, R600_VGT__VGT_INDX_OFFSET, 1); - if (r) - return r; - r = r600_state_pm4_bytecode(state, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX, 1); - if (r) - return r; - r = r600_state_pm4_bytecode(state, R_008958_VGT_PRIMITIVE_TYPE, R600_VGT__VGT_PRIMITIVE_TYPE, 1); - if (r) - return r; - state->pm4[state->cpm4++] = PKT3(PKT3_INDEX_TYPE, 0); - state->pm4[state->cpm4++] = state->states[R600_VGT__VGT_DMA_INDEX_TYPE]; - state->pm4[state->cpm4++] = PKT3(PKT3_NUM_INSTANCES, 0); - state->pm4[state->cpm4++] = state->states[R600_VGT__VGT_DMA_NUM_INSTANCES]; - return 0; -} - -static int r600_state_pm4_draw(struct radeon_state *state) -{ - int r; - - if (state->nbo) { - state->pm4[state->cpm4++] = PKT3(PKT3_DRAW_INDEX, 3); - state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_DMA_BASE]; - state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_DMA_BASE_HI]; - state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_NUM_INDICES]; - state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_DRAW_INITIATOR]; - state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0); - r = radeon_state_reloc(state, state->cpm4, 0); - if (r) - return r; - state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[0]); - } else { - state->pm4[state->cpm4++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1); - state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_NUM_INDICES]; - state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_DRAW_INITIATOR]; - } - state->pm4[state->cpm4++] = PKT3(PKT3_EVENT_WRITE, 0); - state->pm4[state->cpm4++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT; - - return 0; -} - -static int r600_state_pm4_cb_flush(struct radeon_state *state) -{ - if (!state->nbo) - return 0; - - r600_state_pm4_with_flush(state, S_0085F0_CB_ACTION_ENA(1), 1); - - return 0; -} - -static int r600_state_pm4_db_flush(struct radeon_state *state) -{ - if (!state->nbo) - return 0; - - r600_state_pm4_with_flush(state, S_0085F0_DB_ACTION_ENA(1) | - S_0085F0_DB_DEST_BASE_ENA(1), 0); - - return 0; -} - -static int r600_state_pm4_resource(struct radeon_state *state) -{ - u32 flags, type, nbo, offset, soffset; - int r, nres; - const struct radeon_register *regs = get_regs(state); - - soffset = state->id * state->stype->stride; - if (state->radeon->family >= CHIP_CEDAR) - type = G_038018_TYPE(state->states[7]); - else - type = G_038018_TYPE(state->states[6]); - - switch (type) { - case 2: - flags = S_0085F0_TC_ACTION_ENA(1); - nbo = 2; - break; - case 3: - flags = S_0085F0_VC_ACTION_ENA(1); - nbo = 1; - break; - default: - return 0; - } - if (state->nbo != nbo) { - fprintf(stderr, "%s need %d bo got %d\n", __func__, nbo, state->nbo); - return -EINVAL; - } - r600_state_pm4_with_flush(state, flags, 0); - offset = regs[0].offset + soffset; - if (state->radeon->family >= CHIP_CEDAR) - nres = 8; - else - nres = 7; - state->pm4[state->cpm4++] = PKT3(PKT3_SET_RESOURCE, nres); - if (state->radeon->family >= CHIP_CEDAR) - state->pm4[state->cpm4++] = (offset - EG_RESOURCE_OFFSET) >> 2; - else - state->pm4[state->cpm4++] = (offset - R_038000_SQ_TEX_RESOURCE_WORD0_0) >> 2; - state->pm4[state->cpm4++] = state->states[0]; - state->pm4[state->cpm4++] = state->states[1]; - state->pm4[state->cpm4++] = state->states[2]; - state->pm4[state->cpm4++] = state->states[3]; - state->pm4[state->cpm4++] = state->states[4]; - state->pm4[state->cpm4++] = state->states[5]; - state->pm4[state->cpm4++] = state->states[6]; - if (state->radeon->family >= CHIP_CEDAR) - state->pm4[state->cpm4++] = state->states[7]; - - state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0); - r = radeon_state_reloc(state, state->cpm4, 0); - if (r) - return r; - state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[0]); - if (type == 2) { - state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0); - r = radeon_state_reloc(state, state->cpm4, 1); - if (r) - return r; - state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[1]); - } - return 0; -} - - -static void r600_modify_type_array(struct radeon *radeon) -{ - int i; - switch (radeon->family) { - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - break; - default: - return; - } - - /* r700 needs some mods */ - for (i = 0; i < radeon->nstype; i++) { - struct radeon_stype_info *info = &radeon->stype[i]; - - switch(info->stype) { - case R600_STATE_CONFIG: - info->pm4 = r700_state_pm4_config; - break; - case R600_STATE_CB0: - info->pm4 = r600_state_pm4_generic; - break; - case R600_STATE_DB: - info->pm4 = r600_state_pm4_generic; - break; - }; - } -} - -static void build_types_array(struct radeon *radeon, struct radeon_stype_info *types, int size) -{ - int i, j; - int id = 0; - - for (i = 0; i < size; i++) { - types[i].base_id = id; - types[i].npm4 = 128; - if (types[i].reginfo[0].shader_type == 0) { - id += types[i].num; - } else { - for (j = 0; j < R600_SHADER_MAX; j++) { - if (types[i].reginfo[j].shader_type) - id += types[i].num; - } - } - } - radeon->max_states = id; - radeon->stype = types; - radeon->nstype = size; -} - -static void r600_build_types_array(struct radeon *radeon) -{ - build_types_array(radeon, r600_stypes, STYPES_SIZE); - r600_modify_type_array(radeon); -} - -static void eg_build_types_array(struct radeon *radeon) -{ - build_types_array(radeon, eg_stypes, EG_STYPES_SIZE); -} - -int r600_init(struct radeon *radeon) -{ - if (radeon->family >= CHIP_CEDAR) - eg_build_types_array(radeon); - else - r600_build_types_array(radeon); - return 0; -} diff --git a/src/gallium/winsys/r600/drm/r600_state2.c b/src/gallium/winsys/r600/drm/r600_state2.c deleted file mode 100644 index b3d618748d..0000000000 --- a/src/gallium/winsys/r600/drm/r600_state2.c +++ /dev/null @@ -1,1056 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#include <errno.h> -#include <stdint.h> -#include <string.h> -#include <stdlib.h> -#include <assert.h> -#include "xf86drm.h" -#include "r600.h" -#include "r600d.h" -#include "r600_priv.h" -#include "radeon_drm.h" -#include "bof.h" -#include "pipe/p_compiler.h" -#include "util/u_inlines.h" -#include <pipebuffer/pb_bufmgr.h> - -struct radeon_ws_bo { - struct pipe_reference reference; - struct pb_buffer *pb; -}; - -struct radeon_bo { - struct pipe_reference reference; - unsigned handle; - unsigned size; - unsigned alignment; - unsigned map_count; - void *data; -}; -struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf); -int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo); -void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo); - -unsigned radeon_ws_bo_get_handle(struct radeon_ws_bo *pb_bo); - -static int r600_group_id_register_offset(unsigned offset) -{ - if (offset >= R600_CONFIG_REG_OFFSET && offset < R600_CONFIG_REG_END) { - return R600_GROUP_CONFIG; - } - if (offset >= R600_CONTEXT_REG_OFFSET && offset < R600_CONTEXT_REG_END) { - return R600_GROUP_CONTEXT; - } - if (offset >= R600_ALU_CONST_OFFSET && offset < R600_ALU_CONST_END) { - return R600_GROUP_ALU_CONST; - } - if (offset >= R600_RESOURCE_OFFSET && offset < R600_RESOURCE_END) { - return R600_GROUP_RESOURCE; - } - if (offset >= R600_SAMPLER_OFFSET && offset < R600_SAMPLER_END) { - return R600_GROUP_SAMPLER; - } - if (offset >= R600_CTL_CONST_OFFSET && offset < R600_CTL_CONST_END) { - return R600_GROUP_CTL_CONST; - } - if (offset >= R600_LOOP_CONST_OFFSET && offset < R600_LOOP_CONST_END) { - return R600_GROUP_LOOP_CONST; - } - if (offset >= R600_BOOL_CONST_OFFSET && offset < R600_BOOL_CONST_END) { - return R600_GROUP_BOOL_CONST; - } - return -1; -} - -static int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg) -{ - struct r600_group_block *block, *tmp; - struct r600_group *group; - int group_id, id; - - for (unsigned i = 0, n = 0; i < nreg; i += n) { - u32 j, r; - /* find number of consecutive registers */ - for (j = i + 1, r = reg[i].offset + 4, n = 1; j < (nreg - i); j++, n++, r+=4) { - if (r != reg[j].offset) { - break; - } - } - - /* find into which group this block is */ - group_id = r600_group_id_register_offset(reg[i].offset); - assert(group_id >= 0); - group = &ctx->groups[group_id]; - - /* allocate new block */ - tmp = realloc(group->blocks, (group->nblocks + 1) * sizeof(struct r600_group_block)); - if (tmp == NULL) { - return -ENOMEM; - } - group->blocks = tmp; - block = &group->blocks[group->nblocks++]; - for (int j = 0; j < n; j++) { - group->offset_block_id[((reg[i].offset - group->start_offset) >> 2) + j] = group->nblocks - 1; - } - - /* initialize block */ - memset(block, 0, sizeof(struct r600_group_block)); - block->start_offset = reg[i].offset; - block->pm4_ndwords = n; - block->nreg = n; - for (j = 0; j < n; j++) { - if (reg[i+j].need_bo) { - block->nbo++; - assert(block->nbo < R600_BLOCK_MAX_BO); - block->pm4_bo_index[j] = block->nbo; - block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0); - block->pm4[block->pm4_ndwords++] = 0x00000000; - block->reloc[block->nbo].bo_pm4_index[block->reloc[block->nbo].nreloc++] = block->pm4_ndwords - 1; - } - } - for (j = 0; j < n; j++) { - if (reg[i+j].flush_flags) { - block->pm4[block->pm4_ndwords++] = PKT3(PKT3_SURFACE_SYNC, 3); - block->pm4[block->pm4_ndwords++] = reg[i+j].flush_flags; - block->pm4[block->pm4_ndwords++] = 0xFFFFFFFF; - block->pm4[block->pm4_ndwords++] = 0x00000000; - block->pm4[block->pm4_ndwords++] = 0x0000000A; - block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0); - block->pm4[block->pm4_ndwords++] = 0x00000000; - id = block->pm4_bo_index[j]; - block->reloc[id].bo_pm4_index[block->reloc[id].nreloc++] = block->pm4_ndwords - 1; - } - } - /* check that we stay in limit */ - assert(block->pm4_ndwords < R600_BLOCK_MAX_REG); - } - return 0; -} - -static int r600_group_init(struct r600_group *group, unsigned start_offset, unsigned end_offset) -{ - group->start_offset = start_offset; - group->end_offset = end_offset; - group->nblocks = 0; - group->blocks = NULL; - group->offset_block_id = calloc((end_offset - start_offset) >> 2, sizeof(unsigned)); - if (group->offset_block_id == NULL) - return -ENOMEM; - return 0; -} - -static void r600_group_fini(struct r600_group *group) -{ - free(group->offset_block_id); - free(group->blocks); -} - -/* R600/R700 configuration */ -static const struct r600_reg r600_reg_list[] = { - {0, 0, R_008C00_SQ_CONFIG}, - {0, 0, R_008C04_SQ_GPR_RESOURCE_MGMT_1}, - {0, 0, R_008C08_SQ_GPR_RESOURCE_MGMT_2}, - {0, 0, R_008C0C_SQ_THREAD_RESOURCE_MGMT}, - {0, 0, R_008C10_SQ_STACK_RESOURCE_MGMT_1}, - {0, 0, R_008C14_SQ_STACK_RESOURCE_MGMT_2}, - {0, 0, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ}, - {0, 0, R_009508_TA_CNTL_AUX}, - {0, 0, R_009714_VC_ENHANCE}, - {0, 0, R_009830_DB_DEBUG}, - {0, 0, R_009838_DB_WATERMARKS}, - {0, 0, R_028350_SX_MISC}, - {0, 0, R_0286C8_SPI_THREAD_GROUPING}, - {0, 0, R_0288A8_SQ_ESGS_RING_ITEMSIZE}, - {0, 0, R_0288AC_SQ_GSVS_RING_ITEMSIZE}, - {0, 0, R_0288B0_SQ_ESTMP_RING_ITEMSIZE}, - {0, 0, R_0288B4_SQ_GSTMP_RING_ITEMSIZE}, - {0, 0, R_0288B8_SQ_VSTMP_RING_ITEMSIZE}, - {0, 0, R_0288BC_SQ_PSTMP_RING_ITEMSIZE}, - {0, 0, R_0288C0_SQ_FBUF_RING_ITEMSIZE}, - {0, 0, R_0288C4_SQ_REDUC_RING_ITEMSIZE}, - {0, 0, R_0288C8_SQ_GS_VERT_ITEMSIZE}, - {0, 0, R_028A10_VGT_OUTPUT_PATH_CNTL}, - {0, 0, R_028A14_VGT_HOS_CNTL}, - {0, 0, R_028A18_VGT_HOS_MAX_TESS_LEVEL}, - {0, 0, R_028A1C_VGT_HOS_MIN_TESS_LEVEL}, - {0, 0, R_028A20_VGT_HOS_REUSE_DEPTH}, - {0, 0, R_028A24_VGT_GROUP_PRIM_TYPE}, - {0, 0, R_028A28_VGT_GROUP_FIRST_DECR}, - {0, 0, R_028A2C_VGT_GROUP_DECR}, - {0, 0, R_028A30_VGT_GROUP_VECT_0_CNTL}, - {0, 0, R_028A34_VGT_GROUP_VECT_1_CNTL}, - {0, 0, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL}, - {0, 0, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL}, - {0, 0, R_028A40_VGT_GS_MODE}, - {0, 0, R_028A4C_PA_SC_MODE_CNTL}, - {0, 0, R_028AB0_VGT_STRMOUT_EN}, - {0, 0, R_028AB4_VGT_REUSE_OFF}, - {0, 0, R_028AB8_VGT_VTX_CNT_EN}, - {0, 0, R_028B20_VGT_STRMOUT_BUFFER_EN}, - {0, 0, R_028028_DB_STENCIL_CLEAR}, - {0, 0, R_02802C_DB_DEPTH_CLEAR}, - {1, 0, R_028040_CB_COLOR0_BASE}, - {0, 0, R_0280A0_CB_COLOR0_INFO}, - {0, 0, R_028060_CB_COLOR0_SIZE}, - {0, 0, R_028080_CB_COLOR0_VIEW}, - {1, 0, R_0280E0_CB_COLOR0_FRAG}, - {1, 0, R_0280C0_CB_COLOR0_TILE}, - {0, 0, R_028100_CB_COLOR0_MASK}, - {1, 0, R_028044_CB_COLOR1_BASE}, - {0, 0, R_0280A4_CB_COLOR1_INFO}, - {0, 0, R_028064_CB_COLOR1_SIZE}, - {0, 0, R_028084_CB_COLOR1_VIEW}, - {1, 0, R_0280E4_CB_COLOR1_FRAG}, - {1, 0, R_0280C4_CB_COLOR1_TILE}, - {0, 0, R_028104_CB_COLOR1_MASK}, - {1, 0, R_028048_CB_COLOR2_BASE}, - {0, 0, R_0280A8_CB_COLOR2_INFO}, - {0, 0, R_028068_CB_COLOR2_SIZE}, - {0, 0, R_028088_CB_COLOR2_VIEW}, - {1, 0, R_0280E8_CB_COLOR2_FRAG}, - {1, 0, R_0280C8_CB_COLOR2_TILE}, - {0, 0, R_028108_CB_COLOR2_MASK}, - {1, 0, R_02804C_CB_COLOR3_BASE}, - {0, 0, R_0280AC_CB_COLOR3_INFO}, - {0, 0, R_02806C_CB_COLOR3_SIZE}, - {0, 0, R_02808C_CB_COLOR3_VIEW}, - {1, 0, R_0280EC_CB_COLOR3_FRAG}, - {1, 0, R_0280CC_CB_COLOR3_TILE}, - {0, 0, R_02810C_CB_COLOR3_MASK}, - {1, 0, R_028050_CB_COLOR4_BASE}, - {0, 0, R_0280B0_CB_COLOR4_INFO}, - {0, 0, R_028070_CB_COLOR4_SIZE}, - {0, 0, R_028090_CB_COLOR4_VIEW}, - {1, 0, R_0280F0_CB_COLOR4_FRAG}, - {1, 0, R_0280D0_CB_COLOR4_TILE}, - {0, 0, R_028110_CB_COLOR4_MASK}, - {1, 0, R_028054_CB_COLOR5_BASE}, - {0, 0, R_0280B4_CB_COLOR5_INFO}, - {0, 0, R_028074_CB_COLOR5_SIZE}, - {0, 0, R_028094_CB_COLOR5_VIEW}, - {1, 0, R_0280F4_CB_COLOR5_FRAG}, - {1, 0, R_0280D4_CB_COLOR5_TILE}, - {0, 0, R_028114_CB_COLOR5_MASK}, - {1, 0, R_028058_CB_COLOR6_BASE}, - {0, 0, R_0280B8_CB_COLOR6_INFO}, - {0, 0, R_028078_CB_COLOR6_SIZE}, - {0, 0, R_028098_CB_COLOR6_VIEW}, - {1, 0, R_0280F8_CB_COLOR6_FRAG}, - {1, 0, R_0280D8_CB_COLOR6_TILE}, - {0, 0, R_028118_CB_COLOR6_MASK}, - {1, 0, R_02805C_CB_COLOR7_BASE}, - {0, 0, R_0280BC_CB_COLOR7_INFO}, - {0, 0, R_02807C_CB_COLOR7_SIZE}, - {0, 0, R_02809C_CB_COLOR7_VIEW}, - {1, 0, R_0280FC_CB_COLOR7_FRAG}, - {1, 0, R_0280DC_CB_COLOR7_TILE}, - {0, 0, R_02811C_CB_COLOR7_MASK}, - {0, 0, R_028120_CB_CLEAR_RED}, - {0, 0, R_028124_CB_CLEAR_GREEN}, - {0, 0, R_028128_CB_CLEAR_BLUE}, - {0, 0, R_02812C_CB_CLEAR_ALPHA}, - {0, 0, R_02823C_CB_SHADER_MASK}, - {0, 0, R_028238_CB_TARGET_MASK}, - {0, 0, R_028410_SX_ALPHA_TEST_CONTROL}, - {0, 0, R_028414_CB_BLEND_RED}, - {0, 0, R_028418_CB_BLEND_GREEN}, - {0, 0, R_02841C_CB_BLEND_BLUE}, - {0, 0, R_028420_CB_BLEND_ALPHA}, - {0, 0, R_028424_CB_FOG_RED}, - {0, 0, R_028428_CB_FOG_GREEN}, - {0, 0, R_02842C_CB_FOG_BLUE}, - {0, 0, R_028430_DB_STENCILREFMASK}, - {0, 0, R_028434_DB_STENCILREFMASK_BF}, - {0, 0, R_028438_SX_ALPHA_REF}, - {0, 0, R_0286DC_SPI_FOG_CNTL}, - {0, 0, R_0286E0_SPI_FOG_FUNC_SCALE}, - {0, 0, R_0286E4_SPI_FOG_FUNC_BIAS}, - {0, 0, R_028780_CB_BLEND0_CONTROL}, - {0, 0, R_028784_CB_BLEND1_CONTROL}, - {0, 0, R_028788_CB_BLEND2_CONTROL}, - {0, 0, R_02878C_CB_BLEND3_CONTROL}, - {0, 0, R_028790_CB_BLEND4_CONTROL}, - {0, 0, R_028794_CB_BLEND5_CONTROL}, - {0, 0, R_028798_CB_BLEND6_CONTROL}, - {0, 0, R_02879C_CB_BLEND7_CONTROL}, - {0, 0, R_0287A0_CB_SHADER_CONTROL}, - {0, 0, R_028800_DB_DEPTH_CONTROL}, - {0, 0, R_028804_CB_BLEND_CONTROL}, - {0, 0, R_028808_CB_COLOR_CONTROL}, - {0, 0, R_02880C_DB_SHADER_CONTROL}, - {0, 0, R_028C04_PA_SC_AA_CONFIG}, - {0, 0, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX}, - {0, 0, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX}, - {0, 0, R_028C30_CB_CLRCMP_CONTROL}, - {0, 0, R_028C34_CB_CLRCMP_SRC}, - {0, 0, R_028C38_CB_CLRCMP_DST}, - {0, 0, R_028C3C_CB_CLRCMP_MSK}, - {0, 0, R_028C48_PA_SC_AA_MASK}, - {0, 0, R_028D2C_DB_SRESULTS_COMPARE_STATE1}, - {0, 0, R_028D44_DB_ALPHA_TO_MASK}, - {1, 0, R_02800C_DB_DEPTH_BASE}, - {0, 0, R_028000_DB_DEPTH_SIZE}, - {0, 0, R_028004_DB_DEPTH_VIEW}, - {0, 0, R_028010_DB_DEPTH_INFO}, - {0, 0, R_028D0C_DB_RENDER_CONTROL}, - {0, 0, R_028D10_DB_RENDER_OVERRIDE}, - {0, 0, R_028D24_DB_HTILE_SURFACE}, - {0, 0, R_028D30_DB_PRELOAD_CONTROL}, - {0, 0, R_028D34_DB_PREFETCH_LIMIT}, - {0, 0, R_028030_PA_SC_SCREEN_SCISSOR_TL}, - {0, 0, R_028034_PA_SC_SCREEN_SCISSOR_BR}, - {0, 0, R_028200_PA_SC_WINDOW_OFFSET}, - {0, 0, R_028204_PA_SC_WINDOW_SCISSOR_TL}, - {0, 0, R_028208_PA_SC_WINDOW_SCISSOR_BR}, - {0, 0, R_02820C_PA_SC_CLIPRECT_RULE}, - {0, 0, R_028210_PA_SC_CLIPRECT_0_TL}, - {0, 0, R_028214_PA_SC_CLIPRECT_0_BR}, - {0, 0, R_028218_PA_SC_CLIPRECT_1_TL}, - {0, 0, R_02821C_PA_SC_CLIPRECT_1_BR}, - {0, 0, R_028220_PA_SC_CLIPRECT_2_TL}, - {0, 0, R_028224_PA_SC_CLIPRECT_2_BR}, - {0, 0, R_028228_PA_SC_CLIPRECT_3_TL}, - {0, 0, R_02822C_PA_SC_CLIPRECT_3_BR}, - {0, 0, R_028230_PA_SC_EDGERULE}, - {0, 0, R_028240_PA_SC_GENERIC_SCISSOR_TL}, - {0, 0, R_028244_PA_SC_GENERIC_SCISSOR_BR}, - {0, 0, R_028250_PA_SC_VPORT_SCISSOR_0_TL}, - {0, 0, R_028254_PA_SC_VPORT_SCISSOR_0_BR}, - {0, 0, R_0282D0_PA_SC_VPORT_ZMIN_0}, - {0, 0, R_0282D4_PA_SC_VPORT_ZMAX_0}, - {0, 0, R_02843C_PA_CL_VPORT_XSCALE_0}, - {0, 0, R_028440_PA_CL_VPORT_XOFFSET_0}, - {0, 0, R_028444_PA_CL_VPORT_YSCALE_0}, - {0, 0, R_028448_PA_CL_VPORT_YOFFSET_0}, - {0, 0, R_02844C_PA_CL_VPORT_ZSCALE_0}, - {0, 0, R_028450_PA_CL_VPORT_ZOFFSET_0}, - {0, 0, R_0286D4_SPI_INTERP_CONTROL_0}, - {0, 0, R_028810_PA_CL_CLIP_CNTL}, - {0, 0, R_028814_PA_SU_SC_MODE_CNTL}, - {0, 0, R_028818_PA_CL_VTE_CNTL}, - {0, 0, R_02881C_PA_CL_VS_OUT_CNTL}, - {0, 0, R_028820_PA_CL_NANINF_CNTL}, - {0, 0, R_028A00_PA_SU_POINT_SIZE}, - {0, 0, R_028A04_PA_SU_POINT_MINMAX}, - {0, 0, R_028A08_PA_SU_LINE_CNTL}, - {0, 0, R_028A0C_PA_SC_LINE_STIPPLE}, - {0, 0, R_028A48_PA_SC_MPASS_PS_CNTL}, - {0, 0, R_028C00_PA_SC_LINE_CNTL}, - {0, 0, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ}, - {0, 0, R_028C10_PA_CL_GB_VERT_DISC_ADJ}, - {0, 0, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ}, - {0, 0, R_028C18_PA_CL_GB_HORZ_DISC_ADJ}, - {0, 0, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL}, - {0, 0, R_028DFC_PA_SU_POLY_OFFSET_CLAMP}, - {0, 0, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE}, - {0, 0, R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET}, - {0, 0, R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE}, - {0, 0, R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET}, - {0, 0, R_028E20_PA_CL_UCP0_X}, - {0, 0, R_028E24_PA_CL_UCP0_Y}, - {0, 0, R_028E28_PA_CL_UCP0_Z}, - {0, 0, R_028E2C_PA_CL_UCP0_W}, - {0, 0, R_028E30_PA_CL_UCP1_X}, - {0, 0, R_028E34_PA_CL_UCP1_Y}, - {0, 0, R_028E38_PA_CL_UCP1_Z}, - {0, 0, R_028E3C_PA_CL_UCP1_W}, - {0, 0, R_028E40_PA_CL_UCP2_X}, - {0, 0, R_028E44_PA_CL_UCP2_Y}, - {0, 0, R_028E48_PA_CL_UCP2_Z}, - {0, 0, R_028E4C_PA_CL_UCP2_W}, - {0, 0, R_028E50_PA_CL_UCP3_X}, - {0, 0, R_028E54_PA_CL_UCP3_Y}, - {0, 0, R_028E58_PA_CL_UCP3_Z}, - {0, 0, R_028E5C_PA_CL_UCP3_W}, - {0, 0, R_028E60_PA_CL_UCP4_X}, - {0, 0, R_028E64_PA_CL_UCP4_Y}, - {0, 0, R_028E68_PA_CL_UCP4_Z}, - {0, 0, R_028E6C_PA_CL_UCP4_W}, - {0, 0, R_028E70_PA_CL_UCP5_X}, - {0, 0, R_028E74_PA_CL_UCP5_Y}, - {0, 0, R_028E78_PA_CL_UCP5_Z}, - {0, 0, R_028E7C_PA_CL_UCP5_W}, - {0, 0, R_028380_SQ_VTX_SEMANTIC_0}, - {0, 0, R_028384_SQ_VTX_SEMANTIC_1}, - {0, 0, R_028388_SQ_VTX_SEMANTIC_2}, - {0, 0, R_02838C_SQ_VTX_SEMANTIC_3}, - {0, 0, R_028390_SQ_VTX_SEMANTIC_4}, - {0, 0, R_028394_SQ_VTX_SEMANTIC_5}, - {0, 0, R_028398_SQ_VTX_SEMANTIC_6}, - {0, 0, R_02839C_SQ_VTX_SEMANTIC_7}, - {0, 0, R_0283A0_SQ_VTX_SEMANTIC_8}, - {0, 0, R_0283A4_SQ_VTX_SEMANTIC_9}, - {0, 0, R_0283A8_SQ_VTX_SEMANTIC_10}, - {0, 0, R_0283AC_SQ_VTX_SEMANTIC_11}, - {0, 0, R_0283B0_SQ_VTX_SEMANTIC_12}, - {0, 0, R_0283B4_SQ_VTX_SEMANTIC_13}, - {0, 0, R_0283B8_SQ_VTX_SEMANTIC_14}, - {0, 0, R_0283BC_SQ_VTX_SEMANTIC_15}, - {0, 0, R_0283C0_SQ_VTX_SEMANTIC_16}, - {0, 0, R_0283C4_SQ_VTX_SEMANTIC_17}, - {0, 0, R_0283C8_SQ_VTX_SEMANTIC_18}, - {0, 0, R_0283CC_SQ_VTX_SEMANTIC_19}, - {0, 0, R_0283D0_SQ_VTX_SEMANTIC_20}, - {0, 0, R_0283D4_SQ_VTX_SEMANTIC_21}, - {0, 0, R_0283D8_SQ_VTX_SEMANTIC_22}, - {0, 0, R_0283DC_SQ_VTX_SEMANTIC_23}, - {0, 0, R_0283E0_SQ_VTX_SEMANTIC_24}, - {0, 0, R_0283E4_SQ_VTX_SEMANTIC_25}, - {0, 0, R_0283E8_SQ_VTX_SEMANTIC_26}, - {0, 0, R_0283EC_SQ_VTX_SEMANTIC_27}, - {0, 0, R_0283F0_SQ_VTX_SEMANTIC_28}, - {0, 0, R_0283F4_SQ_VTX_SEMANTIC_29}, - {0, 0, R_0283F8_SQ_VTX_SEMANTIC_30}, - {0, 0, R_0283FC_SQ_VTX_SEMANTIC_31}, - {0, 0, R_028614_SPI_VS_OUT_ID_0}, - {0, 0, R_028618_SPI_VS_OUT_ID_1}, - {0, 0, R_02861C_SPI_VS_OUT_ID_2}, - {0, 0, R_028620_SPI_VS_OUT_ID_3}, - {0, 0, R_028624_SPI_VS_OUT_ID_4}, - {0, 0, R_028628_SPI_VS_OUT_ID_5}, - {0, 0, R_02862C_SPI_VS_OUT_ID_6}, - {0, 0, R_028630_SPI_VS_OUT_ID_7}, - {0, 0, R_028634_SPI_VS_OUT_ID_8}, - {0, 0, R_028638_SPI_VS_OUT_ID_9}, - {0, 0, R_0286C4_SPI_VS_OUT_CONFIG}, - {1, 0, R_028858_SQ_PGM_START_VS}, - {0, S_0085F0_SH_ACTION_ENA(1), R_028868_SQ_PGM_RESOURCES_VS}, - {1, 0, R_028894_SQ_PGM_START_FS}, - {0, S_0085F0_SH_ACTION_ENA(1), R_0288A4_SQ_PGM_RESOURCES_FS}, - {0, 0, R_0288D0_SQ_PGM_CF_OFFSET_VS}, - {0, 0, R_0288DC_SQ_PGM_CF_OFFSET_FS}, - {0, 0, R_028644_SPI_PS_INPUT_CNTL_0}, - {0, 0, R_028648_SPI_PS_INPUT_CNTL_1}, - {0, 0, R_02864C_SPI_PS_INPUT_CNTL_2}, - {0, 0, R_028650_SPI_PS_INPUT_CNTL_3}, - {0, 0, R_028654_SPI_PS_INPUT_CNTL_4}, - {0, 0, R_028658_SPI_PS_INPUT_CNTL_5}, - {0, 0, R_02865C_SPI_PS_INPUT_CNTL_6}, - {0, 0, R_028660_SPI_PS_INPUT_CNTL_7}, - {0, 0, R_028664_SPI_PS_INPUT_CNTL_8}, - {0, 0, R_028668_SPI_PS_INPUT_CNTL_9}, - {0, 0, R_02866C_SPI_PS_INPUT_CNTL_10}, - {0, 0, R_028670_SPI_PS_INPUT_CNTL_11}, - {0, 0, R_028674_SPI_PS_INPUT_CNTL_12}, - {0, 0, R_028678_SPI_PS_INPUT_CNTL_13}, - {0, 0, R_02867C_SPI_PS_INPUT_CNTL_14}, - {0, 0, R_028680_SPI_PS_INPUT_CNTL_15}, - {0, 0, R_028684_SPI_PS_INPUT_CNTL_16}, - {0, 0, R_028688_SPI_PS_INPUT_CNTL_17}, - {0, 0, R_02868C_SPI_PS_INPUT_CNTL_18}, - {0, 0, R_028690_SPI_PS_INPUT_CNTL_19}, - {0, 0, R_028694_SPI_PS_INPUT_CNTL_20}, - {0, 0, R_028698_SPI_PS_INPUT_CNTL_21}, - {0, 0, R_02869C_SPI_PS_INPUT_CNTL_22}, - {0, 0, R_0286A0_SPI_PS_INPUT_CNTL_23}, - {0, 0, R_0286A4_SPI_PS_INPUT_CNTL_24}, - {0, 0, R_0286A8_SPI_PS_INPUT_CNTL_25}, - {0, 0, R_0286AC_SPI_PS_INPUT_CNTL_26}, - {0, 0, R_0286B0_SPI_PS_INPUT_CNTL_27}, - {0, 0, R_0286B4_SPI_PS_INPUT_CNTL_28}, - {0, 0, R_0286B8_SPI_PS_INPUT_CNTL_29}, - {0, 0, R_0286BC_SPI_PS_INPUT_CNTL_30}, - {0, 0, R_0286C0_SPI_PS_INPUT_CNTL_31}, - {0, 0, R_0286CC_SPI_PS_IN_CONTROL_0}, - {0, 0, R_0286D0_SPI_PS_IN_CONTROL_1}, - {0, 0, R_0286D8_SPI_INPUT_Z}, - {1, S_0085F0_SH_ACTION_ENA(1), R_028840_SQ_PGM_START_PS}, - {0, 0, R_028850_SQ_PGM_RESOURCES_PS}, - {0, 0, R_028854_SQ_PGM_EXPORTS_PS}, - {0, 0, R_0288CC_SQ_PGM_CF_OFFSET_PS}, - {0, 0, R_008958_VGT_PRIMITIVE_TYPE}, - {0, 0, R_028400_VGT_MAX_VTX_INDX}, - {0, 0, R_028404_VGT_MIN_VTX_INDX}, - {0, 0, R_028408_VGT_INDX_OFFSET}, - {0, 0, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX}, - {0, 0, R_028A84_VGT_PRIMITIVEID_EN}, - {0, 0, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN}, - {0, 0, R_028AA0_VGT_INSTANCE_STEP_RATE_0}, - {0, 0, R_028AA4_VGT_INSTANCE_STEP_RATE_1}, -}; - -/* SHADER CONSTANT R600/R700 */ -static int r600_state_constant_init(struct r600_context *ctx, u32 offset) -{ - struct r600_reg r600_shader_constant[] = { - {0, 0, R_030000_SQ_ALU_CONSTANT0_0}, - {0, 0, R_030004_SQ_ALU_CONSTANT1_0}, - {0, 0, R_030008_SQ_ALU_CONSTANT2_0}, - {0, 0, R_03000C_SQ_ALU_CONSTANT3_0}, - }; - unsigned nreg = sizeof(r600_shader_constant)/sizeof(struct r600_reg); - - for (int i = 0; i < nreg; i++) { - r600_shader_constant[i].offset += offset; - } - return r600_context_add_block(ctx, r600_shader_constant, nreg); -} - -/* SHADER RESOURCE R600/R700 */ -static int r600_state_resource_init(struct r600_context *ctx, u32 offset) -{ - struct r600_reg r600_shader_resource[] = { - {0, 0, R_038000_RESOURCE0_WORD0}, - {0, 0, R_038004_RESOURCE0_WORD1}, - {1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), R_038008_RESOURCE0_WORD2}, - {1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), R_03800C_RESOURCE0_WORD3}, - {0, 0, R_038010_RESOURCE0_WORD4}, - {0, 0, R_038014_RESOURCE0_WORD5}, - {0, 0, R_038018_RESOURCE0_WORD6}, - }; - unsigned nreg = sizeof(r600_shader_resource)/sizeof(struct r600_reg); - - for (int i = 0; i < nreg; i++) { - r600_shader_resource[i].offset += offset; - } - return r600_context_add_block(ctx, r600_shader_resource, nreg); -} - -/* SHADER SAMPLER R600/R700 */ -static int r600_state_sampler_init(struct r600_context *ctx, u32 offset) -{ - struct r600_reg r600_shader_sampler[] = { - {0, 0, R_03C000_SQ_TEX_SAMPLER_WORD0_0}, - {0, 0, R_03C004_SQ_TEX_SAMPLER_WORD1_0}, - {0, 0, R_03C008_SQ_TEX_SAMPLER_WORD2_0}, - }; - unsigned nreg = sizeof(r600_shader_sampler)/sizeof(struct r600_reg); - - for (int i = 0; i < nreg; i++) { - r600_shader_sampler[i].offset += offset; - } - return r600_context_add_block(ctx, r600_shader_sampler, nreg); -} - -/* SHADER SAMPLER BORDER R600/R700 */ -static int r600_state_sampler_border_init(struct r600_context *ctx, u32 offset) -{ - struct r600_reg r600_shader_sampler_border[] = { - {0, 0, R_00A400_TD_PS_SAMPLER0_BORDER_RED}, - {0, 0, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN}, - {0, 0, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE}, - {0, 0, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA}, - }; - unsigned nreg = sizeof(r600_shader_sampler_border)/sizeof(struct r600_reg); - - for (int i = 0; i < nreg; i++) { - r600_shader_sampler_border[i].offset += offset; - } - return r600_context_add_block(ctx, r600_shader_sampler_border, nreg); -} - -/* initialize */ -void r600_context_fini(struct r600_context *ctx) -{ - for (int i = 0; i < ctx->ngroups; i++) { - r600_group_fini(&ctx->groups[i]); - } - free(ctx->reloc); - free(ctx->pm4); - memset(ctx, 0, sizeof(struct r600_context)); -} - -int r600_context_init(struct r600_context *ctx, struct radeon *radeon) -{ - int r; - - memset(ctx, 0, sizeof(struct r600_context)); - ctx->radeon = radeon; - /* initialize groups */ - r = r600_group_init(&ctx->groups[R600_GROUP_CONFIG], R600_CONFIG_REG_OFFSET, R600_CONFIG_REG_END); - if (r) { - goto out_err; - } - r = r600_group_init(&ctx->groups[R600_GROUP_CTL_CONST], R600_CTL_CONST_OFFSET, R600_CTL_CONST_END); - if (r) { - goto out_err; - } - r = r600_group_init(&ctx->groups[R600_GROUP_LOOP_CONST], R600_LOOP_CONST_OFFSET, R600_LOOP_CONST_END); - if (r) { - goto out_err; - } - r = r600_group_init(&ctx->groups[R600_GROUP_BOOL_CONST], R600_BOOL_CONST_OFFSET, R600_BOOL_CONST_END); - if (r) { - goto out_err; - } - r = r600_group_init(&ctx->groups[R600_GROUP_SAMPLER], R600_SAMPLER_OFFSET, R600_SAMPLER_END); - if (r) { - goto out_err; - } - r = r600_group_init(&ctx->groups[R600_GROUP_RESOURCE], R600_RESOURCE_OFFSET, R600_RESOURCE_END); - if (r) { - goto out_err; - } - r = r600_group_init(&ctx->groups[R600_GROUP_ALU_CONST], R600_ALU_CONST_OFFSET, R600_ALU_CONST_END); - if (r) { - goto out_err; - } - r = r600_group_init(&ctx->groups[R600_GROUP_CONTEXT], R600_CONTEXT_REG_OFFSET, R600_CONTEXT_REG_END); - if (r) { - goto out_err; - } - ctx->ngroups = R600_NGROUPS; - - /* add blocks */ - r = r600_context_add_block(ctx, r600_reg_list, sizeof(r600_reg_list)/sizeof(struct r600_reg)); - if (r) - goto out_err; - - /* PS SAMPLER BORDER */ - for (int j = 0, offset = 0; j < 18; j++, offset += 0x10) { - r = r600_state_sampler_border_init(ctx, offset); - if (r) - goto out_err; - } - - /* VS SAMPLER BORDER */ - for (int j = 0, offset = 0x200; j < 18; j++, offset += 0x10) { - r = r600_state_sampler_border_init(ctx, offset); - if (r) - goto out_err; - } - /* PS SAMPLER */ - for (int j = 0, offset = 0; j < 18; j++, offset += 0xC) { - r = r600_state_sampler_init(ctx, offset); - if (r) - goto out_err; - } - /* VS SAMPLER */ - for (int j = 0, offset = 0xD8; j < 18; j++, offset += 0xC) { - r = r600_state_sampler_init(ctx, offset); - if (r) - goto out_err; - } - /* PS RESOURCE */ - for (int j = 0, offset = 0; j < 160; j++, offset += 0x1C) { - r = r600_state_resource_init(ctx, offset); - if (r) - goto out_err; - } - /* VS RESOURCE */ - for (int j = 0, offset = 0x1180; j < 160; j++, offset += 0x1C) { - r = r600_state_resource_init(ctx, offset); - if (r) - goto out_err; - } - /* PS CONSTANT */ - for (int j = 0, offset = 0; j < 256; j++, offset += 0x10) { - r = r600_state_constant_init(ctx, offset); - if (r) - goto out_err; - } - /* VS CONSTANT */ - for (int j = 0, offset = 0x1000; j < 256; j++, offset += 0x10) { - r = r600_state_constant_init(ctx, offset); - if (r) - goto out_err; - } - - /* allocate cs variables */ - ctx->nreloc = RADEON_CTX_MAX_PM4; - ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc)); - if (ctx->reloc == NULL) { - r = -ENOMEM; - goto out_err; - } - ctx->bo = calloc(ctx->nreloc, sizeof(void *)); - if (ctx->bo == NULL) { - r = -ENOMEM; - goto out_err; - } - ctx->pm4_ndwords = RADEON_CTX_MAX_PM4; - ctx->pm4 = calloc(ctx->pm4_ndwords, 4); - if (ctx->pm4 == NULL) { - r = -ENOMEM; - goto out_err; - } - return 0; -out_err: - r600_context_fini(ctx); - return r; -} - -static void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct radeon_ws_bo *bo) -{ - int i, reloc_id; - unsigned handle = radeon_ws_bo_get_handle(bo); - - assert(bo != NULL); - for (i = 0, reloc_id = -1; i < ctx->creloc; i++) { - if (ctx->reloc[i].handle == handle) { - reloc_id = i * sizeof(struct r600_reloc) / 4; - /* set PKT3 to point to proper reloc */ - *pm4 = reloc_id; - } - } - if (reloc_id == -1) { - /* add new relocation */ - if (ctx->creloc >= ctx->nreloc) { - r600_context_flush(ctx); - } - reloc_id = ctx->creloc * sizeof(struct r600_reloc) / 4; - ctx->reloc[ctx->creloc].handle = handle; - ctx->reloc[ctx->creloc].read_domain = RADEON_GEM_DOMAIN_GTT; - ctx->reloc[ctx->creloc].write_domain = RADEON_GEM_DOMAIN_GTT; - ctx->reloc[ctx->creloc].flags = 0; - radeon_ws_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo); - ctx->creloc++; - /* set PKT3 to point to proper reloc */ - *pm4 = reloc_id; - } -} - -void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state) -{ - struct r600_group *group; - struct r600_group_block *block; - - for (int i = 0; i < state->nregs; i++) { - unsigned id; - group = &ctx->groups[state->regs[i].group_id]; - id = group->offset_block_id[(state->regs[i].offset - group->start_offset) >> 2]; - block = &group->blocks[id]; - id = (state->regs[i].offset - block->start_offset) >> 2; - block->pm4[id] &= ~state->regs[i].mask; - block->pm4[id] |= state->regs[i].value; - if (block->pm4_bo_index[id]) { - /* find relocation */ - id = block->pm4_bo_index[id]; - radeon_ws_bo_reference(ctx->radeon, &block->reloc[id].bo, state->regs[i].bo); - for (int j = 0; j < block->reloc[id].nreloc; j++) { - r600_context_bo_reloc(ctx, &block->pm4[block->reloc[id].bo_pm4_index[j]], - block->reloc[id].bo); - } - } - block->status |= R600_BLOCK_STATUS_ENABLED; - block->status |= R600_BLOCK_STATUS_DIRTY; - ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords; - } -} - -static inline void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) -{ - struct r600_group_block *block; - unsigned id; - - offset -= ctx->groups[R600_GROUP_RESOURCE].start_offset; - id = ctx->groups[R600_GROUP_RESOURCE].offset_block_id[offset >> 2]; - block = &ctx->groups[R600_GROUP_RESOURCE].blocks[id]; - block->pm4[0] = state->regs[0].value; - block->pm4[1] = state->regs[1].value; - block->pm4[2] = state->regs[2].value; - block->pm4[3] = state->regs[3].value; - block->pm4[4] = state->regs[4].value; - block->pm4[5] = state->regs[5].value; - block->pm4[6] = state->regs[6].value; - radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, block->reloc[1].bo); - radeon_ws_bo_reference(ctx->radeon , &block->reloc[2].bo, block->reloc[2].bo); - if (state->regs[0].bo) { - /* VERTEX RESOURCE, we preted there is 2 bo to relocate so - * we have single case btw VERTEX & TEXTURE resource - */ - radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo); - radeon_ws_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo); - } else { - /* TEXTURE RESOURCE */ - radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo); - radeon_ws_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo); - } - r600_context_bo_reloc(ctx, &block->pm4[block->reloc[1].bo_pm4_index[0]], block->reloc[1].bo); - r600_context_bo_reloc(ctx, &block->pm4[block->reloc[2].bo_pm4_index[0]], block->reloc[2].bo); - block->status |= R600_BLOCK_STATUS_ENABLED; - block->status |= R600_BLOCK_STATUS_DIRTY; - ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords; -} - -void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) -{ - unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1C * rid; - - r600_context_pipe_state_set_resource(ctx, state, offset); -} - -void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) -{ - unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1180 + 0x1C * rid; - - r600_context_pipe_state_set_resource(ctx, state, offset); -} - -static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) -{ - struct r600_group_block *block; - unsigned id; - - offset -= ctx->groups[R600_GROUP_SAMPLER].start_offset; - id = ctx->groups[R600_GROUP_SAMPLER].offset_block_id[offset >> 2]; - block = &ctx->groups[R600_GROUP_SAMPLER].blocks[id]; - block->pm4[0] = state->regs[0].value; - block->pm4[1] = state->regs[1].value; - block->pm4[2] = state->regs[2].value; - block->status |= R600_BLOCK_STATUS_ENABLED; - block->status |= R600_BLOCK_STATUS_DIRTY; - ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords; -} - -static inline void r600_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) -{ - struct r600_group_block *block; - unsigned id; - - offset -= ctx->groups[R600_GROUP_CONFIG].start_offset; - id = ctx->groups[R600_GROUP_CONFIG].offset_block_id[offset >> 2]; - block = &ctx->groups[R600_GROUP_CONFIG].blocks[id]; - block->pm4[0] = state->regs[3].value; - block->pm4[1] = state->regs[4].value; - block->pm4[2] = state->regs[5].value; - block->pm4[3] = state->regs[6].value; - block->status |= R600_BLOCK_STATUS_ENABLED; - block->status |= R600_BLOCK_STATUS_DIRTY; - ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords; -} - -void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) -{ - unsigned offset; - - offset = 0x0003C000 + id * 0xc; - r600_context_pipe_state_set_sampler(ctx, state, offset); - if (state->nregs > 3) { - offset = 0x0000A400 + id * 0x10; - r600_context_pipe_state_set_sampler_border(ctx, state, offset); - } -} - -void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) -{ - unsigned offset; - - offset = 0x0003C0D8 + id * 0xc; - r600_context_pipe_state_set_sampler(ctx, state, offset); - if (state->nregs > 3) { - offset = 0x0000A600 + id * 0x10; - r600_context_pipe_state_set_sampler_border(ctx, state, offset); - } -} - -static inline void r600_context_group_emit_dirty(struct r600_context *ctx, struct r600_group *group, unsigned opcode) -{ - for (int i = 0; i < group->nblocks; i++) { - struct r600_group_block *block = &group->blocks[i]; - if (block->status & R600_BLOCK_STATUS_DIRTY) { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(opcode, block->nreg); - ctx->pm4[ctx->pm4_cdwords++] = (block->start_offset - group->start_offset) >> 2; - memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, block->pm4_ndwords * 4); - ctx->pm4_cdwords += block->pm4_ndwords; - block->status ^= R600_BLOCK_STATUS_DIRTY; - } - } -} - -void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) -{ - unsigned ndwords = 9; - - if (draw->indices) { - ndwords = 13; - /* make sure there is enough relocation space before scheduling draw */ - if (ctx->creloc >= (ctx->nreloc - 1)) { - r600_context_flush(ctx); - } - } - if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { - /* need to flush */ - r600_context_flush(ctx); - } - /* at that point everythings is flushed and ctx->pm4_cdwords = 0 */ - if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) { - R600_ERR("context is too big to be scheduled\n"); - return; - } - /* Ok we enough room to copy packet */ - r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_CONFIG], PKT3_SET_CONFIG_REG); - r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_CONTEXT], PKT3_SET_CONTEXT_REG); - r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_ALU_CONST], PKT3_SET_ALU_CONST); - r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_SAMPLER], PKT3_SET_SAMPLER); - r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_RESOURCE], PKT3_SET_RESOURCE); - /* draw packet */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances; - if (draw->indices) { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3); - ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset; - ctx->pm4[ctx->pm4_cdwords++] = 0; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices); - } else { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; - } - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0); - ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT; -} - -void r600_context_flush(struct r600_context *ctx) -{ - struct drm_radeon_cs drmib; - struct drm_radeon_cs_chunk chunks[2]; - uint64_t chunk_array[2]; - struct r600_group_block *block; - int r; - - if (!ctx->pm4_cdwords) - return; - -#if 1 - /* emit cs */ - drmib.num_chunks = 2; - drmib.chunks = (uint64_t)(uintptr_t)chunk_array; - chunks[0].chunk_id = RADEON_CHUNK_ID_IB; - chunks[0].length_dw = ctx->pm4_cdwords; - chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4; - chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; - chunks[1].length_dw = ctx->creloc * sizeof(struct r600_reloc) / 4; - chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc; - chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0]; - chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1]; - r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib, - sizeof(struct drm_radeon_cs)); -#endif - /* restart */ - for (int i = 0; i < ctx->creloc; i++) { - radeon_ws_bo_reference(ctx->radeon, &ctx->bo[i], NULL); - } - ctx->creloc = 0; - ctx->pm4_dirty_cdwords = 0; - ctx->pm4_cdwords = 0; - for (int i = 0; i < ctx->ngroups; i++) { - for (int j = 0; j < ctx->groups[i].nblocks; j++) { - /* mark enabled block as dirty */ - block = &ctx->groups[i].blocks[j]; - if (block->status & R600_BLOCK_STATUS_ENABLED) { - ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords; - block->status |= R600_BLOCK_STATUS_DIRTY; - for (int k = 1; k <= block->nbo; k++) { - for (int l = 0; l < block->reloc[k].nreloc; l++) { - r600_context_bo_reloc(ctx, - &block->pm4[block->reloc[k].bo_pm4_index[l]], - block->reloc[k].bo); - } - } - } - } - } -} - -void r600_context_dump_bof(struct r600_context *ctx, const char *file) -{ - bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root; - unsigned i; - - root = device_id = bcs = blob = array = bo = size = handle = NULL; - root = bof_object(); - if (root == NULL) - goto out_err; - device_id = bof_int32(ctx->radeon->device); - if (device_id == NULL) - goto out_err; - if (bof_object_set(root, "device_id", device_id)) - goto out_err; - bof_decref(device_id); - device_id = NULL; - /* dump relocs */ - blob = bof_blob(ctx->creloc * 16, ctx->reloc); - if (blob == NULL) - goto out_err; - if (bof_object_set(root, "reloc", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - /* dump cs */ - blob = bof_blob(ctx->pm4_cdwords * 4, ctx->pm4); - if (blob == NULL) - goto out_err; - if (bof_object_set(root, "pm4", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - /* dump bo */ - array = bof_array(); - if (array == NULL) - goto out_err; - for (i = 0; i < ctx->creloc; i++) { - struct radeon_bo *rbo = radeon_bo_pb_get_bo(ctx->bo[i]->pb); - bo = bof_object(); - if (bo == NULL) - goto out_err; - size = bof_int32(rbo->size); - if (size == NULL) - goto out_err; - if (bof_object_set(bo, "size", size)) - goto out_err; - bof_decref(size); - size = NULL; - handle = bof_int32(rbo->handle); - if (handle == NULL) - goto out_err; - if (bof_object_set(bo, "handle", handle)) - goto out_err; - bof_decref(handle); - handle = NULL; - radeon_bo_map(ctx->radeon, rbo); - blob = bof_blob(rbo->size, rbo->data); - radeon_bo_unmap(ctx->radeon, rbo); - if (blob == NULL) - goto out_err; - if (bof_object_set(bo, "data", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - if (bof_array_append(array, bo)) - goto out_err; - bof_decref(bo); - bo = NULL; - } - if (bof_object_set(root, "bo", array)) - goto out_err; - bof_dump_file(root, file); -out_err: - bof_decref(blob); - bof_decref(array); - bof_decref(bo); - bof_decref(size); - bof_decref(handle); - bof_decref(device_id); - bof_decref(root); -} diff --git a/src/gallium/winsys/r600/drm/r600_states.h b/src/gallium/winsys/r600/drm/r600_states.h deleted file mode 100644 index 50b25a9940..0000000000 --- a/src/gallium/winsys/r600/drm/r600_states.h +++ /dev/null @@ -1,522 +0,0 @@ -/* - * Copyright © 2009 Jerome Glisse <glisse@freedesktop.org> - * - * This file is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. - */ -#ifndef R600_STATES_H -#define R600_STATES_H - -static const struct radeon_register R600_names_CONFIG[] = { - {0x00008C00, 0, 0, "SQ_CONFIG"}, - {0x00008C04, 0, 0, "SQ_GPR_RESOURCE_MGMT_1"}, - {0x00008C08, 0, 0, "SQ_GPR_RESOURCE_MGMT_2"}, - {0x00008C0C, 0, 0, "SQ_THREAD_RESOURCE_MGMT"}, - {0x00008C10, 0, 0, "SQ_STACK_RESOURCE_MGMT_1"}, - {0x00008C14, 0, 0, "SQ_STACK_RESOURCE_MGMT_2"}, - {0x00008D8C, 0, 0, "SQ_DYN_GPR_CNTL_PS_FLUSH_REQ"}, - {0x00009508, 0, 0, "TA_CNTL_AUX"}, - {0x00009714, 0, 0, "VC_ENHANCE"}, - {0x00009830, 0, 0, "DB_DEBUG"}, - {0x00009838, 0, 0, "DB_WATERMARKS"}, - {0x00028350, 0, 0, "SX_MISC"}, - {0x000286C8, 0, 0, "SPI_THREAD_GROUPING"}, - {0x000287A0, 0, 0, "CB_SHADER_CONTROL"}, - {0x000288A8, 0, 0, "SQ_ESGS_RING_ITEMSIZE"}, - {0x000288AC, 0, 0, "SQ_GSVS_RING_ITEMSIZE"}, - {0x000288B0, 0, 0, "SQ_ESTMP_RING_ITEMSIZE"}, - {0x000288B4, 0, 0, "SQ_GSTMP_RING_ITEMSIZE"}, - {0x000288B8, 0, 0, "SQ_VSTMP_RING_ITEMSIZE"}, - {0x000288BC, 0, 0, "SQ_PSTMP_RING_ITEMSIZE"}, - {0x000288C0, 0, 0, "SQ_FBUF_RING_ITEMSIZE"}, - {0x000288C4, 0, 0, "SQ_REDUC_RING_ITEMSIZE"}, - {0x000288C8, 0, 0, "SQ_GS_VERT_ITEMSIZE"}, - {0x00028A10, 0, 0, "VGT_OUTPUT_PATH_CNTL"}, - {0x00028A14, 0, 0, "VGT_HOS_CNTL"}, - {0x00028A18, 0, 0, "VGT_HOS_MAX_TESS_LEVEL"}, - {0x00028A1C, 0, 0, "VGT_HOS_MIN_TESS_LEVEL"}, - {0x00028A20, 0, 0, "VGT_HOS_REUSE_DEPTH"}, - {0x00028A24, 0, 0, "VGT_GROUP_PRIM_TYPE"}, - {0x00028A28, 0, 0, "VGT_GROUP_FIRST_DECR"}, - {0x00028A2C, 0, 0, "VGT_GROUP_DECR"}, - {0x00028A30, 0, 0, "VGT_GROUP_VECT_0_CNTL"}, - {0x00028A34, 0, 0, "VGT_GROUP_VECT_1_CNTL"}, - {0x00028A38, 0, 0, "VGT_GROUP_VECT_0_FMT_CNTL"}, - {0x00028A3C, 0, 0, "VGT_GROUP_VECT_1_FMT_CNTL"}, - {0x00028A40, 0, 0, "VGT_GS_MODE"}, - {0x00028A4C, 0, 0, "PA_SC_MODE_CNTL"}, - {0x00028AB0, 0, 0, "VGT_STRMOUT_EN"}, - {0x00028AB4, 0, 0, "VGT_REUSE_OFF"}, - {0x00028AB8, 0, 0, "VGT_VTX_CNT_EN"}, - {0x00028B20, 0, 0, "VGT_STRMOUT_BUFFER_EN"}, -}; - -static const struct radeon_register R600_names_CB_CNTL[] = { - {0x00028120, 0, 0, "CB_CLEAR_RED"}, - {0x00028124, 0, 0, "CB_CLEAR_GREEN"}, - {0x00028128, 0, 0, "CB_CLEAR_BLUE"}, - {0x0002812C, 0, 0, "CB_CLEAR_ALPHA"}, - {0x0002823C, 0, 0, "CB_SHADER_MASK"}, - {0x00028238, 0, 0, "CB_TARGET_MASK"}, - {0x00028424, 0, 0, "CB_FOG_RED"}, - {0x00028428, 0, 0, "CB_FOG_GREEN"}, - {0x0002842C, 0, 0, "CB_FOG_BLUE"}, - {0x00028808, 0, 0, "CB_COLOR_CONTROL"}, - {0x00028C04, 0, 0, "PA_SC_AA_CONFIG"}, - {0x00028C1C, 0, 0, "PA_SC_AA_SAMPLE_LOCS_MCTX"}, - {0x00028C20, 0, 0, "PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX"}, - {0x00028C30, 0, 0, "CB_CLRCMP_CONTROL"}, - {0x00028C34, 0, 0, "CB_CLRCMP_SRC"}, - {0x00028C38, 0, 0, "CB_CLRCMP_DST"}, - {0x00028C3C, 0, 0, "CB_CLRCMP_MSK"}, - {0x00028C48, 0, 0, "PA_SC_AA_MASK"}, -}; - -static const struct radeon_register R600_names_RASTERIZER[] = { - {0x000286D4, 0, 0, "SPI_INTERP_CONTROL_0"}, - {0x00028810, 0, 0, "PA_CL_CLIP_CNTL"}, - {0x00028814, 0, 0, "PA_SU_SC_MODE_CNTL"}, - {0x0002881C, 0, 0, "PA_CL_VS_OUT_CNTL"}, - {0x00028820, 0, 0, "PA_CL_NANINF_CNTL"}, - {0x00028A00, 0, 0, "PA_SU_POINT_SIZE"}, - {0x00028A04, 0, 0, "PA_SU_POINT_MINMAX"}, - {0x00028A08, 0, 0, "PA_SU_LINE_CNTL"}, - {0x00028A0C, 0, 0, "PA_SC_LINE_STIPPLE"}, - {0x00028A48, 0, 0, "PA_SC_MPASS_PS_CNTL"}, - {0x00028C00, 0, 0, "PA_SC_LINE_CNTL"}, - {0x00028C0C, 0, 0, "PA_CL_GB_VERT_CLIP_ADJ"}, - {0x00028C10, 0, 0, "PA_CL_GB_VERT_DISC_ADJ"}, - {0x00028C14, 0, 0, "PA_CL_GB_HORZ_CLIP_ADJ"}, - {0x00028C18, 0, 0, "PA_CL_GB_HORZ_DISC_ADJ"}, - {0x00028DF8, 0, 0, "PA_SU_POLY_OFFSET_DB_FMT_CNTL"}, - {0x00028DFC, 0, 0, "PA_SU_POLY_OFFSET_CLAMP"}, - {0x00028E00, 0, 0, "PA_SU_POLY_OFFSET_FRONT_SCALE"}, - {0x00028E04, 0, 0, "PA_SU_POLY_OFFSET_FRONT_OFFSET"}, - {0x00028E08, 0, 0, "PA_SU_POLY_OFFSET_BACK_SCALE"}, - {0x00028E0C, 0, 0, "PA_SU_POLY_OFFSET_BACK_OFFSET"}, -}; - -static const struct radeon_register R600_names_VIEWPORT[] = { - {0x000282D0, 0, 0, "PA_SC_VPORT_ZMIN_0"}, - {0x000282D4, 0, 0, "PA_SC_VPORT_ZMAX_0"}, - {0x0002843C, 0, 0, "PA_CL_VPORT_XSCALE_0"}, - {0x00028444, 0, 0, "PA_CL_VPORT_YSCALE_0"}, - {0x0002844C, 0, 0, "PA_CL_VPORT_ZSCALE_0"}, - {0x00028440, 0, 0, "PA_CL_VPORT_XOFFSET_0"}, - {0x00028448, 0, 0, "PA_CL_VPORT_YOFFSET_0"}, - {0x00028450, 0, 0, "PA_CL_VPORT_ZOFFSET_0"}, - {0x00028818, 0, 0, "PA_CL_VTE_CNTL"}, -}; - -static const struct radeon_register R600_names_SCISSOR[] = { - {0x00028030, 0, 0, "PA_SC_SCREEN_SCISSOR_TL"}, - {0x00028034, 0, 0, "PA_SC_SCREEN_SCISSOR_BR"}, - {0x00028200, 0, 0, "PA_SC_WINDOW_OFFSET"}, - {0x00028204, 0, 0, "PA_SC_WINDOW_SCISSOR_TL"}, - {0x00028208, 0, 0, "PA_SC_WINDOW_SCISSOR_BR"}, - {0x0002820C, 0, 0, "PA_SC_CLIPRECT_RULE"}, - {0x00028210, 0, 0, "PA_SC_CLIPRECT_0_TL"}, - {0x00028214, 0, 0, "PA_SC_CLIPRECT_0_BR"}, - {0x00028218, 0, 0, "PA_SC_CLIPRECT_1_TL"}, - {0x0002821C, 0, 0, "PA_SC_CLIPRECT_1_BR"}, - {0x00028220, 0, 0, "PA_SC_CLIPRECT_2_TL"}, - {0x00028224, 0, 0, "PA_SC_CLIPRECT_2_BR"}, - {0x00028228, 0, 0, "PA_SC_CLIPRECT_3_TL"}, - {0x0002822C, 0, 0, "PA_SC_CLIPRECT_3_BR"}, - {0x00028230, 0, 0, "PA_SC_EDGERULE"}, - {0x00028240, 0, 0, "PA_SC_GENERIC_SCISSOR_TL"}, - {0x00028244, 0, 0, "PA_SC_GENERIC_SCISSOR_BR"}, - {0x00028250, 0, 0, "PA_SC_VPORT_SCISSOR_0_TL"}, - {0x00028254, 0, 0, "PA_SC_VPORT_SCISSOR_0_BR"}, -}; - -static const struct radeon_register R600_names_BLEND[] = { - {0x00028414, 0, 0, "CB_BLEND_RED"}, - {0x00028418, 0, 0, "CB_BLEND_GREEN"}, - {0x0002841C, 0, 0, "CB_BLEND_BLUE"}, - {0x00028420, 0, 0, "CB_BLEND_ALPHA"}, - {0x00028780, 0, 0, "CB_BLEND0_CONTROL"}, - {0x00028784, 0, 0, "CB_BLEND1_CONTROL"}, - {0x00028788, 0, 0, "CB_BLEND2_CONTROL"}, - {0x0002878C, 0, 0, "CB_BLEND3_CONTROL"}, - {0x00028790, 0, 0, "CB_BLEND4_CONTROL"}, - {0x00028794, 0, 0, "CB_BLEND5_CONTROL"}, - {0x00028798, 0, 0, "CB_BLEND6_CONTROL"}, - {0x0002879C, 0, 0, "CB_BLEND7_CONTROL"}, - {0x00028804, 0, 0, "CB_BLEND_CONTROL"}, -}; - -static const struct radeon_register R600_names_DSA[] = { - {0x00028028, 0, 0, "DB_STENCIL_CLEAR"}, - {0x0002802C, 0, 0, "DB_DEPTH_CLEAR"}, - {0x00028410, 0, 0, "SX_ALPHA_TEST_CONTROL"}, - {0x00028430, 0, 0, "DB_STENCILREFMASK"}, - {0x00028434, 0, 0, "DB_STENCILREFMASK_BF"}, - {0x00028438, 0, 0, "SX_ALPHA_REF"}, - {0x000286E0, 0, 0, "SPI_FOG_FUNC_SCALE"}, - {0x000286E4, 0, 0, "SPI_FOG_FUNC_BIAS"}, - {0x000286DC, 0, 0, "SPI_FOG_CNTL"}, - {0x00028800, 0, 0, "DB_DEPTH_CONTROL"}, - {0x0002880C, 0, 0, "DB_SHADER_CONTROL"}, - {0x00028D0C, 0, 0, "DB_RENDER_CONTROL"}, - {0x00028D10, 0, 0, "DB_RENDER_OVERRIDE"}, - {0x00028D2C, 0, 0, "DB_SRESULTS_COMPARE_STATE1"}, - {0x00028D30, 0, 0, "DB_PRELOAD_CONTROL"}, - {0x00028D44, 0, 0, "DB_ALPHA_TO_MASK"}, -}; - -static const struct radeon_register R600_names_VS_SHADER[] = { - {0x00028380, 0, 0, "SQ_VTX_SEMANTIC_0"}, - {0x00028384, 0, 0, "SQ_VTX_SEMANTIC_1"}, - {0x00028388, 0, 0, "SQ_VTX_SEMANTIC_2"}, - {0x0002838C, 0, 0, "SQ_VTX_SEMANTIC_3"}, - {0x00028390, 0, 0, "SQ_VTX_SEMANTIC_4"}, - {0x00028394, 0, 0, "SQ_VTX_SEMANTIC_5"}, - {0x00028398, 0, 0, "SQ_VTX_SEMANTIC_6"}, - {0x0002839C, 0, 0, "SQ_VTX_SEMANTIC_7"}, - {0x000283A0, 0, 0, "SQ_VTX_SEMANTIC_8"}, - {0x000283A4, 0, 0, "SQ_VTX_SEMANTIC_9"}, - {0x000283A8, 0, 0, "SQ_VTX_SEMANTIC_10"}, - {0x000283AC, 0, 0, "SQ_VTX_SEMANTIC_11"}, - {0x000283B0, 0, 0, "SQ_VTX_SEMANTIC_12"}, - {0x000283B4, 0, 0, "SQ_VTX_SEMANTIC_13"}, - {0x000283B8, 0, 0, "SQ_VTX_SEMANTIC_14"}, - {0x000283BC, 0, 0, "SQ_VTX_SEMANTIC_15"}, - {0x000283C0, 0, 0, "SQ_VTX_SEMANTIC_16"}, - {0x000283C4, 0, 0, "SQ_VTX_SEMANTIC_17"}, - {0x000283C8, 0, 0, "SQ_VTX_SEMANTIC_18"}, - {0x000283CC, 0, 0, "SQ_VTX_SEMANTIC_19"}, - {0x000283D0, 0, 0, "SQ_VTX_SEMANTIC_20"}, - {0x000283D4, 0, 0, "SQ_VTX_SEMANTIC_21"}, - {0x000283D8, 0, 0, "SQ_VTX_SEMANTIC_22"}, - {0x000283DC, 0, 0, "SQ_VTX_SEMANTIC_23"}, - {0x000283E0, 0, 0, "SQ_VTX_SEMANTIC_24"}, - {0x000283E4, 0, 0, "SQ_VTX_SEMANTIC_25"}, - {0x000283E8, 0, 0, "SQ_VTX_SEMANTIC_26"}, - {0x000283EC, 0, 0, "SQ_VTX_SEMANTIC_27"}, - {0x000283F0, 0, 0, "SQ_VTX_SEMANTIC_28"}, - {0x000283F4, 0, 0, "SQ_VTX_SEMANTIC_29"}, - {0x000283F8, 0, 0, "SQ_VTX_SEMANTIC_30"}, - {0x000283FC, 0, 0, "SQ_VTX_SEMANTIC_31"}, - {0x00028614, 0, 0, "SPI_VS_OUT_ID_0"}, - {0x00028618, 0, 0, "SPI_VS_OUT_ID_1"}, - {0x0002861C, 0, 0, "SPI_VS_OUT_ID_2"}, - {0x00028620, 0, 0, "SPI_VS_OUT_ID_3"}, - {0x00028624, 0, 0, "SPI_VS_OUT_ID_4"}, - {0x00028628, 0, 0, "SPI_VS_OUT_ID_5"}, - {0x0002862C, 0, 0, "SPI_VS_OUT_ID_6"}, - {0x00028630, 0, 0, "SPI_VS_OUT_ID_7"}, - {0x00028634, 0, 0, "SPI_VS_OUT_ID_8"}, - {0x00028638, 0, 0, "SPI_VS_OUT_ID_9"}, - {0x000286C4, 0, 0, "SPI_VS_OUT_CONFIG"}, - {0x00028858, 1, 0, "SQ_PGM_START_VS"}, - {0x00028868, 0, 0, "SQ_PGM_RESOURCES_VS"}, - {0x00028894, 1, 1, "SQ_PGM_START_FS"}, - {0x000288A4, 0, 0, "SQ_PGM_RESOURCES_FS"}, - {0x000288D0, 0, 0, "SQ_PGM_CF_OFFSET_VS"}, - {0x000288DC, 0, 0, "SQ_PGM_CF_OFFSET_FS"}, -}; - -static const struct radeon_register R600_names_PS_SHADER[] = { - {0x00028644, 0, 0, "SPI_PS_INPUT_CNTL_0"}, - {0x00028648, 0, 0, "SPI_PS_INPUT_CNTL_1"}, - {0x0002864C, 0, 0, "SPI_PS_INPUT_CNTL_2"}, - {0x00028650, 0, 0, "SPI_PS_INPUT_CNTL_3"}, - {0x00028654, 0, 0, "SPI_PS_INPUT_CNTL_4"}, - {0x00028658, 0, 0, "SPI_PS_INPUT_CNTL_5"}, - {0x0002865C, 0, 0, "SPI_PS_INPUT_CNTL_6"}, - {0x00028660, 0, 0, "SPI_PS_INPUT_CNTL_7"}, - {0x00028664, 0, 0, "SPI_PS_INPUT_CNTL_8"}, - {0x00028668, 0, 0, "SPI_PS_INPUT_CNTL_9"}, - {0x0002866C, 0, 0, "SPI_PS_INPUT_CNTL_10"}, - {0x00028670, 0, 0, "SPI_PS_INPUT_CNTL_11"}, - {0x00028674, 0, 0, "SPI_PS_INPUT_CNTL_12"}, - {0x00028678, 0, 0, "SPI_PS_INPUT_CNTL_13"}, - {0x0002867C, 0, 0, "SPI_PS_INPUT_CNTL_14"}, - {0x00028680, 0, 0, "SPI_PS_INPUT_CNTL_15"}, - {0x00028684, 0, 0, "SPI_PS_INPUT_CNTL_16"}, - {0x00028688, 0, 0, "SPI_PS_INPUT_CNTL_17"}, - {0x0002868C, 0, 0, "SPI_PS_INPUT_CNTL_18"}, - {0x00028690, 0, 0, "SPI_PS_INPUT_CNTL_19"}, - {0x00028694, 0, 0, "SPI_PS_INPUT_CNTL_20"}, - {0x00028698, 0, 0, "SPI_PS_INPUT_CNTL_21"}, - {0x0002869C, 0, 0, "SPI_PS_INPUT_CNTL_22"}, - {0x000286A0, 0, 0, "SPI_PS_INPUT_CNTL_23"}, - {0x000286A4, 0, 0, "SPI_PS_INPUT_CNTL_24"}, - {0x000286A8, 0, 0, "SPI_PS_INPUT_CNTL_25"}, - {0x000286AC, 0, 0, "SPI_PS_INPUT_CNTL_26"}, - {0x000286B0, 0, 0, "SPI_PS_INPUT_CNTL_27"}, - {0x000286B4, 0, 0, "SPI_PS_INPUT_CNTL_28"}, - {0x000286B8, 0, 0, "SPI_PS_INPUT_CNTL_29"}, - {0x000286BC, 0, 0, "SPI_PS_INPUT_CNTL_30"}, - {0x000286C0, 0, 0, "SPI_PS_INPUT_CNTL_31"}, - {0x000286CC, 0, 0, "SPI_PS_IN_CONTROL_0"}, - {0x000286D0, 0, 0, "SPI_PS_IN_CONTROL_1"}, - {0x000286D8, 0, 0, "SPI_INPUT_Z"}, - {0x00028840, 1, 0, "SQ_PGM_START_PS"}, - {0x00028850, 0, 0, "SQ_PGM_RESOURCES_PS"}, - {0x00028854, 0, 0, "SQ_PGM_EXPORTS_PS"}, - {0x000288CC, 0, 0, "SQ_PGM_CF_OFFSET_PS"}, -}; - -static const struct radeon_register R600_names_VS_CBUF[] = { - {0x00028180, 0, 0, "ALU_CONST_BUFFER_SIZE_VS_0"}, - {0x00028980, 1, 0, "ALU_CONST_CACHE_VS_0"}, -}; - -static const struct radeon_register R600_names_PS_CBUF[] = { - {0x00028140, 0, 0, "ALU_CONST_BUFFER_SIZE_PS_0"}, - {0x00028940, 1, 0, "ALU_CONST_CACHE_PS_0"}, -}; - -static const struct radeon_register R600_names_PS_CONSTANT[] = { - {0x00030000, 0, 0, "SQ_ALU_CONSTANT0_0"}, - {0x00030004, 0, 0, "SQ_ALU_CONSTANT1_0"}, - {0x00030008, 0, 0, "SQ_ALU_CONSTANT2_0"}, - {0x0003000C, 0, 0, "SQ_ALU_CONSTANT3_0"}, -}; - -static const struct radeon_register R600_names_VS_CONSTANT[] = { - {0x00031000, 0, 0, "SQ_ALU_CONSTANT0_256"}, - {0x00031004, 0, 0, "SQ_ALU_CONSTANT1_256"}, - {0x00031008, 0, 0, "SQ_ALU_CONSTANT2_256"}, - {0x0003100C, 0, 0, "SQ_ALU_CONSTANT3_256"}, -}; - -static const struct radeon_register R600_names_UCP[] = { - {0x00028E20, 0, 0, "PA_CL_UCP0_X"}, - {0x00028E24, 0, 0, "PA_CL_UCP0_Y"}, - {0x00028E28, 0, 0, "PA_CL_UCP0_Z"}, - {0x00028E2C, 0, 0, "PA_CL_UCP0_W"}, - {0x00028E30, 0, 0, "PA_CL_UCP1_X"}, - {0x00028E34, 0, 0, "PA_CL_UCP1_Y"}, - {0x00028E38, 0, 0, "PA_CL_UCP1_Z"}, - {0x00028E3C, 0, 0, "PA_CL_UCP1_W"}, - {0x00028E40, 0, 0, "PA_CL_UCP2_X"}, - {0x00028E44, 0, 0, "PA_CL_UCP2_Y"}, - {0x00028E48, 0, 0, "PA_CL_UCP2_Z"}, - {0x00028E4C, 0, 0, "PA_CL_UCP2_W"}, - {0x00028E50, 0, 0, "PA_CL_UCP3_X"}, - {0x00028E54, 0, 0, "PA_CL_UCP3_Y"}, - {0x00028E58, 0, 0, "PA_CL_UCP3_Z"}, - {0x00028E5C, 0, 0, "PA_CL_UCP3_W"}, - {0x00028E60, 0, 0, "PA_CL_UCP4_X"}, - {0x00028E64, 0, 0, "PA_CL_UCP4_Y"}, - {0x00028E68, 0, 0, "PA_CL_UCP4_Z"}, - {0x00028E6C, 0, 0, "PA_CL_UCP4_W"}, - {0x00028E70, 0, 0, "PA_CL_UCP5_X"}, - {0x00028E74, 0, 0, "PA_CL_UCP5_Y"}, - {0x00028E78, 0, 0, "PA_CL_UCP5_Z"}, - {0x00028E7C, 0, 0, "PA_CL_UCP5_W"}, -}; - -static const struct radeon_register R600_names_PS_RESOURCE[] = { - {0x00038000, 0, 0, "RESOURCE0_WORD0"}, - {0x00038004, 0, 0, "RESOURCE0_WORD1"}, - {0x00038008, 0, 0, "RESOURCE0_WORD2"}, - {0x0003800C, 0, 0, "RESOURCE0_WORD3"}, - {0x00038010, 0, 0, "RESOURCE0_WORD4"}, - {0x00038014, 0, 0, "RESOURCE0_WORD5"}, - {0x00038018, 0, 0, "RESOURCE0_WORD6"}, -}; - -static const struct radeon_register R600_names_VS_RESOURCE[] = { - {0x00039180, 0, 0, "RESOURCE160_WORD0"}, - {0x00039184, 0, 0, "RESOURCE160_WORD1"}, - {0x00039188, 0, 0, "RESOURCE160_WORD2"}, - {0x0003918C, 0, 0, "RESOURCE160_WORD3"}, - {0x00039190, 0, 0, "RESOURCE160_WORD4"}, - {0x00039194, 0, 0, "RESOURCE160_WORD5"}, - {0x00039198, 0, 0, "RESOURCE160_WORD6"}, -}; - -static const struct radeon_register R600_names_FS_RESOURCE[] = { - {0x0003A300, 0, 0, "RESOURCE320_WORD0"}, - {0x0003A304, 0, 0, "RESOURCE320_WORD1"}, - {0x0003A308, 0, 0, "RESOURCE320_WORD2"}, - {0x0003A30C, 0, 0, "RESOURCE320_WORD3"}, - {0x0003A310, 0, 0, "RESOURCE320_WORD4"}, - {0x0003A314, 0, 0, "RESOURCE320_WORD5"}, - {0x0003A318, 0, 0, "RESOURCE320_WORD6"}, -}; - -static const struct radeon_register R600_names_GS_RESOURCE[] = { - {0x0003A4C0, 0, 0, "RESOURCE336_WORD0"}, - {0x0003A4C4, 0, 0, "RESOURCE336_WORD1"}, - {0x0003A4C8, 0, 0, "RESOURCE336_WORD2"}, - {0x0003A4CC, 0, 0, "RESOURCE336_WORD3"}, - {0x0003A4D0, 0, 0, "RESOURCE336_WORD4"}, - {0x0003A4D4, 0, 0, "RESOURCE336_WORD5"}, - {0x0003A4D8, 0, 0, "RESOURCE336_WORD6"}, -}; - -static const struct radeon_register R600_names_PS_SAMPLER[] = { - {0x0003C000, 0, 0, "SQ_TEX_SAMPLER_WORD0_0"}, - {0x0003C004, 0, 0, "SQ_TEX_SAMPLER_WORD1_0"}, - {0x0003C008, 0, 0, "SQ_TEX_SAMPLER_WORD2_0"}, -}; - -static const struct radeon_register R600_names_VS_SAMPLER[] = { - {0x0003C0D8, 0, 0, "SQ_TEX_SAMPLER_WORD0_18"}, - {0x0003C0DC, 0, 0, "SQ_TEX_SAMPLER_WORD1_18"}, - {0x0003C0E0, 0, 0, "SQ_TEX_SAMPLER_WORD2_18"}, -}; - -static const struct radeon_register R600_names_GS_SAMPLER[] = { - {0x0003C1B0, 0, 0, "SQ_TEX_SAMPLER_WORD0_36"}, - {0x0003C1B4, 0, 0, "SQ_TEX_SAMPLER_WORD1_36"}, - {0x0003C1B8, 0, 0, "SQ_TEX_SAMPLER_WORD2_36"}, -}; - -static const struct radeon_register R600_names_PS_SAMPLER_BORDER[] = { - {0x0000A400, 0, 0, "TD_PS_SAMPLER0_BORDER_RED"}, - {0x0000A404, 0, 0, "TD_PS_SAMPLER0_BORDER_GREEN"}, - {0x0000A408, 0, 0, "TD_PS_SAMPLER0_BORDER_BLUE"}, - {0x0000A40C, 0, 0, "TD_PS_SAMPLER0_BORDER_ALPHA"}, -}; - -static const struct radeon_register R600_names_VS_SAMPLER_BORDER[] = { - {0x0000A600, 0, 0, "TD_VS_SAMPLER0_BORDER_RED"}, - {0x0000A604, 0, 0, "TD_VS_SAMPLER0_BORDER_GREEN"}, - {0x0000A608, 0, 0, "TD_VS_SAMPLER0_BORDER_BLUE"}, - {0x0000A60C, 0, 0, "TD_VS_SAMPLER0_BORDER_ALPHA"}, -}; - -static const struct radeon_register R600_names_GS_SAMPLER_BORDER[] = { - {0x0000A800, 0, 0, "TD_GS_SAMPLER0_BORDER_RED"}, - {0x0000A804, 0, 0, "TD_GS_SAMPLER0_BORDER_GREEN"}, - {0x0000A808, 0, 0, "TD_GS_SAMPLER0_BORDER_BLUE"}, - {0x0000A80C, 0, 0, "TD_GS_SAMPLER0_BORDER_ALPHA"}, -}; - -static const struct radeon_register R600_names_CB0[] = { - {0x00028040, 1, 0, "CB_COLOR0_BASE"}, - {0x000280A0, 0, 0, "CB_COLOR0_INFO"}, - {0x00028060, 0, 0, "CB_COLOR0_SIZE"}, - {0x00028080, 0, 0, "CB_COLOR0_VIEW"}, - {0x000280E0, 1, 0, "CB_COLOR0_FRAG"}, - {0x000280C0, 1, 0, "CB_COLOR0_TILE"}, - {0x00028100, 0, 0, "CB_COLOR0_MASK"}, -}; - -static const struct radeon_register R600_names_CB1[] = { - {0x00028044, 1, 0, "CB_COLOR1_BASE"}, - {0x000280A4, 0, 0, "CB_COLOR1_INFO"}, - {0x00028064, 0, 0, "CB_COLOR1_SIZE"}, - {0x00028084, 0, 0, "CB_COLOR1_VIEW"}, - {0x000280E4, 1, 0, "CB_COLOR1_FRAG"}, - {0x000280C4, 1, 0, "CB_COLOR1_TILE"}, - {0x00028104, 0, 0, "CB_COLOR1_MASK"}, -}; - -static const struct radeon_register R600_names_CB2[] = { - {0x00028048, 1, 0, "CB_COLOR2_BASE"}, - {0x000280A8, 0, 0, "CB_COLOR2_INFO"}, - {0x00028068, 0, 0, "CB_COLOR2_SIZE"}, - {0x00028088, 0, 0, "CB_COLOR2_VIEW"}, - {0x000280E8, 1, 0, "CB_COLOR2_FRAG"}, - {0x000280C8, 1, 0, "CB_COLOR2_TILE"}, - {0x00028108, 0, 0, "CB_COLOR2_MASK"}, -}; - -static const struct radeon_register R600_names_CB3[] = { - {0x0002804C, 1, 0, "CB_COLOR3_BASE"}, - {0x000280AC, 0, 0, "CB_COLOR3_INFO"}, - {0x0002806C, 0, 0, "CB_COLOR3_SIZE"}, - {0x0002808C, 0, 0, "CB_COLOR3_VIEW"}, - {0x000280EC, 1, 0, "CB_COLOR3_FRAG"}, - {0x000280CC, 1, 0, "CB_COLOR3_TILE"}, - {0x0002810C, 0, 0, "CB_COLOR3_MASK"}, -}; - -static const struct radeon_register R600_names_CB4[] = { - {0x00028050, 1, 0, "CB_COLOR4_BASE"}, - {0x000280B0, 0, 0, "CB_COLOR4_INFO"}, - {0x00028070, 0, 0, "CB_COLOR4_SIZE"}, - {0x00028090, 0, 0, "CB_COLOR4_VIEW"}, - {0x000280F0, 1, 0, "CB_COLOR4_FRAG"}, - {0x000280D0, 1, 0, "CB_COLOR4_TILE"}, - {0x00028110, 0, 0, "CB_COLOR4_MASK"}, -}; - -static const struct radeon_register R600_names_CB5[] = { - {0x00028054, 1, 0, "CB_COLOR5_BASE"}, - {0x000280B4, 0, 0, "CB_COLOR5_INFO"}, - {0x00028074, 0, 0, "CB_COLOR5_SIZE"}, - {0x00028094, 0, 0, "CB_COLOR5_VIEW"}, - {0x000280F4, 1, 0, "CB_COLOR5_FRAG"}, - {0x000280D4, 1, 0, "CB_COLOR5_TILE"}, - {0x00028114, 0, 0, "CB_COLOR5_MASK"}, -}; - -static const struct radeon_register R600_names_CB6[] = { - {0x00028058, 1, 0, "CB_COLOR6_BASE"}, - {0x000280B8, 0, 0, "CB_COLOR6_INFO"}, - {0x00028078, 0, 0, "CB_COLOR6_SIZE"}, - {0x00028098, 0, 0, "CB_COLOR6_VIEW"}, - {0x000280F8, 1, 0, "CB_COLOR6_FRAG"}, - {0x000280D8, 1, 0, "CB_COLOR6_TILE"}, - {0x00028118, 0, 0, "CB_COLOR6_MASK"}, -}; - -static const struct radeon_register R600_names_CB7[] = { - {0x0002805C, 1, 0, "CB_COLOR7_BASE"}, - {0x000280BC, 0, 0, "CB_COLOR7_INFO"}, - {0x0002807C, 0, 0, "CB_COLOR7_SIZE"}, - {0x0002809C, 0, 0, "CB_COLOR7_VIEW"}, - {0x000280FC, 1, 0, "CB_COLOR7_FRAG"}, - {0x000280DC, 1, 0, "CB_COLOR7_TILE"}, - {0x0002811C, 0, 0, "CB_COLOR7_MASK"}, -}; - -static const struct radeon_register R600_names_DB[] = { - {0x0002800C, 1, 0, "DB_DEPTH_BASE"}, - {0x00028000, 0, 0, "DB_DEPTH_SIZE"}, - {0x00028004, 0, 0, "DB_DEPTH_VIEW"}, - {0x00028010, 0, 0, "DB_DEPTH_INFO"}, - {0x00028D24, 0, 0, "DB_HTILE_SURFACE"}, - {0x00028D34, 0, 0, "DB_PREFETCH_LIMIT"}, -}; - -static const struct radeon_register R600_names_VGT[] = { - {0x00008958, 0, 0, "VGT_PRIMITIVE_TYPE"}, - {0x00028400, 0, 0, "VGT_MAX_VTX_INDX"}, - {0x00028404, 0, 0, "VGT_MIN_VTX_INDX"}, - {0x00028408, 0, 0, "VGT_INDX_OFFSET"}, - {0x0002840C, 0, 0, "VGT_MULTI_PRIM_IB_RESET_INDX"}, - {0x00028A7C, 0, 0, "VGT_DMA_INDEX_TYPE"}, - {0x00028A84, 0, 0, "VGT_PRIMITIVEID_EN"}, - {0x00028A88, 0, 0, "VGT_DMA_NUM_INSTANCES"}, - {0x00028A94, 0, 0, "VGT_MULTI_PRIM_IB_RESET_EN"}, - {0x00028AA0, 0, 0, "VGT_INSTANCE_STEP_RATE_0"}, - {0x00028AA4, 0, 0, "VGT_INSTANCE_STEP_RATE_1"}, -}; - -static const struct radeon_register R600_names_DRAW[] = { - {0x00008970, 0, 0, "VGT_NUM_INDICES"}, - {0x000287E4, 0, 0, "VGT_DMA_BASE_HI"}, - {0x000287E8, 1, 0, "VGT_DMA_BASE"}, - {0x000287F0, 0, 0, "VGT_DRAW_INITIATOR"}, -}; - -static const struct radeon_register R600_names_VGT_EVENT[] = { - {0x00028A90, 1, 0, "VGT_EVENT_INITIATOR"}, -}; - -static const struct radeon_register R600_names_CB_FLUSH[] = { -}; - -static const struct radeon_register R600_names_DB_FLUSH[] = { -}; - -#endif diff --git a/src/gallium/winsys/r600/drm/r600d.h b/src/gallium/winsys/r600/drm/r600d.h index c5d5fe9ddf..d91f7737af 100644 --- a/src/gallium/winsys/r600/drm/r600d.h +++ b/src/gallium/winsys/r600/drm/r600d.h @@ -28,7 +28,7 @@ /* evergreen values */ #define EG_RESOURCE_OFFSET 0x00030000 -#define EG_RESOURCE_END 0x00030400 +#define EG_RESOURCE_END 0x00034000 #define EG_LOOP_CONST_OFFSET 0x0003A200 #define EG_LOOP_CONST_END 0x0003A26C #define EG_BOOL_CONST_OFFSET 0x0003A500 @@ -91,6 +91,7 @@ #define PKT3_SET_CTL_CONST 0x6F #define PKT3_SURFACE_BASE_UPDATE 0x73 +#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14 #define EVENT_TYPE_ZPASS_DONE 0x15 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 @@ -839,6 +840,16 @@ #define R_028800_DB_DEPTH_CONTROL 0x028800 #define R_02880C_DB_SHADER_CONTROL 0x02880C #define R_028D0C_DB_RENDER_CONTROL 0x028D0C +#define S_028D0C_DEPTH_CLEAR_ENABLE(x) (((x) & 0x1) << 0) +#define S_028D0C_STENCIL_CLEAR_ENABLE(x) (((x) & 0x1) << 1) +#define S_028D0C_DEPTH_COPY_ENABLE(x) (((x) & 0x1) << 2) +#define S_028D0C_STENCIL_COPY_ENABLE(x) (((x) & 0x1) << 3) +#define S_028D0C_RESUMMARIZE_ENABLE(x) (((x) & 0x1) << 4) +#define S_028D0C_STENCIL_COMPRESS_DISABLE(x) (((x) & 0x1) << 5) +#define S_028D0C_DEPTH_COMPRESS_DISABLE(x) (((x) & 0x1) << 6) +#define S_028D0C_COPY_CENTROID(x) (((x) & 0x1) << 7) +#define S_028D0C_COPY_SAMPLE(x) (((x) & 0x1) << 8) +#define S_028D0C_R700_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 15) #define R_028D10_DB_RENDER_OVERRIDE 0x028D10 #define R_028D2C_DB_SRESULTS_COMPARE_STATE1 0x028D2C #define R_028D30_DB_PRELOAD_CONTROL 0x028D30 @@ -2190,4 +2201,12 @@ #define R_038014_RESOURCE0_WORD5 0x038014 #define R_038018_RESOURCE0_WORD6 0x038018 +#define R_028140_ALU_CONST_BUFFER_SIZE_PS_0 0x00028140 +#define R_028180_ALU_CONST_BUFFER_SIZE_VS_0 0x00028180 +#define R_028940_ALU_CONST_CACHE_PS_0 0x00028940 +#define R_028980_ALU_CONST_CACHE_VS_0 0x00028980 + +#define R_03CFF0_SQ_VTX_BASE_VTX_LOC 0x03CFF0 +#define R_03CFF4_SQ_VTX_START_INST_LOC 0x03CFF4 + #endif diff --git a/src/gallium/winsys/r600/drm/radeon.c b/src/gallium/winsys/r600/drm/radeon.c deleted file mode 100644 index f39d020559..0000000000 --- a/src/gallium/winsys/r600/drm/radeon.c +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Copyright © 2009 Jerome Glisse <glisse@freedesktop.org> - * - * This file is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. - */ -#include <stdio.h> -#include <stdlib.h> -#include <stdint.h> -#include <unistd.h> -#include <string.h> -#include <errno.h> -#include <pipebuffer/pb_bufmgr.h> -#include "xf86drm.h" -#include "radeon_priv.h" -#include "radeon_drm.h" - -enum radeon_family radeon_get_family(struct radeon *radeon) -{ - return radeon->family; -} - -void radeon_set_mem_constant(struct radeon *radeon, boolean state) -{ - radeon->use_mem_constant = state; -} - -static int radeon_get_device(struct radeon *radeon) -{ - struct drm_radeon_info info; - int r; - - radeon->device = 0; - info.request = RADEON_INFO_DEVICE_ID; - info.value = (uintptr_t)&radeon->device; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, - sizeof(struct drm_radeon_info)); - return r; -} - -struct radeon *radeon_new(int fd, unsigned device) -{ - struct radeon *radeon; - int r, i, id, j, k; - - radeon = calloc(1, sizeof(*radeon)); - if (radeon == NULL) { - return NULL; - } - radeon->fd = fd; - radeon->device = device; - radeon->refcount = 1; - if (fd >= 0) { - r = radeon_get_device(radeon); - if (r) { - fprintf(stderr, "Failed to get device id\n"); - return radeon_decref(radeon); - } - } - radeon->family = radeon_family_from_device(radeon->device); - if (radeon->family == CHIP_UNKNOWN) { - fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->device); - return radeon_decref(radeon); - } - switch (radeon->family) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - if (r600_init(radeon)) { - return radeon_decref(radeon); - } - break; - case CHIP_R100: - case CHIP_RV100: - case CHIP_RS100: - case CHIP_RV200: - case CHIP_RS200: - case CHIP_R200: - case CHIP_RV250: - case CHIP_RS300: - case CHIP_RV280: - case CHIP_R300: - case CHIP_R350: - case CHIP_RV350: - case CHIP_RV380: - case CHIP_R420: - case CHIP_R423: - case CHIP_RV410: - case CHIP_RS400: - case CHIP_RS480: - case CHIP_RS600: - case CHIP_RS690: - case CHIP_RS740: - case CHIP_RV515: - case CHIP_R520: - case CHIP_RV530: - case CHIP_RV560: - case CHIP_RV570: - case CHIP_R580: - default: - fprintf(stderr, "%s unknown or unsupported chipset 0x%04X\n", - __func__, radeon->device); - break; - } - - radeon->mman = pb_malloc_bufmgr_create(); - if (!radeon->mman) - return NULL; - radeon->kman = radeon_bo_pbmgr_create(radeon); - if (!radeon->kman) - return NULL; - radeon->cman = pb_cache_manager_create(radeon->kman, 100000); - if (!radeon->cman) - return NULL; - return radeon; -} - -struct radeon *radeon_incref(struct radeon *radeon) -{ - if (radeon == NULL) - return NULL; - radeon->refcount++; - return radeon; -} - -struct radeon *radeon_decref(struct radeon *radeon) -{ - if (radeon == NULL) - return NULL; - if (--radeon->refcount > 0) { - return NULL; - } - - radeon->mman->destroy(radeon->mman); - radeon->cman->destroy(radeon->cman); - radeon->kman->destroy(radeon->kman); - drmClose(radeon->fd); - free(radeon); - return NULL; -} diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c index 51ce864974..14a00161c8 100644 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ b/src/gallium/winsys/r600/drm/radeon_bo.c @@ -29,10 +29,45 @@ #include <string.h> #include <sys/mman.h> #include <errno.h> -#include "radeon_priv.h" +#include "r600_priv.h" #include "xf86drm.h" #include "radeon_drm.h" +static int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo) +{ + struct drm_radeon_gem_mmap args; + void *ptr; + int r; + + /* Zero out args to make valgrind happy */ + memset(&args, 0, sizeof(args)); + args.handle = bo->handle; + args.offset = 0; + args.size = (uint64_t)bo->size; + r = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_MMAP, + &args, sizeof(args)); + if (r) { + fprintf(stderr, "error mapping %p 0x%08X (error = %d)\n", + bo, bo->handle, r); + return r; + } + ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, radeon->fd, args.addr_ptr); + if (ptr == MAP_FAILED) { + fprintf(stderr, "%s failed to map bo\n", __func__); + return -errno; + } + bo->data = ptr; + + bo->map_count++; + return 0; +} + +static void radeon_bo_fixed_unmap(struct radeon *radeon, struct radeon_bo *bo) +{ + munmap(bo->data, bo->size); + bo->data = NULL; +} + struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, unsigned size, unsigned alignment, void *ptr) { @@ -60,6 +95,7 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, } bo->handle = open_arg.handle; bo->size = open_arg.size; + bo->shared = TRUE; } else { struct drm_radeon_gem_create args; @@ -79,65 +115,24 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, return NULL; } } + if (radeon_bo_fixed_map(radeon, bo)) { + R600_ERR("failed to map bo\n"); + radeon_bo_reference(radeon, &bo, NULL); + return bo; + } if (ptr) { - if (radeon_bo_map(radeon, bo)) { - fprintf(stderr, "%s failed to copy data into bo\n", __func__); - radeon_bo_reference(radeon, &bo, NULL); - return bo; - } memcpy(bo->data, ptr, size); - radeon_bo_unmap(radeon, bo); } + LIST_INITHEAD(&bo->fencedlist); return bo; } -int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo) -{ - struct drm_radeon_gem_mmap args; - void *ptr; - int r; - - if (bo->map_count != 0) { - goto success; - } - /* Zero out args to make valgrind happy */ - memset(&args, 0, sizeof(args)); - args.handle = bo->handle; - args.offset = 0; - args.size = (uint64_t)bo->size; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_MMAP, - &args, sizeof(args)); - if (r) { - fprintf(stderr, "error mapping %p 0x%08X (error = %d)\n", - bo, bo->handle, r); - return r; - } - ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, radeon->fd, args.addr_ptr); - if (ptr == MAP_FAILED) { - fprintf(stderr, "%s failed to map bo\n", __func__); - return -errno; - } - bo->data = ptr; - -success: - bo->map_count++; - - return 0; -} - -void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo) -{ - if (--bo->map_count > 0) { - return; - } - munmap(bo->data, bo->size); - bo->data = NULL; -} - static void radeon_bo_destroy(struct radeon *radeon, struct radeon_bo *bo) { struct drm_gem_close args; + LIST_DEL(&bo->fencedlist); + radeon_bo_fixed_unmap(radeon, bo); memset(&args, 0, sizeof(args)); args.handle = bo->handle; drmIoctl(radeon->fd, DRM_IOCTL_GEM_CLOSE, &args); @@ -161,6 +156,15 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo) struct drm_radeon_gem_wait_idle args; int ret; + if (!bo->fence && !bo->shared) + return 0; + + if (bo->fence <= *bo->ctx->cfence) { + LIST_DELINIT(&bo->fencedlist); + bo->fence = 0; + return 0; + } + /* Zero out args to make valgrind happy */ memset(&args, 0, sizeof(args)); args.handle = bo->handle; @@ -173,16 +177,26 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo) int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain) { - struct drm_radeon_gem_busy args; - int ret; + struct drm_radeon_gem_busy args; + int ret; + + if (!bo->shared) { + if (!bo->fence) + return 0; + if (bo->fence <= *bo->ctx->cfence) { + LIST_DELINIT(&bo->fencedlist); + bo->fence = 0; + return 0; + } + } - memset(&args, 0, sizeof(args)); - args.handle = bo->handle; - args.domain = 0; + memset(&args, 0, sizeof(args)); + args.handle = bo->handle; + args.domain = 0; - ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_BUSY, - &args, sizeof(args)); + ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_BUSY, + &args, sizeof(args)); - *domain = args.domain; - return ret; + *domain = args.domain; + return ret; } diff --git a/src/gallium/winsys/r600/drm/radeon_bo_pb.c b/src/gallium/winsys/r600/drm/radeon_bo_pb.c index 65ba96233d..a3452027f2 100644 --- a/src/gallium/winsys/r600/drm/radeon_bo_pb.c +++ b/src/gallium/winsys/r600/drm/radeon_bo_pb.c @@ -1,10 +1,34 @@ -#include "radeon_priv.h" - -#include "util/u_inlines.h" -#include "util/u_memory.h" -#include "util/u_double_list.h" -#include "pipebuffer/pb_buffer.h" -#include "pipebuffer/pb_bufmgr.h" +/* + * Copyright 2010 Dave Airlie + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Dave Airlie + */ +#include <util/u_inlines.h> +#include <util/u_memory.h> +#include <util/u_double_list.h> +#include <pipebuffer/pb_buffer.h> +#include <pipebuffer/pb_bufmgr.h> +#include "r600_priv.h" struct radeon_bo_pb { struct pb_buffer b; @@ -53,28 +77,49 @@ radeon_bo_pb_map_internal(struct pb_buffer *_buf, unsigned flags, void *ctx) { struct radeon_bo_pb *buf = radeon_bo_pb(_buf); - - if (flags & PB_USAGE_DONTBLOCK) { - if (p_atomic_read(&buf->bo->reference.count) > 1) + struct pipe_context *pctx = ctx; + + if (flags & PB_USAGE_UNSYNCHRONIZED) { + if (!buf->bo->data && radeon_bo_map(buf->mgr->radeon, buf->bo)) { return NULL; - } - if (buf->bo->data != NULL) { + } LIST_DELINIT(&buf->maplist); return buf->bo->data; } + if (p_atomic_read(&buf->bo->reference.count) > 1) { + if (flags & PB_USAGE_DONTBLOCK) { + return NULL; + } + if (ctx) { + pctx->flush(pctx, 0, NULL); + } + } + if (flags & PB_USAGE_DONTBLOCK) { uint32_t domain; if (radeon_bo_busy(buf->mgr->radeon, buf->bo, &domain)) return NULL; + if (radeon_bo_map(buf->mgr->radeon, buf->bo)) { + return NULL; + } + goto out; } - if (p_atomic_read(&buf->bo->reference.count) > 1 && ctx) { - r600_flush_ctx(ctx); - } - if (radeon_bo_map(buf->mgr->radeon, buf->bo)) { - return NULL; + if (buf->bo->data != NULL) { + if (radeon_bo_wait(buf->mgr->radeon, buf->bo)) { + return NULL; + } + } else { + if (radeon_bo_map(buf->mgr->radeon, buf->bo)) { + return NULL; + } + if (radeon_bo_wait(buf->mgr->radeon, buf->bo)) { + radeon_bo_unmap(buf->mgr->radeon, buf->bo); + return NULL; + } } +out: LIST_DELINIT(&buf->maplist); return buf->bo->data; } @@ -158,7 +203,6 @@ radeon_bo_pb_create_buffer(struct pb_manager *_mgr, struct radeon_bo_pbmgr *mgr = radeon_bo_pbmgr(_mgr); struct radeon *radeon = mgr->radeon; struct radeon_bo_pb *bo; - uint32_t domain; bo = CALLOC_STRUCT(radeon_bo_pb); if (!bo) diff --git a/src/gallium/winsys/r600/drm/radeon_ctx.c b/src/gallium/winsys/r600/drm/radeon_ctx.c deleted file mode 100644 index 7ccb524590..0000000000 --- a/src/gallium/winsys/r600/drm/radeon_ctx.c +++ /dev/null @@ -1,376 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include "radeon_priv.h" -#include "radeon_drm.h" -#include "bof.h" - -static int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_ws_bo *bo) -{ - if (ctx->nbo >= RADEON_CTX_MAX_PM4) - return -EBUSY; - /* take a reference to the kernel bo */ - radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->nbo], radeon_bo_pb_get_bo(bo->pb)); - ctx->nbo++; - return 0; -} - -static void radeon_ctx_get_placement(struct radeon_ctx *ctx, unsigned reloc, u32 *placement) -{ - struct radeon_cs_reloc *greloc; - unsigned i; - - placement[0] = 0; - placement[1] = 0; - greloc = (void *)(((u8 *)ctx->reloc) + reloc * 4); - for (i = 0; i < ctx->nbo; i++) { - if (ctx->bo[i]->handle == greloc->handle) { - placement[0] = greloc->read_domain | greloc->write_domain; - placement[1] = placement[0]; - return; - } - } -} - -void radeon_ctx_clear(struct radeon_ctx *ctx) -{ - for (int i = 0; i < ctx->nbo; i++) { - radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL); - } - ctx->ndwords = RADEON_CTX_MAX_PM4; - ctx->cdwords = 0; - ctx->nreloc = 0; - ctx->nbo = 0; -} - -struct radeon_ctx *radeon_ctx_init(struct radeon *radeon) -{ - struct radeon_ctx *ctx; - if (radeon == NULL) - return NULL; - ctx = calloc(1, sizeof(struct radeon_ctx)); - ctx->radeon = radeon_incref(radeon); - radeon_ctx_clear(ctx); - ctx->pm4 = malloc(RADEON_CTX_MAX_PM4 * 4); - if (ctx->pm4 == NULL) { - radeon_ctx_fini(ctx); - return NULL; - } - ctx->reloc = malloc(sizeof(struct radeon_cs_reloc) * RADEON_CTX_MAX_PM4); - if (ctx->reloc == NULL) { - radeon_ctx_fini(ctx); - return NULL; - } - ctx->bo = calloc(sizeof(void *), RADEON_CTX_MAX_PM4); - if (ctx->bo == NULL) { - radeon_ctx_fini(ctx); - return NULL; - } - return ctx; -} - -void radeon_ctx_fini(struct radeon_ctx *ctx) -{ - unsigned i; - - if (ctx == NULL) - return; - - for (i = 0; i < ctx->nbo; i++) { - radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL); - } - ctx->radeon = radeon_decref(ctx->radeon); - free(ctx->bo); - free(ctx->pm4); - free(ctx->reloc); - free(ctx); -} - -static int radeon_ctx_state_bo(struct radeon_ctx *ctx, struct radeon_state *state) -{ - unsigned i, j; - int r; - struct radeon_bo *state_bo; - if (state == NULL) - return 0; - for (i = 0; i < state->nbo; i++) { - for (j = 0; j < ctx->nbo; j++) { - state_bo = radeon_bo_pb_get_bo(state->bo[i]->pb); - if (state_bo == ctx->bo[j]) - break; - } - if (j == ctx->nbo) { - r = radeon_ctx_set_bo_new(ctx, state->bo[i]); - if (r) - return r; - } - } - return 0; -} - - -int radeon_ctx_submit(struct radeon_ctx *ctx) -{ - struct drm_radeon_cs drmib; - struct drm_radeon_cs_chunk chunks[2]; - uint64_t chunk_array[2]; - int r = 0; - - if (!ctx->cdwords) - return 0; - - radeon_bo_pbmgr_flush_maps(ctx->radeon->kman); -#if 0 - for (r = 0; r < ctx->cdwords; r++) { - fprintf(stderr, "0x%08X\n", ctx->pm4[r]); - } -#endif - drmib.num_chunks = 2; - drmib.chunks = (uint64_t)(uintptr_t)chunk_array; - chunks[0].chunk_id = RADEON_CHUNK_ID_IB; - chunks[0].length_dw = ctx->cdwords; - chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4; - chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; - chunks[1].length_dw = ctx->nreloc * sizeof(struct radeon_cs_reloc) / 4; - chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc; - chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0]; - chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1]; -#if 1 - r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib, - sizeof(struct drm_radeon_cs)); -#endif - return r; -} - -static int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_ws_bo *bo, - unsigned id, unsigned *placement) -{ - unsigned i; - unsigned bo_handle = radeon_ws_bo_get_handle(bo); - - for (i = 0; i < ctx->nreloc; i++) { - if (ctx->reloc[i].handle == bo_handle) { - ctx->pm4[id] = i * sizeof(struct radeon_cs_reloc) / 4; - return 0; - } - } - if (ctx->nreloc >= RADEON_CTX_MAX_PM4) { - return -EBUSY; - } - ctx->reloc[ctx->nreloc].handle = bo_handle; - ctx->reloc[ctx->nreloc].read_domain = placement[0] | placement [1]; - ctx->reloc[ctx->nreloc].write_domain = placement[0] | placement [1]; - ctx->reloc[ctx->nreloc].flags = 0; - ctx->pm4[id] = ctx->nreloc * sizeof(struct radeon_cs_reloc) / 4; - ctx->nreloc++; - return 0; -} - -static int radeon_ctx_state_schedule(struct radeon_ctx *ctx, struct radeon_state *state) -{ - unsigned i, rid, bid, cid; - int r; - - if (state == NULL) - return 0; - if (state->cpm4 > ctx->ndwords) { - return -EBUSY; - } - memcpy(&ctx->pm4[ctx->cdwords], state->pm4, state->cpm4 * 4); - for (i = 0; i < state->nreloc; i++) { - rid = state->reloc_pm4_id[i]; - bid = state->reloc_bo_id[i]; - cid = ctx->cdwords + rid; - r = radeon_ctx_reloc(ctx, state->bo[bid], cid, - &state->placement[bid * 2]); - if (r) { - fprintf(stderr, "%s state %d failed to reloc\n", __func__, state->stype->stype); - return r; - } - } - ctx->cdwords += state->cpm4; - ctx->ndwords -= state->cpm4; - return 0; -} - -int radeon_ctx_set_query_state(struct radeon_ctx *ctx, struct radeon_state *state) -{ - int r = 0; - - /* !!! ONLY ACCEPT QUERY STATE HERE !!! */ - r = radeon_state_pm4(state); - if (r) - return r; - /* BEGIN/END query are balanced in the same cs so account for END - * END query when scheduling BEGIN query - */ - switch (state->stype->stype) { - case R600_STATE_QUERY_BEGIN: - /* is there enough place for begin & end */ - if ((state->cpm4 * 2) > ctx->ndwords) - return -EBUSY; - ctx->ndwords -= state->cpm4; - break; - case R600_STATE_QUERY_END: - ctx->ndwords += state->cpm4; - break; - default: - return -EINVAL; - } - return radeon_ctx_state_schedule(ctx, state); -} - -int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw) -{ - unsigned previous_cdwords; - int r = 0; - int i; - - for (i = 0; i < ctx->radeon->max_states; i++) { - r = radeon_ctx_state_bo(ctx, draw->state[i]); - if (r) - return r; - } - previous_cdwords = ctx->cdwords; - for (i = 0; i < ctx->radeon->max_states; i++) { - if (draw->state[i]) { - r = radeon_ctx_state_schedule(ctx, draw->state[i]); - if (r) { - ctx->cdwords = previous_cdwords; - return r; - } - } - } - - return 0; -} - -#if 0 -int radeon_ctx_pm4(struct radeon_ctx *ctx) -{ - unsigned i; - int r; - - free(ctx->pm4); - ctx->cpm4 = 0; - ctx->pm4 = malloc(ctx->draw_cpm4 * 4); - if (ctx->pm4 == NULL) - return -EINVAL; - for (i = 0, ctx->id = 0; i < ctx->nstate; i++) { - } - if (ctx->id != ctx->draw_cpm4) { - fprintf(stderr, "%s miss predicted pm4 size %d for %d\n", - __func__, ctx->draw_cpm4, ctx->id); - return -EINVAL; - } - ctx->cpm4 = ctx->draw_cpm4; - return 0; -} -#endif - -void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file) -{ - bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root; - unsigned i; - unsigned bo_size; - root = device_id = bcs = blob = array = bo = size = handle = NULL; - root = bof_object(); - if (root == NULL) - goto out_err; - device_id = bof_int32(ctx->radeon->device); - if (device_id == NULL) - return; - if (bof_object_set(root, "device_id", device_id)) - goto out_err; - bof_decref(device_id); - device_id = NULL; - /* dump relocs */ - blob = bof_blob(ctx->nreloc * 16, ctx->reloc); - if (blob == NULL) - goto out_err; - if (bof_object_set(root, "reloc", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - /* dump cs */ - blob = bof_blob(ctx->cdwords * 4, ctx->pm4); - if (blob == NULL) - goto out_err; - if (bof_object_set(root, "pm4", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - /* dump bo */ - array = bof_array(); - if (array == NULL) - goto out_err; - for (i = 0; i < ctx->nbo; i++) { - bo = bof_object(); - if (bo == NULL) - goto out_err; - bo_size = ctx->bo[i]->size; - size = bof_int32(bo_size); - if (size == NULL) - goto out_err; - if (bof_object_set(bo, "size", size)) - goto out_err; - bof_decref(size); - size = NULL; - handle = bof_int32(ctx->bo[i]->handle); - if (handle == NULL) - goto out_err; - if (bof_object_set(bo, "handle", handle)) - goto out_err; - bof_decref(handle); - handle = NULL; - radeon_bo_map(ctx->radeon, ctx->bo[i]); - blob = bof_blob(bo_size, ctx->bo[i]->data); - radeon_bo_unmap(ctx->radeon, ctx->bo[i]); - if (blob == NULL) - goto out_err; - if (bof_object_set(bo, "data", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - if (bof_array_append(array, bo)) - goto out_err; - bof_decref(bo); - bo = NULL; - } - if (bof_object_set(root, "bo", array)) - goto out_err; - bof_dump_file(root, file); -out_err: - bof_decref(blob); - bof_decref(array); - bof_decref(bo); - bof_decref(size); - bof_decref(handle); - bof_decref(device_id); - bof_decref(root); -} diff --git a/src/gallium/winsys/r600/drm/radeon_pciid.c b/src/gallium/winsys/r600/drm/radeon_pciid.c index dd6156d585..08cc1c41e3 100644 --- a/src/gallium/winsys/r600/drm/radeon_pciid.c +++ b/src/gallium/winsys/r600/drm/radeon_pciid.c @@ -24,7 +24,7 @@ * Jerome Glisse */ #include <stdlib.h> -#include "radeon_priv.h" +#include "r600.h" struct pci_id { unsigned vendor; diff --git a/src/gallium/winsys/r600/drm/radeon_priv.h b/src/gallium/winsys/r600/drm/radeon_priv.h deleted file mode 100644 index c284f6aa7d..0000000000 --- a/src/gallium/winsys/r600/drm/radeon_priv.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright © 2009 Jerome Glisse <glisse@freedesktop.org> - * - * This file is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. - */ -#ifndef RADEON_PRIV_H -#define RADEON_PRIV_H - -#include <stdint.h> -#include "xf86drm.h" -#include "xf86drmMode.h" -#include <errno.h> -#include "radeon.h" - -#include "pipe/p_compiler.h" -#include "util/u_inlines.h" -#include "pipe/p_defines.h" - -struct radeon; -struct radeon_ctx; - - -/* - * radeon functions - */ -typedef int (*radeon_state_pm4_t)(struct radeon_state *state); -struct radeon_register { - unsigned offset; - unsigned need_reloc; - unsigned bo_id; - char name[64]; -}; - -struct radeon_bo { - struct pipe_reference reference; - unsigned handle; - unsigned size; - unsigned alignment; - unsigned map_count; - void *data; -}; - -struct radeon_sub_type { - int shader_type; - const struct radeon_register *regs; - unsigned nstates; -}; - -struct radeon_stype_info { - unsigned stype; - unsigned num; - unsigned stride; - radeon_state_pm4_t pm4; - struct radeon_sub_type reginfo[R600_SHADER_MAX]; - unsigned base_id; - unsigned npm4; -}; - -struct radeon_ctx { - struct radeon *radeon; - u32 *pm4; - int cdwords; - int ndwords; - unsigned nreloc; - struct radeon_cs_reloc *reloc; - unsigned nbo; - struct radeon_bo **bo; -}; - -struct radeon { - int fd; - int refcount; - unsigned device; - unsigned family; - unsigned nstype; - struct radeon_stype_info *stype; - unsigned max_states; - boolean use_mem_constant; /* true for evergreen */ - struct pb_manager *mman; /* malloc manager */ - struct pb_manager *kman; /* kernel bo manager */ - struct pb_manager *cman; /* cached bo manager */ -}; - -struct radeon_ws_bo { - struct pipe_reference reference; - struct pb_buffer *pb; -}; - -extern struct radeon *radeon_new(int fd, unsigned device); -extern struct radeon *radeon_incref(struct radeon *radeon); -extern struct radeon *radeon_decref(struct radeon *radeon); -extern unsigned radeon_family_from_device(unsigned device); -extern int radeon_is_family_compatible(unsigned family1, unsigned family2); - -/* - * r600/r700 context functions - */ -extern int r600_init(struct radeon *radeon); -extern int r600_ctx_draw(struct radeon_ctx *ctx); -extern int r600_ctx_next_reloc(struct radeon_ctx *ctx, unsigned *reloc); - -/* - * radeon state functions - */ -extern u32 radeon_state_register_get(struct radeon_state *state, unsigned offset); -extern int radeon_state_register_set(struct radeon_state *state, unsigned offset, u32 value); -extern struct radeon_state *radeon_state_duplicate(struct radeon_state *state); -extern int radeon_state_replace_always(struct radeon_state *ostate, struct radeon_state *nstate); -extern int radeon_state_pm4_generic(struct radeon_state *state); -extern int radeon_state_reloc(struct radeon_state *state, unsigned id, unsigned bo_id); - -/* - * radeon draw functions - */ -extern int radeon_draw_pm4(struct radeon_draw *draw); - -/* ws bo winsys only */ -unsigned radeon_ws_bo_get_handle(struct radeon_ws_bo *bo); -unsigned radeon_ws_bo_get_size(struct radeon_ws_bo *bo); - -/* bo */ -struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, - unsigned size, unsigned alignment, void *ptr); -int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo); -void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo); -void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst, - struct radeon_bo *src); -int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); -int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain); - -/* pipebuffer kernel bo manager */ -struct pb_manager *radeon_bo_pbmgr_create(struct radeon *radeon); -struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf); -void radeon_bo_pbmgr_flush_maps(struct pb_manager *_mgr); -struct pb_buffer *radeon_bo_pb_create_buffer_from_handle(struct pb_manager *_mgr, - uint32_t handle); - -#endif diff --git a/src/gallium/winsys/r600/drm/radeon_state.c b/src/gallium/winsys/r600/drm/radeon_state.c deleted file mode 100644 index b237b39c2b..0000000000 --- a/src/gallium/winsys/r600/drm/radeon_state.c +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include "radeon_priv.h" - -/* - * state core functions - */ -int radeon_state_init(struct radeon_state *state, struct radeon *radeon, u32 stype, u32 id, u32 shader_type) -{ - struct radeon_stype_info *found = NULL; - int i, j, shader_index = -1; - - /* traverse the stype array */ - for (i = 0; i < radeon->nstype; i++) { - /* if the type doesn't match, if the shader doesn't match */ - if (stype != radeon->stype[i].stype) - continue; - if (shader_type) { - for (j = 0; j < 4; j++) { - if (radeon->stype[i].reginfo[j].shader_type == shader_type) { - shader_index = j; - break; - } - } - if (shader_index == -1) - continue; - } else { - if (radeon->stype[i].reginfo[0].shader_type) - continue; - else - shader_index = 0; - } - if (id > radeon->stype[i].num) - continue; - - found = &radeon->stype[i]; - break; - } - - if (!found) { - fprintf(stderr, "%s invalid type %d/id %d/shader class %d\n", __func__, stype, id, shader_type); - return -EINVAL; - } - - memset(state, 0, sizeof(struct radeon_state)); - state->stype = found; - state->state_id = state->stype->num * shader_index + state->stype->base_id + id; - state->radeon = radeon; - state->id = id; - state->shader_index = shader_index; - state->refcount = 1; - state->npm4 = found->npm4; - state->nstates = found->reginfo[shader_index].nstates; - return 0; -} - -int radeon_state_convert(struct radeon_state *state, u32 stype, u32 id, u32 shader_type) -{ - struct radeon_stype_info *found = NULL; - int i, j, shader_index = -1; - - if (state == NULL) - return 0; - /* traverse the stype array */ - for (i = 0; i < state->radeon->nstype; i++) { - /* if the type doesn't match, if the shader doesn't match */ - if (stype != state->radeon->stype[i].stype) - continue; - if (shader_type) { - for (j = 0; j < 4; j++) { - if (state->radeon->stype[i].reginfo[j].shader_type == shader_type) { - shader_index = j; - break; - } - } - if (shader_index == -1) - continue; - } else { - if (state->radeon->stype[i].reginfo[0].shader_type) - continue; - else - shader_index = 0; - } - if (id > state->radeon->stype[i].num) - continue; - - found = &state->radeon->stype[i]; - break; - } - - if (!found) { - fprintf(stderr, "%s invalid type %d/id %d/shader class %d\n", __func__, stype, id, shader_type); - return -EINVAL; - } - - if (found->reginfo[shader_index].nstates != state->nstates) { - fprintf(stderr, "invalid type change from (%d %d %d) to (%d %d %d)\n", - state->stype->stype, state->id, state->shader_index, stype, id, shader_index); - } - - state->stype = found; - state->id = id; - state->shader_index = shader_index; - state->state_id = state->stype->num * shader_index + state->stype->base_id + id; - return radeon_state_pm4(state); -} - -void radeon_state_fini(struct radeon_state *state) -{ - unsigned i; - - if (state == NULL) - return NULL; - for (i = 0; i < state->nbo; i++) { - radeon_ws_bo_reference(state->radeon, &state->bo[i], NULL); - } - memset(state, 0, sizeof(struct radeon_state)); -} - -int radeon_state_replace_always(struct radeon_state *ostate, - struct radeon_state *nstate) -{ - return 1; -} - -int radeon_state_pm4_generic(struct radeon_state *state) -{ - return -EINVAL; -} - -static u32 crc32(void *d, size_t len) -{ - u16 *data = (uint16_t*)d; - u32 sum1 = 0xffff, sum2 = 0xffff; - - len = len >> 1; - while (len) { - unsigned tlen = len > 360 ? 360 : len; - len -= tlen; - do { - sum1 += *data++; - sum2 += sum1; - } while (--tlen); - sum1 = (sum1 & 0xffff) + (sum1 >> 16); - sum2 = (sum2 & 0xffff) + (sum2 >> 16); - } - /* Second reduction step to reduce sums to 16 bits */ - sum1 = (sum1 & 0xffff) + (sum1 >> 16); - sum2 = (sum2 & 0xffff) + (sum2 >> 16); - return sum2 << 16 | sum1; -} - -int radeon_state_pm4(struct radeon_state *state) -{ - int r; - - if (state == NULL) - return 0; - state->cpm4 = 0; - r = state->stype->pm4(state); - if (r) { - fprintf(stderr, "%s failed to build PM4 for state(%d %d)\n", - __func__, state->stype->stype, state->id); - return r; - } - state->pm4_crc = crc32(state->pm4, state->cpm4 * 4); - return 0; -} - -int radeon_state_reloc(struct radeon_state *state, unsigned id, unsigned bo_id) -{ - state->reloc_pm4_id[state->nreloc] = id; - state->reloc_bo_id[state->nreloc] = bo_id; - state->nreloc++; - return 0; -} diff --git a/src/gallium/winsys/r600/drm/radeon_ws_bo.c b/src/gallium/winsys/r600/drm/radeon_ws_bo.c deleted file mode 100644 index 8114526a14..0000000000 --- a/src/gallium/winsys/r600/drm/radeon_ws_bo.c +++ /dev/null @@ -1,106 +0,0 @@ -#include <malloc.h> -#include <pipe/p_screen.h> -#include <pipebuffer/pb_bufmgr.h> -#include "radeon_priv.h" - -struct radeon_ws_bo *radeon_ws_bo(struct radeon *radeon, - unsigned size, unsigned alignment, unsigned usage) -{ - struct radeon_ws_bo *ws_bo = calloc(1, sizeof(struct radeon_ws_bo)); - struct pb_desc desc; - struct pb_manager *man; - - desc.alignment = alignment; - desc.usage = usage; - - if (!radeon->use_mem_constant && (usage & PIPE_BIND_CONSTANT_BUFFER)) { - man = radeon->mman; - } else if (usage & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) - man = radeon->cman; - else - man = radeon->kman; - - ws_bo->pb = man->create_buffer(man, size, &desc); - if (ws_bo->pb == NULL) { - free(ws_bo); - return NULL; - } - - pipe_reference_init(&ws_bo->reference, 1); - return ws_bo; -} - -struct radeon_ws_bo *radeon_ws_bo_handle(struct radeon *radeon, - unsigned handle) -{ - struct radeon_ws_bo *ws_bo = calloc(1, sizeof(struct radeon_ws_bo)); - - ws_bo->pb = radeon_bo_pb_create_buffer_from_handle(radeon->kman, handle); - if (!ws_bo->pb) { - free(ws_bo); - return NULL; - } - pipe_reference_init(&ws_bo->reference, 1); - return ws_bo; -} - -void *radeon_ws_bo_map(struct radeon *radeon, struct radeon_ws_bo *bo, unsigned usage, void *ctx) -{ - return pb_map(bo->pb, usage, ctx); -} - -void radeon_ws_bo_unmap(struct radeon *radeon, struct radeon_ws_bo *bo) -{ - pb_unmap(bo->pb); -} - -static void radeon_ws_bo_destroy(struct radeon *radeon, struct radeon_ws_bo *bo) -{ - if (bo->pb) - pb_reference(&bo->pb, NULL); - free(bo); -} - -void radeon_ws_bo_reference(struct radeon *radeon, struct radeon_ws_bo **dst, - struct radeon_ws_bo *src) -{ - struct radeon_ws_bo *old = *dst; - - if (pipe_reference(&(*dst)->reference, &src->reference)) { - radeon_ws_bo_destroy(radeon, old); - } - *dst = src; -} - -int radeon_ws_bo_wait(struct radeon *radeon, struct radeon_ws_bo *pb_bo) -{ - /* TODO */ - struct radeon_bo *bo; - bo = radeon_bo_pb_get_bo(pb_bo->pb); - if (!bo) - return 0; - radeon_bo_wait(radeon, bo); - return 0; -} - -unsigned radeon_ws_bo_get_handle(struct radeon_ws_bo *pb_bo) -{ - struct radeon_bo *bo; - - bo = radeon_bo_pb_get_bo(pb_bo->pb); - if (!bo) - return 0; - - return bo->handle; -} - -unsigned radeon_ws_bo_get_size(struct radeon_ws_bo *pb_bo) -{ - struct radeon_bo *bo; - - bo = radeon_bo_pb_get_bo(pb_bo->pb); - if (!bo) - return 0; - - return bo->size; -} diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c index cf665241c4..78723948d4 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c @@ -11,8 +11,10 @@ #include "util/u_simple_list.h" #include "pipebuffer/pb_buffer.h" #include "pipebuffer/pb_bufmgr.h" +#include "os/os_thread.h" #include "radeon_winsys.h" + struct radeon_drm_bufmgr; struct radeon_drm_buffer { @@ -39,10 +41,19 @@ radeon_drm_buffer(struct pb_buffer *buf) } struct radeon_drm_bufmgr { + /* Base class. */ struct pb_manager base; + + /* Winsys. */ struct radeon_libdrm_winsys *rws; + + /* List of mapped buffers and its mutex. */ struct radeon_drm_buffer buffer_map_list; + pipe_mutex buffer_map_list_mutex; + + /* List of buffer handles and its mutex. */ struct util_hash_table *buffer_handles; + pipe_mutex buffer_handles_mutex; }; static INLINE struct radeon_drm_bufmgr * @@ -59,14 +70,21 @@ radeon_drm_buffer_destroy(struct pb_buffer *_buf) int name; if (buf->bo->ptr != NULL) { - remove_from_list(buf); - radeon_bo_unmap(buf->bo); - buf->bo->ptr = NULL; + pipe_mutex_lock(buf->mgr->buffer_map_list_mutex); + /* Now test it again inside the mutex. */ + if (buf->bo->ptr != NULL) { + remove_from_list(buf); + radeon_bo_unmap(buf->bo); + buf->bo->ptr = NULL; + } + pipe_mutex_unlock(buf->mgr->buffer_map_list_mutex); } name = radeon_gem_name_bo(buf->bo); if (name) { + pipe_mutex_lock(buf->mgr->buffer_handles_mutex); util_hash_table_remove(buf->mgr->buffer_handles, (void*)(uintptr_t)name); + pipe_mutex_unlock(buf->mgr->buffer_handles_mutex); } radeon_bo_unref(buf->bo); @@ -118,8 +136,16 @@ radeon_drm_buffer_map_internal(struct pb_buffer *_buf, return NULL; } - if (buf->bo->ptr != NULL) + if (buf->bo->ptr != NULL) { + pipe_mutex_lock(buf->mgr->buffer_map_list_mutex); + /* Now test ptr again inside the mutex. We might have gotten a race + * during the first test. */ + if (buf->bo->ptr != NULL) { + remove_from_list(buf); + } + pipe_mutex_unlock(buf->mgr->buffer_map_list_mutex); return buf->bo->ptr; + } if (flags & PB_USAGE_DONTBLOCK) { uint32_t domain; @@ -142,14 +168,22 @@ radeon_drm_buffer_map_internal(struct pb_buffer *_buf, if (radeon_bo_map(buf->bo, write)) { return NULL; } - insert_at_tail(&buf->mgr->buffer_map_list, buf); + + pipe_mutex_lock(buf->mgr->buffer_map_list_mutex); + remove_from_list(buf); + pipe_mutex_unlock(buf->mgr->buffer_map_list_mutex); return buf->bo->ptr; } static void radeon_drm_buffer_unmap_internal(struct pb_buffer *_buf) { - (void)_buf; + struct radeon_drm_buffer *buf = radeon_drm_buffer(_buf); + pipe_mutex_lock(buf->mgr->buffer_map_list_mutex); + if (is_empty_list(buf)) { /* = is not inserted... */ + insert_at_tail(&buf->mgr->buffer_map_list, buf); + } + pipe_mutex_unlock(buf->mgr->buffer_map_list_mutex); } static void @@ -163,7 +197,7 @@ radeon_drm_buffer_get_base_buffer(struct pb_buffer *buf, static enum pipe_error -radeon_drm_buffer_validate(struct pb_buffer *_buf, +radeon_drm_buffer_validate(struct pb_buffer *_buf, struct pb_validate *vl, unsigned flags) { @@ -186,8 +220,9 @@ const struct pb_vtbl radeon_drm_buffer_vtbl = { radeon_drm_buffer_get_base_buffer, }; -struct pb_buffer *radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager *_mgr, - uint32_t handle) +static struct pb_buffer * +radeon_drm_bufmgr_create_buffer_from_handle_unsafe(struct pb_manager *_mgr, + uint32_t handle) { struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); struct radeon_libdrm_winsys *rws = mgr->rws; @@ -195,6 +230,7 @@ struct pb_buffer *radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager struct radeon_bo *bo; buf = util_hash_table_get(mgr->buffer_handles, (void*)(uintptr_t)handle); + if (buf) { struct pb_buffer *b = NULL; pb_reference(&b, &buf->base); @@ -228,6 +264,20 @@ struct pb_buffer *radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager return &buf->base; } +struct pb_buffer * +radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager *_mgr, + uint32_t handle) +{ + struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); + struct pb_buffer *pb; + + pipe_mutex_lock(mgr->buffer_handles_mutex); + pb = radeon_drm_bufmgr_create_buffer_from_handle_unsafe(_mgr, handle); + pipe_mutex_unlock(mgr->buffer_handles_mutex); + + return pb; +} + static struct pb_buffer * radeon_drm_bufmgr_create_buffer(struct pb_manager *_mgr, pb_size size, @@ -279,6 +329,8 @@ radeon_drm_bufmgr_destroy(struct pb_manager *_mgr) { struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); util_hash_table_destroy(mgr->buffer_handles); + pipe_mutex_destroy(mgr->buffer_map_list_mutex); + pipe_mutex_destroy(mgr->buffer_handles_mutex); FREE(mgr); } @@ -308,6 +360,8 @@ radeon_drm_bufmgr_create(struct radeon_libdrm_winsys *rws) mgr->rws = rws; make_empty_list(&mgr->buffer_map_list); mgr->buffer_handles = util_hash_table_create(handle_hash, handle_compare); + pipe_mutex_init(mgr->buffer_map_list_mutex); + pipe_mutex_init(mgr->buffer_handles_mutex); return &mgr->base; } @@ -483,6 +537,8 @@ void radeon_drm_bufmgr_flush_maps(struct pb_manager *_mgr) struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); struct radeon_drm_buffer *rpb, *t_rpb; + pipe_mutex_lock(mgr->buffer_map_list_mutex); + foreach_s(rpb, t_rpb, &mgr->buffer_map_list) { radeon_bo_unmap(rpb->bo); rpb->bo->ptr = NULL; @@ -490,6 +546,8 @@ void radeon_drm_bufmgr_flush_maps(struct pb_manager *_mgr) } make_empty_list(&mgr->buffer_map_list); + + pipe_mutex_unlock(mgr->buffer_map_list_mutex); } void radeon_drm_bufmgr_wait(struct r300_winsys_screen *ws, diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index 5840098642..420522f5c1 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -250,6 +250,7 @@ static void radeon_r300_winsys_cs_destroy(struct r300_winsys_cs *rcs) { struct radeon_libdrm_cs *cs = radeon_libdrm_cs(rcs); radeon_cs_destroy(cs->cs); + FREE(cs); } static void radeon_winsys_destroy(struct r300_winsys_screen *rws) @@ -261,6 +262,8 @@ static void radeon_winsys_destroy(struct r300_winsys_screen *rws) radeon_bo_manager_gem_dtor(ws->bom); radeon_cs_manager_gem_dtor(ws->csm); + + FREE(rws); } boolean radeon_setup_winsys(int fd, struct radeon_libdrm_winsys* ws) diff --git a/src/gallium/winsys/svga/drm/vmw_screen_dri.c b/src/gallium/winsys/svga/drm/vmw_screen_dri.c index 7bd4407e9f..258084a1f1 100644 --- a/src/gallium/winsys/svga/drm/vmw_screen_dri.c +++ b/src/gallium/winsys/svga/drm/vmw_screen_dri.c @@ -30,7 +30,6 @@ #include "util/u_format.h" #include "vmw_screen.h" -#include "vmw_screen.h" #include "vmw_surface.h" #include "svga_drm_public.h" diff --git a/src/gallium/winsys/sw/Makefile b/src/gallium/winsys/sw/Makefile index e9182ea5b1..094e811d57 100644 --- a/src/gallium/winsys/sw/Makefile +++ b/src/gallium/winsys/sw/Makefile @@ -4,6 +4,16 @@ include $(TOP)/configs/current SUBDIRS = null wrapper +# TODO: this should go through a further indirection level +# (i.e. EGL should set a variable that is checked here) +ifneq ($(findstring x11, $(EGL_PLATFORMS)),) +SUBDIRS += xlib +endif + +ifneq ($(findstring fbdev, $(EGL_PLATFORMS)),) +SUBDIRS += fbdev +endif + default install clean: @for dir in $(SUBDIRS) ; do \ if [ -d $$dir ] ; then \ diff --git a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c index 3a76098b65..bc2623e7b7 100644 --- a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c +++ b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c @@ -272,7 +272,7 @@ wsw_destroy(struct sw_winsys *ws) } struct sw_winsys * -wrapper_sw_winsys_warp_pipe_screen(struct pipe_screen *screen) +wrapper_sw_winsys_wrap_pipe_screen(struct pipe_screen *screen) { struct wrapper_sw_winsys *wsw = CALLOC_STRUCT(wrapper_sw_winsys); @@ -304,3 +304,16 @@ err_free: err: return NULL; } + +struct pipe_screen * +wrapper_sw_winsys_dewrap_pipe_screen(struct sw_winsys *ws) +{ + struct wrapper_sw_winsys *wsw = wrapper_sw_winsys(ws); + struct pipe_screen *screen = wsw->screen; + + wsw->pipe->destroy(wsw->pipe); + /* don't destroy the screen its needed later on */ + + FREE(wsw); + return screen; +} diff --git a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.h b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.h index b5c25a3c50..ae0196c432 100644 --- a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.h +++ b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.h @@ -30,6 +30,15 @@ struct sw_winsys; struct pipe_screen; -struct sw_winsys *wrapper_sw_winsys_warp_pipe_screen(struct pipe_screen *screen); +/* + * Wrap a pipe screen. + */ +struct sw_winsys *wrapper_sw_winsys_wrap_pipe_screen(struct pipe_screen *screen); + +/* + * Destroy the sw_winsys and return the wrapped pipe_screen. + * Not destroying it as sw_winsys::destroy does. + */ +struct pipe_screen *wrapper_sw_winsys_dewrap_pipe_screen(struct sw_winsys *sw_winsys); #endif |