summaryrefslogtreecommitdiff
path: root/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h')
-rw-r--r--src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h2033
1 files changed, 2033 insertions, 0 deletions
diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
new file mode 100644
index 0000000000..a8573cdf68
--- /dev/null
+++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
@@ -0,0 +1,2033 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* used to unbind things, we need 128 due to resources */
+static const void* zero_data[128];
+
+#define UPDATE_VIEWS_SHIFT (D3D11_STAGES * 0)
+#define UPDATE_SAMPLERS_SHIFT (D3D11_STAGES * 1)
+#define UPDATE_VERTEX_BUFFERS (1 << (D3D11_STAGES * 2))
+
+#if API >= 11
+template<typename PtrTraits>
+struct GalliumD3D11DeviceContext :
+ public GalliumD3D11DeviceChild<ID3D11DeviceContext>
+{
+#else
+template<bool threadsafe>
+struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe>
+{
+ typedef simple_ptr_traits PtrTraits;
+ typedef GalliumD3D10Device GalliumD3D10DeviceContext;
+#endif
+
+ refcnt_ptr<GalliumD3D11Shader<>, PtrTraits> shaders[D3D11_STAGES];
+ refcnt_ptr<GalliumD3D11InputLayout, PtrTraits> input_layout;
+ refcnt_ptr<GalliumD3D11Buffer, PtrTraits> index_buffer;
+ refcnt_ptr<GalliumD3D11RasterizerState, PtrTraits> rasterizer_state;
+ refcnt_ptr<GalliumD3D11DepthStencilState, PtrTraits> depth_stencil_state;
+ refcnt_ptr<GalliumD3D11BlendState, PtrTraits> blend_state;
+ refcnt_ptr<GalliumD3D11DepthStencilView, PtrTraits> depth_stencil_view;
+ refcnt_ptr<GalliumD3D11Predicate, PtrTraits> render_predicate;
+
+ refcnt_ptr<GalliumD3D11Buffer, PtrTraits> constant_buffers[D3D11_STAGES][D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT];
+ refcnt_ptr<GalliumD3D11ShaderResourceView, PtrTraits> shader_resource_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
+ refcnt_ptr<GalliumD3D11SamplerState, PtrTraits> samplers[D3D11_STAGES][D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
+ refcnt_ptr<GalliumD3D11Buffer, PtrTraits> input_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
+ refcnt_ptr<GalliumD3D11RenderTargetView, PtrTraits> render_target_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
+ refcnt_ptr<GalliumD3D11Buffer, PtrTraits> so_targets[D3D11_SO_BUFFER_SLOT_COUNT];
+
+#if API >= 11
+ refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> cs_unordered_access_views[D3D11_PS_CS_UAV_REGISTER_COUNT];
+ refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> om_unordered_access_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
+#endif
+
+ D3D11_VIEWPORT viewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
+ D3D11_RECT scissor_rects[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
+ unsigned so_offsets[D3D11_SO_BUFFER_SLOT_COUNT];
+ D3D11_PRIMITIVE_TOPOLOGY primitive_topology;
+ DXGI_FORMAT index_format;
+ unsigned index_offset;
+ BOOL render_predicate_value;
+ float blend_color[4];
+ unsigned sample_mask;
+ unsigned stencil_ref;
+ bool depth_clamp;
+
+ void* default_input_layout;
+ void* default_rasterizer;
+ void* default_depth_stencil;
+ void* default_blend;
+ void* default_sampler;
+ void* ld_sampler;
+ void * default_shaders[D3D11_STAGES];
+
+ // derived state
+ int primitive_mode;
+ struct pipe_vertex_buffer vertex_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
+ struct pipe_resource* so_buffers[D3D11_SO_BUFFER_SLOT_COUNT];
+ struct pipe_sampler_view* sampler_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
+ struct
+ {
+ void* ld; // accessed with a -1 index from v
+ void* v[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
+ } sampler_csos[D3D11_STAGES];
+ struct pipe_resource * buffers[D3D11_SO_BUFFER_SLOT_COUNT];
+ unsigned num_shader_resource_views[D3D11_STAGES];
+ unsigned num_samplers[D3D11_STAGES];
+ unsigned num_vertex_buffers;
+ unsigned num_render_target_views;
+ unsigned num_viewports;
+ unsigned num_scissor_rects;
+ unsigned num_so_targets;
+
+ struct pipe_context* pipe;
+ unsigned update_flags;
+
+ bool owns_pipe;
+ unsigned context_flags;
+
+ GalliumD3D11Caps caps;
+
+ cso_context* cso_ctx;
+ gen_mipmap_state* gen_mipmap;
+
+#if API >= 11
+#define SYNCHRONIZED do {} while(0)
+
+ GalliumD3D11DeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, bool owns_pipe, unsigned context_flags = 0)
+ : GalliumD3D11DeviceChild(device), pipe(pipe), owns_pipe(owns_pipe), context_flags(context_flags)
+ {
+ caps = device->screen_caps;
+ init_context();
+ }
+
+ ~GalliumD3D11DeviceContext()
+ {
+ destroy_context();
+ }
+#else
+#define SYNCHRONIZED lock_t<maybe_mutex_t<threadsafe> > lock_(this->mutex)
+
+ GalliumD3D10Device(pipe_screen* screen, pipe_context* pipe, bool owns_pipe, unsigned creation_flags, IDXGIAdapter* adapter)
+ : GalliumD3D10ScreenImpl<threadsafe>(screen, pipe, owns_pipe, creation_flags, adapter), pipe(pipe), owns_pipe(owns_pipe), context_flags(0)
+ {
+ caps = this->screen_caps;
+ init_context();
+ }
+
+ ~GalliumD3D10Device()
+ {
+ destroy_context();
+ }
+#endif
+
+ void init_context()
+ {
+ if(!pipe->begin_query)
+ caps.queries = false;
+ if(!pipe->render_condition)
+ caps.render_condition = false;
+ if(!pipe->bind_gs_state)
+ {
+ caps.gs = false;
+ caps.stages = 2;
+ }
+ if(!pipe->set_stream_output_buffers)
+ caps.so = false;
+
+ update_flags = 0;
+
+ // pipeline state
+ memset(viewports, 0, sizeof(viewports));
+ memset(scissor_rects, 0, sizeof(scissor_rects));
+ memset(so_offsets, 0, sizeof(so_offsets));
+ primitive_topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
+ index_format = DXGI_FORMAT_UNKNOWN;
+ index_offset = 0;
+ render_predicate_value = 0;
+ memset(blend_color, 0, sizeof(blend_color));
+ sample_mask = ~0;
+ stencil_ref = 0;
+ depth_clamp = 0;
+
+ // derived state
+ primitive_mode = 0;
+ memset(vertex_buffers, 0, sizeof(vertex_buffers));
+ memset(so_buffers, 0, sizeof(so_buffers));
+ memset(sampler_views, 0, sizeof(sampler_views));
+ memset(sampler_csos, 0, sizeof(sampler_csos));
+ memset(num_shader_resource_views, 0, sizeof(num_shader_resource_views));
+ memset(num_samplers, 0, sizeof(num_samplers));
+ num_vertex_buffers = 0;
+ num_render_target_views = 0;
+ num_viewports = 0;
+ num_scissor_rects = 0;
+ num_so_targets = 0;
+
+ default_input_layout = pipe->create_vertex_elements_state(pipe, 0, 0);
+
+ struct pipe_rasterizer_state rasterizerd;
+ memset(&rasterizerd, 0, sizeof(rasterizerd));
+ rasterizerd.gl_rasterization_rules = 1;
+ rasterizerd.cull_face = PIPE_FACE_BACK;
+ default_rasterizer = pipe->create_rasterizer_state(pipe, &rasterizerd);
+
+ struct pipe_depth_stencil_alpha_state depth_stencild;
+ memset(&depth_stencild, 0, sizeof(depth_stencild));
+ depth_stencild.depth.enabled = TRUE;
+ depth_stencild.depth.writemask = 1;
+ depth_stencild.depth.func = PIPE_FUNC_LESS;
+ default_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &depth_stencild);
+
+ struct pipe_blend_state blendd;
+ memset(&blendd, 0, sizeof(blendd));
+ blendd.rt[0].colormask = 0xf;
+ default_blend = pipe->create_blend_state(pipe, &blendd);
+
+ struct pipe_sampler_state samplerd;
+ memset(&samplerd, 0, sizeof(samplerd));
+ samplerd.normalized_coords = 1;
+ samplerd.min_img_filter = PIPE_TEX_FILTER_LINEAR;
+ samplerd.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
+ samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR;
+ samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ samplerd.border_color[0] = 1.0f;
+ samplerd.border_color[1] = 1.0f;
+ samplerd.border_color[2] = 1.0f;
+ samplerd.border_color[3] = 1.0f;
+ samplerd.min_lod = -FLT_MAX;
+ samplerd.max_lod = FLT_MAX;
+ samplerd.max_anisotropy = 1;
+ default_sampler = pipe->create_sampler_state(pipe, &samplerd);
+
+ memset(&samplerd, 0, sizeof(samplerd));
+ samplerd.normalized_coords = 0;
+ samplerd.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+ samplerd.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+ samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+ samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
+ samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
+ samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
+ samplerd.min_lod = -FLT_MAX;
+ samplerd.max_lod = FLT_MAX;
+ samplerd.max_anisotropy = 1;
+ ld_sampler = pipe->create_sampler_state(pipe, &samplerd);
+
+ for(unsigned s = 0; s < D3D11_STAGES; ++s)
+ {
+ sampler_csos[s].ld = ld_sampler;
+ for(unsigned i = 0; i < D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT; ++i)
+ sampler_csos[s].v[i] = default_sampler;
+ }
+
+ // TODO: should this really be empty shaders, or should they be all-passthrough?
+ memset(default_shaders, 0, sizeof(default_shaders));
+ struct ureg_program *ureg;
+ ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+ ureg_END(ureg);
+ default_shaders[PIPE_SHADER_FRAGMENT] = ureg_create_shader_and_destroy(ureg, pipe);
+
+ ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
+ ureg_END(ureg);
+ default_shaders[PIPE_SHADER_VERTEX] = ureg_create_shader_and_destroy(ureg, pipe);
+
+ cso_ctx = cso_create_context(pipe);
+ gen_mipmap = util_create_gen_mipmap(pipe, cso_ctx);
+
+ RestoreGalliumState();
+ }
+
+ void destroy_context()
+ {
+ util_destroy_gen_mipmap(gen_mipmap);
+ cso_destroy_context(cso_ctx);
+ pipe->delete_vertex_elements_state(pipe, default_input_layout);
+ pipe->delete_rasterizer_state(pipe, default_rasterizer);
+ pipe->delete_depth_stencil_alpha_state(pipe, default_depth_stencil);
+ pipe->delete_blend_state(pipe, default_blend);
+ pipe->delete_sampler_state(pipe, default_sampler);
+ pipe->delete_sampler_state(pipe, ld_sampler);
+ pipe->delete_fs_state(pipe, default_shaders[PIPE_SHADER_FRAGMENT]);
+ pipe->delete_vs_state(pipe, default_shaders[PIPE_SHADER_VERTEX]);
+ if(owns_pipe)
+ pipe->destroy(pipe);
+ }
+
+ virtual unsigned STDMETHODCALLTYPE GetContextFlags(void)
+ {
+ return context_flags;
+ }
+#if API >= 11
+#define SET_SHADER_EXTRA_ARGS , \
+ __in_ecount_opt(NumClassInstances) ID3D11ClassInstance *const *ppClassInstances, \
+ unsigned NumClassInstances
+#define GET_SHADER_EXTRA_ARGS , \
+ __out_ecount_opt(*pNumClassInstances) ID3D11ClassInstance **ppClassInstances, \
+ __inout_opt unsigned *pNumClassInstances
+#else
+#define SET_SHADER_EXTRA_ARGS
+#define GET_SHADER_EXTRA_ARGS
+#endif
+
+/* On Windows D3D11, SetConstantBuffers and SetShaderResources crash if passed a null pointer.
+ * Instead, you have to pass a pointer to nulls to unbind things.
+ * We do the same.
+ * TODO: is D3D10 the same?
+ */
+ template<unsigned s>
+ void xs_set_shader(GalliumD3D11Shader<>* shader)
+ {
+ if(shader != shaders[s].p)
+ {
+ shaders[s] = shader;
+ void* shader_cso = shader ? shader->object : default_shaders[s];
+ switch(s)
+ {
+ case PIPE_SHADER_VERTEX:
+ pipe->bind_vs_state(pipe, shader_cso);
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ pipe->bind_fs_state(pipe, shader_cso);
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ pipe->bind_gs_state(pipe, shader_cso);
+ break;
+ }
+ update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s)) | (1 << (UPDATE_VIEWS_SHIFT + s));
+ }
+ }
+
+ template<unsigned s>
+ void xs_set_constant_buffers(unsigned start, unsigned count, GalliumD3D11Buffer *const *constbufs)
+ {
+ for(unsigned i = 0; i < count; ++i)
+ {
+ if(constbufs[i] != constant_buffers[s][i].p)
+ {
+ constant_buffers[s][i] = constbufs[i];
+ if(s < caps.stages && start + i < caps.constant_buffers[s])
+ pipe->set_constant_buffer(pipe, s, start + i, constbufs[i] ? constbufs[i]->resource : NULL);
+ }
+ }
+ }
+
+ template<unsigned s>
+ void xs_set_shader_resources(unsigned start, unsigned count, GalliumD3D11ShaderResourceView *const *srvs)
+ {
+ int last_different = -1;
+ for(unsigned i = 0; i < count; ++i)
+ {
+ if(shader_resource_views[s][start + i].p != srvs[i])
+ {
+ shader_resource_views[s][start + i] = srvs[i];
+ sampler_views[s][start + i] = srvs[i] ? srvs[i]->object : 0;
+ last_different = i;
+ }
+ }
+ if(last_different >= 0)
+ {
+ num_shader_resource_views[s] = std::max(num_shader_resource_views[s], start + last_different + 1);
+ update_flags |= 1 << (UPDATE_VIEWS_SHIFT + s);
+ }
+ }
+
+ template<unsigned s>
+ void xs_set_samplers(unsigned start, unsigned count, GalliumD3D11SamplerState *const *samps)
+ {
+ int last_different = -1;
+ for(unsigned i = 0; i < count; ++i)
+ {
+ if(samplers[s][start + i].p != samps[i])
+ {
+ samplers[s][start + i] = samps[i];
+ sampler_csos[s].v[start + i] = samps[i] ? samps[i]->object : default_sampler;
+ }
+ if(last_different >= 0)
+ {
+ num_samplers[s] = std::max(num_samplers[s], start + last_different + 1);
+ update_flags |= (UPDATE_SAMPLERS_SHIFT + s);
+ }
+ }
+ }
+
+#define IMPLEMENT_SHADER_STAGE(XS, Stage) \
+ virtual void STDMETHODCALLTYPE XS##SetShader( \
+ __in_opt ID3D11##Stage##Shader *pShader \
+ SET_SHADER_EXTRA_ARGS) \
+ { \
+ SYNCHRONIZED; \
+ xs_set_shader<D3D11_STAGE_##XS>((GalliumD3D11Shader<>*)pShader); \
+ } \
+ virtual void STDMETHODCALLTYPE XS##GetShader(\
+ __out ID3D11##Stage##Shader **ppShader \
+ GET_SHADER_EXTRA_ARGS) \
+ { \
+ SYNCHRONIZED; \
+ *ppShader = (ID3D11##Stage##Shader*)shaders[D3D11_STAGE_##XS].ref(); \
+ } \
+ virtual void STDMETHODCALLTYPE XS##SetConstantBuffers(\
+ __in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - 1) unsigned StartSlot, \
+ __in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - StartSlot) unsigned NumBuffers, \
+ __in_ecount(NumBuffers) ID3D11Buffer *const *ppConstantBuffers) \
+ { \
+ SYNCHRONIZED; \
+ xs_set_constant_buffers<D3D11_STAGE_##XS>(StartSlot, NumBuffers, (GalliumD3D11Buffer *const *)ppConstantBuffers); \
+ } \
+ virtual void STDMETHODCALLTYPE XS##GetConstantBuffers(\
+ __in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - 1) unsigned StartSlot, \
+ __in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - StartSlot) unsigned NumBuffers, \
+ __out_ecount(NumBuffers) ID3D11Buffer **ppConstantBuffers) \
+ { \
+ SYNCHRONIZED; \
+ for(unsigned i = 0; i < NumBuffers; ++i) \
+ ppConstantBuffers[i] = constant_buffers[D3D11_STAGE_##XS][StartSlot + i].ref(); \
+ } \
+ virtual void STDMETHODCALLTYPE XS##SetShaderResources(\
+ __in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT - 1) unsigned StartSlot, \
+ __in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT - StartSlot) unsigned NumViews, \
+ __in_ecount(NumViews) ID3D11ShaderResourceView *const *ppShaderResourceViews) \
+ { \
+ SYNCHRONIZED; \
+ xs_set_shader_resources<D3D11_STAGE_##XS>(StartSlot, NumViews, (GalliumD3D11ShaderResourceView *const *)ppShaderResourceViews); \
+ } \
+ virtual void STDMETHODCALLTYPE XS##GetShaderResources(\
+ __in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT - 1) unsigned StartSlot, \
+ __in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT - StartSlot) unsigned NumViews, \
+ __out_ecount(NumViews) ID3D11ShaderResourceView **ppShaderResourceViews) \
+ { \
+ SYNCHRONIZED; \
+ for(unsigned i = 0; i < NumViews; ++i) \
+ ppShaderResourceViews[i] = shader_resource_views[D3D11_STAGE_##XS][StartSlot + i].ref(); \
+ } \
+ virtual void STDMETHODCALLTYPE XS##SetSamplers(\
+ __in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT - 1) unsigned StartSlot, \
+ __in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT - StartSlot) unsigned NumSamplers, \
+ __in_ecount(NumSamplers) ID3D11SamplerState *const *ppSamplers) \
+ { \
+ SYNCHRONIZED; \
+ xs_set_samplers<D3D11_STAGE_##XS>(StartSlot, NumSamplers, (GalliumD3D11SamplerState *const *)ppSamplers); \
+ } \
+ virtual void STDMETHODCALLTYPE XS##GetSamplers( \
+ __in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT - 1) unsigned StartSlot, \
+ __in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT - StartSlot) unsigned NumSamplers, \
+ __out_ecount(NumSamplers) ID3D11SamplerState **ppSamplers) \
+ { \
+ SYNCHRONIZED; \
+ for(unsigned i = 0; i < NumSamplers; ++i) \
+ ppSamplers[i] = samplers[D3D11_STAGE_##XS][StartSlot + i].ref(); \
+ }
+
+#define DO_VS(x) x
+#define DO_GS(x) do {if(caps.gs) {x;}} while(0)
+#define DO_PS(x) x
+#define DO_HS(x)
+#define DO_DS(x)
+#define DO_CS(x)
+ IMPLEMENT_SHADER_STAGE(VS, Vertex)
+ IMPLEMENT_SHADER_STAGE(GS, Geometry)
+ IMPLEMENT_SHADER_STAGE(PS, Pixel)
+
+#if API >= 11
+ IMPLEMENT_SHADER_STAGE(HS, Hull)
+ IMPLEMENT_SHADER_STAGE(DS, Domain)
+ IMPLEMENT_SHADER_STAGE(CS, Compute)
+
+ virtual void STDMETHODCALLTYPE CSSetUnorderedAccessViews(
+ __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - 1) unsigned StartSlot,
+ __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - StartSlot) unsigned NumUAVs,
+ __in_ecount(NumUAVs) ID3D11UnorderedAccessView *const *ppUnorderedAccessViews,
+ __in_ecount(NumUAVs) const unsigned *pUAVInitialCounts)
+ {
+ SYNCHRONIZED;
+ for(unsigned i = 0; i < NumUAVs; ++i)
+ cs_unordered_access_views[StartSlot + i] = ppUnorderedAccessViews[i];
+ }
+
+ virtual void STDMETHODCALLTYPE CSGetUnorderedAccessViews(
+ __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - 1) unsigned StartSlot,
+ __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - StartSlot) unsigned NumUAVs,
+ __out_ecount(NumUAVs) ID3D11UnorderedAccessView **ppUnorderedAccessViews)
+ {
+ SYNCHRONIZED;
+ for(unsigned i = 0; i < NumUAVs; ++i)
+ ppUnorderedAccessViews[i] = cs_unordered_access_views[StartSlot + i].ref();
+ }
+#endif
+
+ template<unsigned s>
+ void update_stage()
+ {
+ if(update_flags & (1 << (UPDATE_VIEWS_SHIFT + s)))
+ {
+ while(num_shader_resource_views[s] && !sampler_views[s][num_shader_resource_views[s] - 1]) \
+ --num_shader_resource_views[s];
+ if(s < caps.stages)
+ {
+ struct pipe_sampler_view* views_to_bind[PIPE_MAX_SAMPLERS];
+ unsigned num_views_to_bind = shaders[s] ? shaders[s]->slot_to_resource.size() : 0;
+ for(unsigned i = 0; i < num_views_to_bind; ++i)
+ {
+ views_to_bind[i] = sampler_views[s][shaders[s]->slot_to_resource[i]];
+ }
+ switch(s)
+ {
+ case PIPE_SHADER_VERTEX:
+ pipe->set_vertex_sampler_views(pipe, num_views_to_bind, views_to_bind);
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ pipe->set_fragment_sampler_views(pipe, num_views_to_bind, views_to_bind);
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ pipe->set_geometry_sampler_views(pipe, num_views_to_bind, views_to_bind);
+ break;
+ }
+ }
+ }
+
+ if(update_flags & (1 << (UPDATE_SAMPLERS_SHIFT + s)))
+ {
+ while(num_samplers[s] && !sampler_csos[s].v[num_samplers[s] - 1])
+ --num_samplers[s];
+ if(s < caps.stages)
+ {
+ void* samplers_to_bind[PIPE_MAX_SAMPLERS];
+ unsigned num_samplers_to_bind = shaders[s] ? shaders[s]->slot_to_sampler.size() : 0;
+ for(unsigned i = 0; i < num_samplers_to_bind; ++i)
+ {
+ // index can be -1 to access sampler_csos[s].ld
+ samplers_to_bind[i] = *(sampler_csos[s].v + shaders[s]->slot_to_sampler[i]);
+ }
+ switch(s)
+ {
+ case PIPE_SHADER_VERTEX:
+ pipe->bind_vertex_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ pipe->bind_fragment_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ pipe->bind_geometry_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
+ break;
+ }
+ }
+ }
+ }
+
+ void update_state()
+ {
+ update_stage<D3D11_STAGE_PS>();
+ update_stage<D3D11_STAGE_VS>();
+ update_stage<D3D11_STAGE_GS>();
+#if API >= 11
+ update_stage<D3D11_STAGE_HS>();
+ update_stage<D3D11_STAGE_DS>();
+ update_stage<D3D11_STAGE_CS>();
+#endif
+
+ if(update_flags & UPDATE_VERTEX_BUFFERS)
+ {
+ while(num_vertex_buffers && !vertex_buffers[num_vertex_buffers - 1].buffer)
+ --num_vertex_buffers;
+ pipe->set_vertex_buffers(pipe, num_vertex_buffers, vertex_buffers);
+ }
+
+ update_flags = 0;
+ }
+
+ virtual void STDMETHODCALLTYPE IASetInputLayout(
+ __in_opt ID3D11InputLayout *pInputLayout)
+ {
+ SYNCHRONIZED;
+ if(pInputLayout != input_layout.p)
+ {
+ input_layout = pInputLayout;
+ pipe->bind_vertex_elements_state(pipe, pInputLayout ? ((GalliumD3D11InputLayout*)pInputLayout)->object : default_input_layout);
+ }
+ }
+
+ virtual void STDMETHODCALLTYPE IAGetInputLayout(
+ __out ID3D11InputLayout **ppInputLayout)
+ {
+ SYNCHRONIZED;
+ *ppInputLayout = input_layout.ref();
+ }
+
+ virtual void STDMETHODCALLTYPE IASetVertexBuffers(
+ __in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT - 1) unsigned StartSlot,
+ __in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT - StartSlot) unsigned NumBuffers,
+ __in_ecount(NumBuffers) ID3D11Buffer *const *ppVertexBuffers,
+ __in_ecount(NumBuffers) const unsigned *pStrides,
+ __in_ecount(NumBuffers) const unsigned *pOffsets)
+ {
+ SYNCHRONIZED;
+ int last_different = -1;
+ for(unsigned i = 0; i < NumBuffers; ++i)
+ {
+ ID3D11Buffer* buffer = ppVertexBuffers[i];
+ if(buffer != input_buffers[StartSlot + i].p
+ || vertex_buffers[StartSlot + i].buffer_offset != pOffsets[i]
+ || vertex_buffers[StartSlot + i].stride != pOffsets[i]
+ )
+ {
+ input_buffers[StartSlot + i] = buffer;
+ vertex_buffers[StartSlot + i].buffer = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
+ vertex_buffers[StartSlot + i].buffer_offset = pOffsets[i];
+ vertex_buffers[StartSlot + i].stride = pStrides[i];
+ vertex_buffers[StartSlot + i].max_index = ~0;
+ last_different = i;
+ }
+ }
+ if(last_different >= 0)
+ {
+ num_vertex_buffers = std::max(num_vertex_buffers, StartSlot + NumBuffers);
+ update_flags |= UPDATE_VERTEX_BUFFERS;
+ }
+ }
+
+ virtual void STDMETHODCALLTYPE IAGetVertexBuffers(
+ __in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT - 1) unsigned StartSlot,
+ __in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT - StartSlot) unsigned NumBuffers,
+ __out_ecount_opt(NumBuffers) ID3D11Buffer **ppVertexBuffers,
+ __out_ecount_opt(NumBuffers) unsigned *pStrides,
+ __out_ecount_opt(NumBuffers) unsigned *pOffsets)
+ {
+ SYNCHRONIZED;
+ if(ppVertexBuffers)
+ {
+ for(unsigned i = 0; i < NumBuffers; ++i)
+ ppVertexBuffers[i] = input_buffers[StartSlot + i].ref();
+ }
+
+ if(pOffsets)
+ {
+ for(unsigned i = 0; i < NumBuffers; ++i)
+ pOffsets[i] = vertex_buffers[StartSlot + i].buffer_offset;
+ }
+
+ if(pStrides)
+ {
+ for(unsigned i = 0; i < NumBuffers; ++i)
+ pStrides[i] = vertex_buffers[StartSlot + i].stride;
+ }
+ }
+
+ void set_index_buffer()
+ {
+ pipe_index_buffer ib;
+ if(!index_buffer)
+ {
+ memset(&ib, 0, sizeof(ib));
+ }
+ else
+ {
+ if(index_format == DXGI_FORMAT_R32_UINT)
+ ib.index_size = 4;
+ else if(index_format == DXGI_FORMAT_R16_UINT)
+ ib.index_size = 2;
+ else
+ ib.index_size = 1;
+ ib.offset = index_offset;
+ ib.buffer = index_buffer ? ((GalliumD3D11Buffer*)index_buffer.p)->resource : 0;
+ }
+ pipe->set_index_buffer(pipe, &ib);
+ }
+
+ virtual void STDMETHODCALLTYPE IASetIndexBuffer(
+ __in_opt ID3D11Buffer *pIndexBuffer,
+ __in DXGI_FORMAT Format,
+ __in unsigned Offset)
+ {
+ SYNCHRONIZED;
+ if(index_buffer.p != pIndexBuffer || index_format != Format || index_offset != Offset)
+ {
+ index_buffer = pIndexBuffer;
+ index_format = Format;
+ index_offset = Offset;
+
+ set_index_buffer();
+ }
+ }
+
+ virtual void STDMETHODCALLTYPE IAGetIndexBuffer(
+ __out_opt ID3D11Buffer **pIndexBuffer,
+ __out_opt DXGI_FORMAT *Format,
+ __out_opt unsigned *Offset)
+ {
+ SYNCHRONIZED;
+ if(pIndexBuffer)
+ *pIndexBuffer = index_buffer.ref();
+ if(Format)
+ *Format = index_format;
+ if(Offset)
+ *Offset = index_offset;
+ }
+
+ virtual void STDMETHODCALLTYPE IASetPrimitiveTopology(
+ __in D3D11_PRIMITIVE_TOPOLOGY Topology)
+ {
+ SYNCHRONIZED;
+ if(primitive_topology != Topology)
+ {
+ if(Topology < D3D_PRIMITIVE_TOPOLOGY_COUNT)
+ primitive_mode = d3d_to_pipe_prim[Topology];
+ else
+ primitive_mode = 0;
+ primitive_topology = Topology;
+ }
+ }
+
+ virtual void STDMETHODCALLTYPE IAGetPrimitiveTopology(
+ __out D3D11_PRIMITIVE_TOPOLOGY *pTopology)
+ {
+ SYNCHRONIZED;
+ *pTopology = primitive_topology;
+ }
+
+ virtual void STDMETHODCALLTYPE DrawIndexed(
+ __in unsigned IndexCount,
+ __in unsigned StartIndexLocation,
+ __in int BaseVertexLocation)
+ {
+ SYNCHRONIZED;
+ if(update_flags)
+ update_state();
+
+ pipe_draw_info info;
+ info.mode = primitive_mode;
+ info.indexed = TRUE;
+ info.count = IndexCount;
+ info.start = StartIndexLocation;
+ info.index_bias = BaseVertexLocation;
+ info.min_index = 0;
+ info.max_index = ~0;
+ info.start_instance = 0;
+ info.instance_count = 1;
+
+ pipe->draw_vbo(pipe, &info);
+ }
+
+ virtual void STDMETHODCALLTYPE Draw(
+ __in unsigned VertexCount,
+ __in unsigned StartVertexLocation)
+ {
+ SYNCHRONIZED;
+ if(update_flags)
+ update_state();
+
+ pipe_draw_info info;
+ info.mode = primitive_mode;
+ info.indexed = FALSE;
+ info.count = VertexCount;
+ info.start = StartVertexLocation;
+ info.index_bias = 0;
+ info.min_index = 0;
+ info.max_index = ~0;
+ info.start_instance = 0;
+ info.instance_count = 1;
+
+ pipe->draw_vbo(pipe, &info);
+ }
+
+ virtual void STDMETHODCALLTYPE DrawIndexedInstanced(
+ __in unsigned IndexCountPerInstance,
+ __in unsigned InstanceCount,
+ __in unsigned StartIndexLocation,
+ __in int BaseVertexLocation,
+ __in unsigned StartInstanceLocation)
+ {
+ SYNCHRONIZED;
+ if(update_flags)
+ update_state();
+
+ pipe_draw_info info;
+ info.mode = primitive_mode;
+ info.indexed = TRUE;
+ info.count = IndexCountPerInstance;
+ info.start = StartIndexLocation;
+ info.index_bias = BaseVertexLocation;
+ info.min_index = 0;
+ info.max_index = ~0;
+ info.start_instance = StartInstanceLocation;
+ info.instance_count = InstanceCount;
+
+ pipe->draw_vbo(pipe, &info);
+ }
+
+ virtual void STDMETHODCALLTYPE DrawInstanced(
+ __in unsigned VertexCountPerInstance,
+ __in unsigned InstanceCount,
+ __in unsigned StartVertexLocation,
+ __in unsigned StartInstanceLocation)
+ {
+ SYNCHRONIZED;
+ if(update_flags)
+ update_state();
+
+ pipe_draw_info info;
+ info.mode = primitive_mode;
+ info.indexed = FALSE;
+ info.count = VertexCountPerInstance;
+ info.start = StartVertexLocation;
+ info.index_bias = 0;
+ info.min_index = 0;
+ info.max_index = ~0;
+ info.start_instance = StartInstanceLocation;
+ info.instance_count = InstanceCount;
+
+ pipe->draw_vbo(pipe, &info);
+ }
+
+ virtual void STDMETHODCALLTYPE DrawAuto(void)
+ {
+ if(!caps.so)
+ return;
+
+ SYNCHRONIZED;
+ if(update_flags)
+ update_state();
+
+ pipe->draw_stream_output(pipe, primitive_mode);
+ }
+
+ virtual void STDMETHODCALLTYPE DrawIndexedInstancedIndirect(
+ __in ID3D11Buffer *pBufferForArgs,
+ __in unsigned AlignedByteOffsetForArgs)
+ {
+ SYNCHRONIZED;
+ if(update_flags)
+ update_state();
+
+ struct {
+ unsigned count;
+ unsigned instance_count;
+ unsigned start;
+ unsigned index_bias;
+ } data;
+
+ pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)pBufferForArgs)->resource, AlignedByteOffsetForArgs, sizeof(data), &data);
+
+ pipe_draw_info info;
+ info.mode = primitive_mode;
+ info.indexed = TRUE;
+ info.start = data.start;
+ info.count = data.count;
+ info.index_bias = data.index_bias;
+ info.min_index = 0;
+ info.max_index = ~0;
+ info.start_instance = 0;
+ info.instance_count = data.instance_count;
+
+ pipe->draw_vbo(pipe, &info);
+ }
+
+ virtual void STDMETHODCALLTYPE DrawInstancedIndirect(
+ __in ID3D11Buffer *pBufferForArgs,
+ __in unsigned AlignedByteOffsetForArgs)
+ {
+ SYNCHRONIZED;
+ if(update_flags)
+ update_state();
+
+ struct {
+ unsigned count;
+ unsigned instance_count;
+ unsigned start;
+ } data;
+
+ pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)pBufferForArgs)->resource, AlignedByteOffsetForArgs, sizeof(data), &data);
+
+ pipe_draw_info info;
+ info.mode = primitive_mode;
+ info.indexed = FALSE;
+ info.start = data.start;
+ info.count = data.count;
+ info.index_bias = 0;
+ info.min_index = 0;
+ info.max_index = ~0;
+ info.start_instance = 0;
+ info.instance_count = data.instance_count;
+
+ pipe->draw_vbo(pipe, &info);
+ }
+
+#if API >= 11
+ virtual void STDMETHODCALLTYPE Dispatch(
+ __in unsigned ThreadGroupCountX,
+ __in unsigned ThreadGroupCountY,
+ __in unsigned ThreadGroupCountZ)
+ {
+// uncomment this when this is implemented
+// SYNCHRONIZED;
+// if(update_flags)
+// update_state();
+ }
+
+ virtual void STDMETHODCALLTYPE DispatchIndirect(
+ __in ID3D11Buffer *pBufferForArgs,
+ __in unsigned AlignedByteOffsetForArgs)
+ {
+// uncomment this when this is implemented
+// SYNCHRONIZED;
+// if(update_flags)
+// update_state();
+ }
+#endif
+
+ void set_clip()
+ {
+ SYNCHRONIZED;
+ pipe_clip_state clip;
+ clip.nr = 0;
+ clip.depth_clamp = depth_clamp;
+ pipe->set_clip_state(pipe, &clip);
+ }
+
+ virtual void STDMETHODCALLTYPE RSSetState(
+ __in_opt ID3D11RasterizerState *pRasterizerState)
+ {
+ SYNCHRONIZED;
+ if(pRasterizerState != rasterizer_state.p)
+ {
+ rasterizer_state = pRasterizerState;
+ pipe->bind_rasterizer_state(pipe, pRasterizerState ? ((GalliumD3D11RasterizerState*)pRasterizerState)->object : default_rasterizer);
+ bool new_depth_clamp = pRasterizerState ? ((GalliumD3D11RasterizerState*)pRasterizerState)->depth_clamp : false;
+ if(depth_clamp != new_depth_clamp)
+ {
+ depth_clamp = new_depth_clamp;
+ set_clip();
+ }
+ }
+ }
+
+ virtual void STDMETHODCALLTYPE RSGetState(
+ __out ID3D11RasterizerState **ppRasterizerState)
+ {
+ SYNCHRONIZED;
+ *ppRasterizerState = rasterizer_state.ref();
+ }
+
+ void set_viewport()
+ {
+ // TODO: is depth correct? it seems D3D10/11 uses a [-1,1]x[-1,1]x[0,1] cube
+ pipe_viewport_state viewport;
+ float half_width = viewports[0].Width * 0.5f;
+ float half_height = viewports[0].Height * 0.5f;
+
+ viewport.scale[0] = half_width;
+ viewport.scale[1] = -half_height;
+ viewport.scale[2] = (viewports[0].MaxDepth - viewports[0].MinDepth);
+ viewport.scale[3] = 1.0f;
+ viewport.translate[0] = half_width + viewports[0].TopLeftX;
+ viewport.translate[1] = half_height + viewports[0].TopLeftY;
+ viewport.translate[2] = viewports[0].MinDepth;
+ viewport.translate[3] = 1.0f;
+ pipe->set_viewport_state(pipe, &viewport);
+ }
+
+ virtual void STDMETHODCALLTYPE RSSetViewports(
+ __in_range(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) unsigned NumViewports,
+ __in_ecount_opt(NumViewports) const D3D11_VIEWPORT *pViewports)
+ {
+ SYNCHRONIZED;
+ if(NumViewports)
+ {
+ if(memcmp(&viewports[0], &pViewports[0], sizeof(viewports[0])))
+ {
+ viewports[0] = pViewports[0];
+ set_viewport();
+ }
+ for(unsigned i = 1; i < NumViewports; ++i)
+ viewports[i] = pViewports[i];
+ }
+ else if(num_viewports)
+ {
+ // TODO: what should we do here?
+ memset(&viewports[0], 0, sizeof(viewports[0]));
+ set_viewport();
+ }
+ num_viewports = NumViewports;
+ }
+
+ virtual void STDMETHODCALLTYPE RSGetViewports(
+ __inout_range(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) unsigned *pNumViewports,
+ __out_ecount_opt(*pNumViewports) D3D11_VIEWPORT *pViewports)
+ {
+ SYNCHRONIZED;
+ if(pViewports)
+ {
+ unsigned i;
+ for(i = 0; i < std::min(*pNumViewports, num_viewports); ++i)
+ pViewports[i] = viewports[i];
+
+ memset(pViewports + i, 0, (*pNumViewports - i) * sizeof(D3D11_VIEWPORT));
+ }
+
+ *pNumViewports = num_viewports;
+ }
+
+ void set_scissor()
+ {
+ pipe_scissor_state scissor;
+ scissor.minx = scissor_rects[0].left;
+ scissor.miny = scissor_rects[0].top;
+ scissor.maxx = scissor_rects[0].right;
+ scissor.maxy = scissor_rects[0].bottom;
+ pipe->set_scissor_state(pipe, &scissor);
+ }
+
+ virtual void STDMETHODCALLTYPE RSSetScissorRects(
+ __in_range(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) unsigned NumRects,
+ __in_ecount_opt(NumRects) const D3D11_RECT *pRects)
+ {
+ SYNCHRONIZED;
+ if(NumRects)
+ {
+ if(memcmp(&scissor_rects[0], &pRects[0], sizeof(scissor_rects[0])))
+ {
+ scissor_rects[0] = pRects[0];
+ set_scissor();
+ }
+ for(unsigned i = 1; i < NumRects; ++i)
+ scissor_rects[i] = pRects[i];
+ }
+ else if(num_scissor_rects)
+ {
+ // TODO: what should we do here?
+ memset(&scissor_rects[0], 0, sizeof(scissor_rects[0]));
+ set_scissor();
+ }
+
+ num_scissor_rects = NumRects;
+ }
+
+ virtual void STDMETHODCALLTYPE RSGetScissorRects(
+ __inout_range(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) unsigned *pNumRects,
+ __out_ecount_opt(*pNumRects) D3D11_RECT *pRects)
+ {
+ SYNCHRONIZED;
+ if(pRects)
+ {
+ unsigned i;
+ for(i = 0; i < std::min(*pNumRects, num_scissor_rects); ++i)
+ pRects[i] = scissor_rects[i];
+
+ memset(pRects + i, 0, (*pNumRects - i) * sizeof(D3D11_RECT));
+ }
+
+ *pNumRects = num_scissor_rects;
+ }
+
+ virtual void STDMETHODCALLTYPE OMSetBlendState(
+ __in_opt ID3D11BlendState *pBlendState,
+ __in_opt const float BlendFactor[ 4 ],
+ __in unsigned SampleMask)
+ {
+ SYNCHRONIZED;
+ float white[4] = {1.0f, 1.0f, 1.0f, 1.0f};
+
+ if(blend_state.p != pBlendState)
+ {
+ pipe->bind_blend_state(pipe, pBlendState ? ((GalliumD3D11BlendState*)pBlendState)->object : default_blend);
+ blend_state = pBlendState;
+ }
+
+ // Windows D3D11 does this, even though it's apparently undocumented
+ if(!BlendFactor)
+ BlendFactor = white;
+
+ if(memcmp(blend_color, BlendFactor, sizeof(blend_color)))
+ {
+ pipe->set_blend_color(pipe, (struct pipe_blend_color*)BlendFactor);
+ memcpy(blend_color, BlendFactor, sizeof(blend_color));
+ }
+
+ if(sample_mask != SampleMask)
+ {
+ pipe->set_sample_mask(pipe, sample_mask);
+ sample_mask = SampleMask;
+ }
+ }
+
+ virtual void STDMETHODCALLTYPE OMGetBlendState(
+ __out_opt ID3D11BlendState **ppBlendState,
+ __out_opt float BlendFactor[ 4 ],
+ __out_opt unsigned *pSampleMask)
+ {
+ SYNCHRONIZED;
+ if(ppBlendState)
+ *ppBlendState = blend_state.ref();
+ if(BlendFactor)
+ memcpy(BlendFactor, blend_color, sizeof(blend_color));
+ if(pSampleMask)
+ *pSampleMask = sample_mask;
+ }
+
+ void set_stencil_ref()
+ {
+ struct pipe_stencil_ref sref;
+ sref.ref_value[0] = stencil_ref;
+ sref.ref_value[1] = stencil_ref;
+ pipe->set_stencil_ref(pipe, &sref);
+ }
+
+ virtual void STDMETHODCALLTYPE OMSetDepthStencilState(
+ __in_opt ID3D11DepthStencilState *pDepthStencilState,
+ __in unsigned StencilRef)
+ {
+ SYNCHRONIZED;
+ if(pDepthStencilState != depth_stencil_state.p)
+ {
+ pipe->bind_depth_stencil_alpha_state(pipe, pDepthStencilState ? ((GalliumD3D11DepthStencilState*)pDepthStencilState)->object : default_depth_stencil);
+ depth_stencil_state = pDepthStencilState;
+ }
+
+ if(StencilRef != stencil_ref)
+ {
+ stencil_ref = StencilRef;
+ set_stencil_ref();
+ }
+ }
+
+ virtual void STDMETHODCALLTYPE OMGetDepthStencilState(
+ __out_opt ID3D11DepthStencilState **ppDepthStencilState,
+ __out_opt unsigned *pStencilRef)
+ {
+ SYNCHRONIZED;
+ if(*ppDepthStencilState)
+ *ppDepthStencilState = depth_stencil_state.ref();
+ if(pStencilRef)
+ *pStencilRef = stencil_ref;
+ }
+
+ void set_framebuffer()
+ {
+ struct pipe_framebuffer_state fb;
+ memset(&fb, 0, sizeof(fb));
+ if(depth_stencil_view)
+ {
+ struct pipe_surface* surf = ((GalliumD3D11DepthStencilView*)depth_stencil_view.p)->object;
+ fb.zsbuf = surf;
+ if(surf->width > fb.width)
+ fb.width = surf->width;
+ if(surf->height > fb.height)
+ fb.height = surf->height;
+ }
+ fb.nr_cbufs = num_render_target_views;
+ unsigned i;
+ for(i = 0; i < num_render_target_views; ++i)
+ {
+ if(render_target_views[i])
+ {
+ struct pipe_surface* surf = ((GalliumD3D11RenderTargetView*)render_target_views[i].p)->object;
+ fb.cbufs[i] = surf;
+ if(surf->width > fb.width)
+ fb.width = surf->width;
+ if(surf->height > fb.height)
+ fb.height = surf->height;
+ }
+ }
+
+ pipe->set_framebuffer_state(pipe, &fb);
+ }
+
+ /* TODO: the docs say that we should unbind conflicting resources (e.g. those bound for read while we are binding them for write too), but we aren't.
+ * Hopefully nobody relies on this happening
+ */
+
+ virtual void STDMETHODCALLTYPE OMSetRenderTargets(
+ __in_range(0, D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT) unsigned NumViews,
+ __in_ecount_opt(NumViews) ID3D11RenderTargetView *const *ppRenderTargetViews,
+ __in_opt ID3D11DepthStencilView *pDepthStencilView)
+ {
+ SYNCHRONIZED;
+ if(!ppRenderTargetViews)
+ NumViews = 0;
+ if(NumViews == num_render_target_views)
+ {
+ for(unsigned i = 0; i < NumViews; ++i)
+ {
+ if(ppRenderTargetViews[i] != render_target_views[i].p)
+ goto changed;
+ }
+ return;
+ }
+changed:
+ depth_stencil_view = pDepthStencilView;
+ unsigned i;
+ for(i = 0; i < NumViews; ++i)
+ {
+ render_target_views[i] = ppRenderTargetViews[i];
+#if API >= 11
+ om_unordered_access_views[i] = (ID3D11UnorderedAccessView*)NULL;
+#endif
+ }
+ for(; i < num_render_target_views; ++i)
+ render_target_views[i] = (ID3D11RenderTargetView*)NULL;
+ num_render_target_views = NumViews;
+ set_framebuffer();
+ }
+
+ virtual void STDMETHODCALLTYPE OMGetRenderTargets(
+ __in_range(0, D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT) unsigned NumViews,
+ __out_ecount_opt(NumViews) ID3D11RenderTargetView **ppRenderTargetViews,
+ __out_opt ID3D11DepthStencilView **ppDepthStencilView)
+ {
+ SYNCHRONIZED;
+ if(ppRenderTargetViews)
+ {
+ unsigned i;
+ for(i = 0; i < std::min(num_render_target_views, NumViews); ++i)
+ ppRenderTargetViews[i] = render_target_views[i].ref();
+
+ for(; i < NumViews; ++i)
+ ppRenderTargetViews[i] = 0;
+ }
+
+ if(ppDepthStencilView)
+ *ppDepthStencilView = depth_stencil_view.ref();
+ }
+
+#if API >= 11
+ /* TODO: what is this supposed to do _exactly_? are we doing the right thing? */
+ virtual void STDMETHODCALLTYPE OMSetRenderTargetsAndUnorderedAccessViews(
+ __in unsigned NumRTVs,
+ __in_ecount_opt(NumRTVs) ID3D11RenderTargetView *const *ppRenderTargetViews,
+ __in_opt ID3D11DepthStencilView *pDepthStencilView,
+ __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - 1) unsigned UAVStartSlot,
+ __in unsigned NumUAVs,
+ __in_ecount_opt(NumUAVs) ID3D11UnorderedAccessView *const *ppUnorderedAccessViews,
+ __in_ecount_opt(NumUAVs) const unsigned *pUAVInitialCounts)
+ {
+ SYNCHRONIZED;
+ if(NumRTVs != D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL)
+ OMSetRenderTargets(NumRTVs, ppRenderTargetViews, pDepthStencilView);
+
+ if(NumUAVs != D3D11_KEEP_UNORDERED_ACCESS_VIEWS)
+ {
+ for(unsigned i = 0; i < NumUAVs; ++i)
+ {
+ om_unordered_access_views[UAVStartSlot + i] = ppUnorderedAccessViews[i];
+ render_target_views[UAVStartSlot + i] = (ID3D11RenderTargetView*)0;
+ }
+ }
+ }
+
+ virtual void STDMETHODCALLTYPE OMGetRenderTargetsAndUnorderedAccessViews(
+ __in_range(0, D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT) unsigned NumRTVs,
+ __out_ecount_opt(NumRTVs) ID3D11RenderTargetView **ppRenderTargetViews,
+ __out_opt ID3D11DepthStencilView **ppDepthStencilView,
+ __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - 1) unsigned UAVStartSlot,
+ __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - UAVStartSlot) unsigned NumUAVs,
+ __out_ecount_opt(NumUAVs) ID3D11UnorderedAccessView **ppUnorderedAccessViews)
+ {
+ SYNCHRONIZED;
+ if(ppRenderTargetViews)
+ OMGetRenderTargets(NumRTVs, ppRenderTargetViews, ppDepthStencilView);
+
+ if(ppUnorderedAccessViews)
+ {
+ for(unsigned i = 0; i < NumUAVs; ++i)
+ ppUnorderedAccessViews[i] = om_unordered_access_views[UAVStartSlot + i].ref();
+ }
+ }
+#endif
+
+ virtual void STDMETHODCALLTYPE SOSetTargets(
+ __in_range(0, D3D11_SO_BUFFER_SLOT_COUNT) unsigned NumBuffers,
+ __in_ecount_opt(NumBuffers) ID3D11Buffer *const *ppSOTargets,
+ __in_ecount_opt(NumBuffers) const unsigned *pOffsets)
+ {
+ SYNCHRONIZED;
+ unsigned i;
+ if(!ppSOTargets)
+ NumBuffers = 0;
+ bool changed = false;
+ for(i = 0; i < NumBuffers; ++i)
+ {
+ ID3D11Buffer* buffer = ppSOTargets[i];
+ if(buffer != so_targets[i].p || pOffsets[i] != so_offsets[i])
+ {
+ so_buffers[i] = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
+ so_targets[i] = buffer;
+ so_offsets[i] = pOffsets[i];
+ changed = true;
+ }
+ }
+ for(; i < D3D11_SO_BUFFER_SLOT_COUNT; ++i)
+ {
+ if(so_targets[i].p || so_offsets[i])
+ {
+ changed = true;
+ so_targets[i] = (ID3D11Buffer*)0;
+ so_offsets[i] = 0;
+ }
+ }
+ num_so_targets = NumBuffers;
+
+ if(changed && caps.so)
+ pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
+ }
+
+ virtual void STDMETHODCALLTYPE SOGetTargets(
+ __in_range(0, D3D11_SO_BUFFER_SLOT_COUNT) unsigned NumBuffers,
+ __out_ecount(NumBuffers) ID3D11Buffer **ppSOTargets
+#if API < 11
+ , __out_ecount(NumBuffers) UINT *pOffsets
+#endif
+ )
+ {
+ SYNCHRONIZED;
+ for(unsigned i = 0; i < NumBuffers; ++i)
+ {
+ ppSOTargets[i] = so_targets[i].ref();
+#if API < 11
+ pOffsets[i] = so_offsets[i];
+#endif
+ }
+ }
+
+ virtual void STDMETHODCALLTYPE Begin(
+ __in ID3D11Asynchronous *pAsync)
+ {
+ SYNCHRONIZED;
+ if(caps.queries)
+ pipe->begin_query(pipe, ((GalliumD3D11Asynchronous<>*)pAsync)->query);
+ }
+
+ virtual void STDMETHODCALLTYPE End(
+ __in ID3D11Asynchronous *pAsync)
+ {
+ SYNCHRONIZED;
+ if(caps.queries)
+ pipe->end_query(pipe, ((GalliumD3D11Asynchronous<>*)pAsync)->query);
+ }
+
+ virtual HRESULT STDMETHODCALLTYPE GetData(
+ __in ID3D11Asynchronous *pAsync,
+ __out_bcount_opt(DataSize) void *pData,
+ __in unsigned DataSize,
+ __in unsigned GetDataFlags)
+ {
+ SYNCHRONIZED;
+ if(!caps.queries)
+ return E_NOTIMPL;
+
+ GalliumD3D11Asynchronous<>* async = (GalliumD3D11Asynchronous<>*)pAsync;
+ void* data = alloca(async->data_size);
+ boolean ret = pipe->get_query_result(pipe, ((GalliumD3D11Asynchronous<>*)pAsync)->query, !(GetDataFlags & D3D11_ASYNC_GETDATA_DONOTFLUSH), data);
+ if(pData)
+ memcpy(pData, data, std::min(async->data_size, DataSize));
+ return ret ? S_OK : S_FALSE;
+ }
+
+ void set_render_condition()
+ {
+ if(caps.render_condition)
+ {
+ if(!render_predicate)
+ pipe->render_condition(pipe, 0, 0);
+ else
+ {
+ GalliumD3D11Predicate* predicate = (GalliumD3D11Predicate*)render_predicate.p;
+ if(!render_predicate_value && predicate->desc.Query == D3D11_QUERY_OCCLUSION_PREDICATE)
+ {
+ unsigned mode = (predicate->desc.MiscFlags & D3D11_QUERY_MISC_PREDICATEHINT) ? PIPE_RENDER_COND_NO_WAIT : PIPE_RENDER_COND_WAIT;
+ pipe->render_condition(pipe, predicate->query, mode);
+ }
+ else
+ {
+ /* TODO: add inverted predication to Gallium*/
+ pipe->render_condition(pipe, 0, 0);
+ }
+ }
+ }
+ }
+
+ virtual void STDMETHODCALLTYPE SetPredication(
+ __in_opt ID3D11Predicate *pPredicate,
+ __in BOOL PredicateValue)
+ {
+ SYNCHRONIZED;
+ if(render_predicate.p != pPredicate || render_predicate_value != PredicateValue)
+ {
+ render_predicate = pPredicate;
+ render_predicate_value = PredicateValue;
+ set_render_condition();
+ }
+ }
+
+ virtual void STDMETHODCALLTYPE GetPredication(
+ __out_opt ID3D11Predicate **ppPredicate,
+ __out_opt BOOL *pPredicateValue)
+ {
+ SYNCHRONIZED;
+ if(ppPredicate)
+ *ppPredicate = render_predicate.ref();
+ if(pPredicateValue)
+ *pPredicateValue = render_predicate_value;
+ }
+
+ static pipe_subresource d3d11_to_pipe_subresource(struct pipe_resource* resource, unsigned subresource)
+ {
+ pipe_subresource sr;
+ if(subresource <= resource->last_level)
+ {
+ sr.level = subresource;
+ sr.face = 0;
+ }
+ else
+ {
+ unsigned levels = resource->last_level + 1;
+ sr.level = subresource % levels;
+ sr.face = subresource / levels;
+ }
+ return sr;
+ }
+
+ virtual HRESULT STDMETHODCALLTYPE Map(
+ __in ID3D11Resource *pResource,
+ __in unsigned Subresource,
+ __in D3D11_MAP MapType,
+ __in unsigned MapFlags,
+ __out D3D11_MAPPED_SUBRESOURCE *pMappedResource)
+ {
+ SYNCHRONIZED;
+ GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)pResource;
+ if(resource->transfers.count(Subresource))
+ return E_FAIL;
+ pipe_subresource sr = d3d11_to_pipe_subresource(resource->resource, Subresource);
+ pipe_box box;
+ d3d11_to_pipe_box(resource->resource, sr.level, 0);
+ unsigned usage = 0;
+ if(MapType == D3D11_MAP_READ)
+ usage = PIPE_TRANSFER_READ;
+ else if(MapType == D3D11_MAP_WRITE)
+ usage = PIPE_TRANSFER_WRITE;
+ else if(MapType == D3D11_MAP_READ_WRITE)
+ usage = PIPE_TRANSFER_READ_WRITE;
+ else if(MapType == D3D11_MAP_WRITE_DISCARD)
+ usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD;
+ else if(MapType == D3D11_MAP_WRITE_NO_OVERWRITE)
+ usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_NOOVERWRITE;
+ else
+ return E_INVALIDARG;
+ if(MapType & D3D10_MAP_FLAG_DO_NOT_WAIT)
+ usage |= PIPE_TRANSFER_DONTBLOCK;
+ struct pipe_transfer* transfer = pipe->get_transfer(pipe, resource->resource, sr, usage, &box);
+ if(!transfer) {
+ if(MapType & D3D10_MAP_FLAG_DO_NOT_WAIT)
+ return DXGI_ERROR_WAS_STILL_DRAWING;
+ else
+ return E_FAIL;
+ }
+ resource->transfers[Subresource] = transfer;
+ pipe->transfer_map(pipe, transfer);
+ pMappedResource->pData = transfer->data;
+ pMappedResource->RowPitch = transfer->stride;
+ pMappedResource->DepthPitch = transfer->slice_stride;
+ return S_OK;
+ }
+
+ virtual void STDMETHODCALLTYPE Unmap(
+ __in ID3D11Resource *pResource,
+ __in unsigned Subresource)
+ {
+ SYNCHRONIZED;
+ GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)pResource;
+ std::unordered_map<unsigned, pipe_transfer*>::iterator i = resource->transfers.find(Subresource);
+ if(i != resource->transfers.end())
+ {
+ pipe->transfer_unmap(pipe, i->second);
+ pipe->transfer_destroy(pipe, i->second);
+ resource->transfers.erase(i);
+ }
+ }
+
+ virtual void STDMETHODCALLTYPE CopySubresourceRegion(
+ __in ID3D11Resource *pDstResource,
+ __in unsigned DstSubresource,
+ __in unsigned DstX,
+ __in unsigned DstY,
+ __in unsigned DstZ,
+ __in ID3D11Resource *pSrcResource,
+ __in unsigned SrcSubresource,
+ __in_opt const D3D11_BOX *pSrcBox)
+ {
+ SYNCHRONIZED;
+ GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)pDstResource;
+ GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)pSrcResource;
+ pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, DstSubresource);
+ pipe_subresource subsrc = d3d11_to_pipe_subresource(src->resource, SrcSubresource);
+ pipe_box box = d3d11_to_pipe_box(src->resource, subsrc.level, pSrcBox);
+ for(unsigned i = 0; i < box.depth; ++i)
+ {
+ pipe->resource_copy_region(pipe,
+ dst->resource, subdst, DstX, DstY, DstZ + i,
+ src->resource, subsrc, box.x, box.y, box.z + i,
+ box.width, box.height);
+ }
+ }
+
+ virtual void STDMETHODCALLTYPE CopyResource(
+ __in ID3D11Resource *pDstResource,
+ __in ID3D11Resource *pSrcResource)
+ {
+ SYNCHRONIZED;
+ GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)pDstResource;
+ GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)pSrcResource;
+ pipe_subresource sr;
+ unsigned faces = dst->resource->target == PIPE_TEXTURE_CUBE ? 6 : 1;
+
+ for(sr.face = 0; sr.face < faces; ++sr.face)
+ {
+ for(sr.level = 0; sr.level <= dst->resource->last_level; ++sr.level)
+ {
+ unsigned w = u_minify(dst->resource->width0, sr.level);
+ unsigned h = u_minify(dst->resource->height0, sr.level);
+ unsigned d = u_minify(dst->resource->depth0, sr.level);
+ for(unsigned i = 0; i < d; ++i)
+ {
+ pipe->resource_copy_region(pipe,
+ dst->resource, sr, 0, 0, i,
+ src->resource, sr, 0, 0, i,
+ w, h);
+ }
+ }
+ }
+ }
+
+ virtual void STDMETHODCALLTYPE UpdateSubresource(
+ __in ID3D11Resource *pDstResource,
+ __in unsigned DstSubresource,
+ __in_opt const D3D11_BOX *pDstBox,
+ __in const void *pSrcData,
+ __in unsigned SrcRowPitch,
+ __in unsigned SrcDepthPitch)
+ {
+ SYNCHRONIZED;
+ GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)pDstResource;
+ pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, DstSubresource);
+ pipe_box box = d3d11_to_pipe_box(dst->resource, subdst.level, pDstBox);
+ pipe->transfer_inline_write(pipe, dst->resource, subdst, PIPE_TRANSFER_WRITE, &box, pSrcData, SrcRowPitch, SrcDepthPitch);
+ }
+
+#if API >= 11
+ virtual void STDMETHODCALLTYPE CopyStructureCount(
+ __in ID3D11Buffer *pDstBuffer,
+ __in unsigned DstAlignedByteOffset,
+ __in ID3D11UnorderedAccessView *pSrcView)
+ {
+ SYNCHRONIZED;
+ }
+#endif
+
+ virtual void STDMETHODCALLTYPE ClearRenderTargetView(
+ __in ID3D11RenderTargetView *pRenderTargetView,
+ __in const float ColorRGBA[4])
+ {
+ SYNCHRONIZED;
+ GalliumD3D11RenderTargetView* view = ((GalliumD3D11RenderTargetView*)pRenderTargetView);
+ pipe->clear_render_target(pipe, view->object, ColorRGBA, 0, 0, view->object->width, view->object->height);
+ }
+
+ virtual void STDMETHODCALLTYPE ClearDepthStencilView(
+ __in ID3D11DepthStencilView *pDepthStencilView,
+ __in unsigned ClearFlags,
+ __in float Depth,
+ __in UINT8 Stencil)
+ {
+ SYNCHRONIZED;
+ GalliumD3D11DepthStencilView* view = ((GalliumD3D11DepthStencilView*)pDepthStencilView);
+ unsigned flags = 0;
+ if(ClearFlags & D3D11_CLEAR_DEPTH)
+ flags |= PIPE_CLEAR_DEPTH;
+ if(ClearFlags & D3D11_CLEAR_STENCIL)
+ flags |= PIPE_CLEAR_STENCIL;
+ pipe->clear_depth_stencil(pipe, view->object, flags, Depth, Stencil, 0, 0, view->object->width, view->object->height);
+ }
+
+#if API >= 11
+ virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewUint(
+ __in ID3D11UnorderedAccessView *pUnorderedAccessView,
+ __in const unsigned Values[ 4 ])
+ {
+ SYNCHRONIZED;
+ }
+
+ virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewFloat(
+ __in ID3D11UnorderedAccessView *pUnorderedAccessView,
+ __in const float Values[ 4 ])
+ {
+ SYNCHRONIZED;
+ }
+#endif
+
+ virtual void STDMETHODCALLTYPE RestoreGalliumStateBlitOnly()
+ {
+ pipe->bind_blend_state(pipe, blend_state.p ? blend_state.p->object : default_blend);
+ pipe->bind_depth_stencil_alpha_state(pipe, depth_stencil_state.p ? depth_stencil_state.p->object : default_depth_stencil);
+ pipe->bind_rasterizer_state(pipe, rasterizer_state.p ? rasterizer_state.p->object : default_rasterizer);
+ pipe->bind_vertex_elements_state(pipe, input_layout.p ? input_layout.p->object : default_input_layout);
+ pipe->bind_fs_state(pipe, shaders[D3D11_STAGE_PS].p ? shaders[D3D11_STAGE_PS].p->object : default_shaders[PIPE_SHADER_FRAGMENT]);
+ pipe->bind_vs_state(pipe, shaders[D3D11_STAGE_VS].p ? shaders[D3D11_STAGE_VS].p->object : default_shaders[PIPE_SHADER_VERTEX]);
+ if(caps.gs)
+ pipe->bind_gs_state(pipe, shaders[D3D11_STAGE_GS].p ? shaders[D3D11_STAGE_GS].p->object : default_shaders[PIPE_SHADER_GEOMETRY]);
+ set_framebuffer();
+ set_viewport();
+ set_clip();
+ set_render_condition();
+ // TODO: restore stream output
+
+ update_flags |= UPDATE_VERTEX_BUFFERS | (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_PS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_PS));
+ }
+
+ virtual void STDMETHODCALLTYPE GenerateMips(
+ __in ID3D11ShaderResourceView *pShaderResourceView)
+ {
+ SYNCHRONIZED;
+
+ GalliumD3D11ShaderResourceView* view = (GalliumD3D11ShaderResourceView*)pShaderResourceView;
+ if(caps.gs)
+ pipe->bind_gs_state(pipe, 0);
+ if(caps.so)
+ pipe->bind_stream_output_state(pipe, 0);
+ if(pipe->render_condition)
+ pipe->render_condition(pipe, 0, 0);
+ util_gen_mipmap(gen_mipmap, view->object, 0, 0, view->object->texture->last_level, PIPE_TEX_FILTER_LINEAR);
+ RestoreGalliumStateBlitOnly();
+ }
+
+ virtual void STDMETHODCALLTYPE RestoreGalliumState()
+ {
+ SYNCHRONIZED;
+ RestoreGalliumStateBlitOnly();
+
+ set_index_buffer();
+ set_stencil_ref();
+ pipe->set_blend_color(pipe, (struct pipe_blend_color*)blend_color);
+ pipe->set_sample_mask(pipe, sample_mask);
+
+ for(unsigned s = 0; s < 3; ++s)
+ {
+ unsigned num = std::min(caps.constant_buffers[s], (unsigned)D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
+ for(unsigned i = 0; i < num; ++i)
+ pipe->set_constant_buffer(pipe, s, i, constant_buffers[s][i].p ? constant_buffers[s][i].p->resource : 0);
+ }
+
+ if(caps.so)
+ pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
+
+ update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_VS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_VS));
+ update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_GS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_GS));
+
+ set_scissor();
+ }
+
+#if API >= 11
+ /* TODO: hack SRVs or sampler states to handle this, or add to Gallium */
+ virtual void STDMETHODCALLTYPE SetResourceMinLOD(
+ __in ID3D11Resource *pResource,
+ float MinLOD)
+ {
+ SYNCHRONIZED;
+ GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)pResource;
+ if(resource->min_lod != MinLOD)
+ {
+ // TODO: actually do anything?
+ resource->min_lod = MinLOD;
+ }
+ }
+
+ virtual float STDMETHODCALLTYPE GetResourceMinLOD(
+ __in ID3D11Resource *pResource)
+ {
+ SYNCHRONIZED;
+ GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)pResource;
+ return resource->min_lod;
+ }
+#endif
+
+ virtual void STDMETHODCALLTYPE ResolveSubresource(
+ __in ID3D11Resource *pDstResource,
+ __in unsigned DstSubresource,
+ __in ID3D11Resource *pSrcResource,
+ __in unsigned SrcSubresource,
+ __in DXGI_FORMAT Format)
+ {
+ SYNCHRONIZED;
+ GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)pDstResource;
+ GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)pSrcResource;
+ pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, DstSubresource);
+ pipe_subresource subsrc = d3d11_to_pipe_subresource(src->resource, SrcSubresource);
+ pipe->resource_resolve(pipe, dst->resource, subdst, src->resource, subsrc);
+ }
+
+#if API >= 11
+ virtual void STDMETHODCALLTYPE ExecuteCommandList(
+ __in ID3D11CommandList *pCommandList,
+ BOOL RestoreContextState)
+ {
+ SYNCHRONIZED;
+ }
+
+ virtual HRESULT STDMETHODCALLTYPE FinishCommandList(
+ BOOL RestoreDeferredContextState,
+ __out_opt ID3D11CommandList **ppCommandList)
+ {
+ SYNCHRONIZED;
+ return E_NOTIMPL;
+ }
+#endif
+
+ virtual void STDMETHODCALLTYPE ClearState(void)
+ {
+ SYNCHRONIZED;
+
+ // we qualify all calls so that we avoid virtual dispatch and might get them inlined
+ // TODO: make sure all this gets inlined, which might require more compiler flags
+ // TODO: optimize this
+#if API >= 11
+ GalliumD3D11DeviceContext::PSSetShader(0, 0, 0);
+ GalliumD3D11DeviceContext::GSSetShader(0, 0, 0);
+ GalliumD3D11DeviceContext::VSSetShader(0, 0, 0);
+ GalliumD3D11DeviceContext::HSSetShader(0, 0, 0);
+ GalliumD3D11DeviceContext::DSSetShader(0, 0, 0);
+ GalliumD3D11DeviceContext::CSSetShader(0, 0, 0);
+#else
+ GalliumD3D11DeviceContext::PSSetShader(0);
+ GalliumD3D11DeviceContext::GSSetShader(0);
+ GalliumD3D11DeviceContext::VSSetShader(0);
+#endif
+
+ GalliumD3D11DeviceContext::IASetInputLayout(0);
+ GalliumD3D11DeviceContext::IASetIndexBuffer(0, DXGI_FORMAT_UNKNOWN, 0);
+ GalliumD3D11DeviceContext::RSSetState(0);
+ GalliumD3D11DeviceContext::OMSetDepthStencilState(0, 0);
+ GalliumD3D11DeviceContext::OMSetBlendState(0, (float*)zero_data, ~0);
+ GalliumD3D11DeviceContext::SetPredication(0, 0);
+ GalliumD3D11DeviceContext::IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_UNDEFINED);
+
+ GalliumD3D11DeviceContext::PSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
+ GalliumD3D11DeviceContext::GSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
+ GalliumD3D11DeviceContext::VSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
+#if API >= 11
+ GalliumD3D11DeviceContext::HSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
+ GalliumD3D11DeviceContext::DSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
+ GalliumD3D11DeviceContext::CSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
+#endif
+
+ GalliumD3D11DeviceContext::IASetVertexBuffers(0, num_vertex_buffers, (ID3D11Buffer**)zero_data, (unsigned*)zero_data, (unsigned*)zero_data);
+#if API >= 11
+ GalliumD3D11DeviceContext::OMSetRenderTargetsAndUnorderedAccessViews(0, 0, 0 , 0, 0, 0, 0);
+#else
+ GalliumD3D11DeviceContext::OMSetRenderTargets(0, 0, 0 );
+#endif
+ GalliumD3D11DeviceContext::SOSetTargets(0, 0, 0);
+
+ GalliumD3D11DeviceContext::PSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11ShaderResourceView**)zero_data);
+ GalliumD3D11DeviceContext::GSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11ShaderResourceView**)zero_data);
+ GalliumD3D11DeviceContext::VSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11ShaderResourceView**)zero_data);
+#if API >= 11
+ GalliumD3D11DeviceContext::HSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11ShaderResourceView**)zero_data);
+ GalliumD3D11DeviceContext::DSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11ShaderResourceView**)zero_data);
+ GalliumD3D11DeviceContext::CSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11ShaderResourceView**)zero_data);
+#endif
+
+ GalliumD3D11DeviceContext::PSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11SamplerState**)zero_data);
+ GalliumD3D11DeviceContext::GSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11SamplerState**)zero_data);
+ GalliumD3D11DeviceContext::VSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11SamplerState**)zero_data);
+#if API >= 11
+ GalliumD3D11DeviceContext::HSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11SamplerState**)zero_data);
+ GalliumD3D11DeviceContext::DSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11SamplerState**)zero_data);
+ GalliumD3D11DeviceContext::CSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11SamplerState**)zero_data);
+#endif
+
+ GalliumD3D11DeviceContext::RSSetViewports(0, 0);
+ GalliumD3D11DeviceContext::RSSetScissorRects(0, 0);
+ }
+
+ virtual void STDMETHODCALLTYPE Flush(void)
+ {
+ SYNCHRONIZED;
+ pipe->flush(pipe, PIPE_FLUSH_FRAME, 0);
+ }
+
+ /* In Direct3D 10, if the reference count of an object drops to 0, it is automatically
+ * cleanly unbound from the pipeline.
+ * In Direct3D 11, the pipeline holds a reference.
+ *
+ * Note that instead of always scanning the pipeline on destruction, we could
+ * maintain the internal reference count on DirectX 10 and use it to check if an
+ * object is still bound.
+ * Presumably, on average, scanning is faster if the application is well written.
+ */
+#if API < 11
+#define IMPLEMENT_SIMPLE_UNBIND(name, member, gallium, def) \
+ void Unbind##name(ID3D11##name* state) \
+ { \
+ SYNCHRONIZED; \
+ if((void*)state == (void*)member.p) \
+ { \
+ member.p = 0; \
+ pipe->bind_##gallium##_state(pipe, default_##def); \
+ } \
+ }
+ IMPLEMENT_SIMPLE_UNBIND(BlendState, blend_state, blend, blend)
+ IMPLEMENT_SIMPLE_UNBIND(RasterizerState, rasterizer_state, rasterizer, rasterizer)
+ IMPLEMENT_SIMPLE_UNBIND(DepthStencilState, depth_stencil_state, depth_stencil_alpha, depth_stencil)
+ IMPLEMENT_SIMPLE_UNBIND(InputLayout, input_layout, vertex_elements, input_layout)
+ IMPLEMENT_SIMPLE_UNBIND(PixelShader, shaders[D3D11_STAGE_PS], fs, shaders[D3D11_STAGE_PS])
+ IMPLEMENT_SIMPLE_UNBIND(VertexShader, shaders[D3D11_STAGE_VS], vs, shaders[D3D11_STAGE_VS])
+ IMPLEMENT_SIMPLE_UNBIND(GeometryShader, shaders[D3D11_STAGE_GS], gs, shaders[D3D11_STAGE_GS])
+
+ void UnbindPredicate(ID3D11Predicate* predicate)
+ {
+ SYNCHRONIZED;
+ if(predicate == render_predicate)
+ {
+ render_predicate.p = NULL;
+ render_predicate_value = 0;
+ pipe->render_condition(pipe, 0, 0);
+ }
+ }
+
+ void UnbindSamplerState(ID3D11SamplerState* state)
+ {
+ SYNCHRONIZED;
+ for(unsigned s = 0; s < D3D11_STAGES; ++s)
+ {
+ for(unsigned i = 0; i < num_samplers[s]; ++i)
+ {
+ if(samplers[s][i] == state)
+ {
+ samplers[s][i].p = NULL;
+ sampler_csos[s].v[i] = NULL;
+ update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s));
+ }
+ }
+ }
+ }
+
+ void UnbindBuffer(ID3D11Buffer* buffer)
+ {
+ SYNCHRONIZED;
+ if(buffer == index_buffer)
+ {
+ index_buffer.p = 0;
+ index_format = DXGI_FORMAT_UNKNOWN;
+ index_offset = 0;
+ struct pipe_index_buffer ib;
+ memset(&ib, 0, sizeof(ib));
+ pipe->set_index_buffer(pipe, &ib);
+ }
+
+ for(unsigned i = 0; i < num_vertex_buffers; ++i)
+ {
+ if(buffer == input_buffers[i])
+ {
+ input_buffers[i].p = 0;
+ memset(&vertex_buffers[num_vertex_buffers], 0, sizeof(vertex_buffers[num_vertex_buffers]));
+ update_flags |= UPDATE_VERTEX_BUFFERS;
+ }
+ }
+
+ for(unsigned s = 0; s < D3D11_STAGES; ++s)
+ {
+ for(unsigned i = 0; i < sizeof(constant_buffers) / sizeof(constant_buffers[0]); ++i)
+ {
+ if(constant_buffers[s][i] == buffer)
+ {
+ constant_buffers[s][i] = (ID3D10Buffer*)NULL;
+ pipe->set_constant_buffer(pipe, s, i, NULL);
+ }
+ }
+ }
+ }
+
+ void UnbindDepthStencilView(ID3D11DepthStencilView* view)
+ {
+ SYNCHRONIZED;
+ if(view == depth_stencil_view)
+ {
+ depth_stencil_view.p = NULL;
+ set_framebuffer();
+ }
+ }
+
+ void UnbindRenderTargetView(ID3D11RenderTargetView* view)
+ {
+ SYNCHRONIZED;
+ bool any_bound = false;
+ for(unsigned i = 0; i < num_render_target_views; ++i)
+ {
+ if(render_target_views[i] == view)
+ {
+ render_target_views[i].p = NULL;
+ any_bound = true;
+ }
+ }
+ if(any_bound)
+ set_framebuffer();
+ }
+
+ void UnbindShaderResourceView(ID3D11ShaderResourceView* view)
+ {
+ SYNCHRONIZED;
+ for(unsigned s = 0; s < D3D11_STAGES; ++s)
+ {
+ for(unsigned i = 0; i < num_shader_resource_views[s]; ++i)
+ {
+ if(shader_resource_views[s][i] == view)
+ {
+ shader_resource_views[s][i].p = NULL;
+ sampler_views[s][i] = NULL;
+ update_flags |= (1 << (UPDATE_VIEWS_SHIFT + s));
+ }
+ }
+ }
+ }
+#endif
+
+#undef SYNCHRONIZED
+};
+
+#if API >= 11
+/* This approach serves two purposes.
+ * First, we don't want to do an atomic operation to manipulate the reference
+ * count every time something is bound/unbound to the pipeline, since they are
+ * expensive.
+ * Fortunately, the immediate context can only be used by a single thread, so
+ * we don't have to use them, as long as a separate reference count is used
+ * (see dual_refcnt_t).
+ *
+ * Second, we want to avoid the Device -> DeviceContext -> bound DeviceChild -> Device
+ * garbage cycle.
+ * To avoid it, DeviceChild doesn't hold a reference to Device as usual, but adds
+ * one for each external reference count, while internal nonatomic_add_ref doesn't
+ * add any.
+ *
+ * Note that ideally we would to eliminate the non-atomic op too, but this is more
+ * complicated, since we would either need to use garbage collection and give up
+ * deterministic destruction (especially bad for large textures), or scan the whole
+ * pipeline state every time the reference count of object drops to 0, which risks
+ * pathological slowdowns.
+ *
+ * Since this microoptimization should matter relatively little, let's avoid it for now.
+ *
+ * Note that deferred contexts don't use this, since as a whole, they must thread-safe.
+ * Eliminating the atomic ops for deferred contexts seems substantially harder.
+ * This might be a problem if they are used in a one-shot multithreaded rendering
+ * fashion, where SMP cacheline bouncing on the reference count may be visible.
+ *
+ * The idea would be to attach a structure of reference counts indexed by deferred
+ * context id to each object. Ideally, this should be organized like ext2 block pointers.
+ *
+ * Every deferred context would get a reference count in its own cacheline.
+ * The external count is protected by a lock bit, and there is also a "lock bit" in each
+ * internal count.
+ *
+ * When the external count has to be dropped to 0, the lock bit is taken and all internal
+ * reference counts are scanned, taking a count of them. A flag would also be set on them.
+ * Deferred context manipulation would notice the flag, and update the count.
+ * Once the count goes to zero, the object is freed.
+ *
+ * The problem of this is that if the external reference count ping-pongs between
+ * zero and non-zero, the scans will take a lot of time.
+ *
+ * The idea to solve this is to compute the scans in a binary-tree like fashion, where
+ * each binary tree node would have a "determined bit", which would be invalidated
+ * by manipulations.
+ *
+ * However, all this complexity might actually be a loss in most cases, so let's just
+ * stick to a single atomic refcnt for now.
+ *
+ * Also, we don't even support deferred contexts yet, so this can wait.
+ */
+struct nonatomic_device_child_ptr_traits
+{
+ static void add_ref(void* p)
+ {
+ if(p)
+ ((GalliumD3D11DeviceChild<>*)p)->nonatomic_add_ref();
+ }
+
+ static void release(void* p)
+ {
+ if(p)
+ ((GalliumD3D11DeviceChild<>*)p)->nonatomic_release();
+ }
+};
+
+struct GalliumD3D11ImmediateDeviceContext
+ : public GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>
+{
+ GalliumD3D11ImmediateDeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, unsigned context_flags = 0)
+ : GalliumD3D11DeviceContext(device, pipe, context_flags)
+ {
+ // not necessary, but tests that the API at least basically works
+ ClearState();
+ }
+
+ /* we do this since otherwise we would have a garbage cycle between this and the device */
+ virtual ULONG STDMETHODCALLTYPE AddRef()
+ {
+ return this->device->AddRef();
+ }
+
+ virtual ULONG STDMETHODCALLTYPE Release()
+ {
+ return this->device->Release();
+ }
+
+ virtual D3D11_DEVICE_CONTEXT_TYPE STDMETHODCALLTYPE GetType()
+ {
+ return D3D11_DEVICE_CONTEXT_IMMEDIATE;
+ }
+};
+
+static ID3D11DeviceContext* GalliumD3D11ImmediateDeviceContext_Create(GalliumD3D11Screen* device, struct pipe_context* pipe, bool owns_pipe)
+{
+ return new GalliumD3D11ImmediateDeviceContext(device, pipe, owns_pipe);
+}
+
+static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumState(ID3D11DeviceContext* context)
+{
+ ((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumState();
+}
+
+static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumStateBlitOnly(ID3D11DeviceContext* context)
+{
+ ((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumStateBlitOnly();
+}
+
+static void GalliumD3D11ImmediateDeviceContext_Destroy(ID3D11DeviceContext* context)
+{
+ delete (GalliumD3D11ImmediateDeviceContext*)context;
+}
+#endif