summaryrefslogtreecommitdiff
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/SConscript13
-rw-r--r--src/gallium/auxiliary/Android.mk6
-rw-r--r--src/gallium/auxiliary/Makefile8
-rw-r--r--src/gallium/auxiliary/SConscript9
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c665
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.h7
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c27
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.c46
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.h6
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aaline.c18
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aapoint.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_pstipple.c41
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_validate.c5
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h1
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c3
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c8
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h3
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c40
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h73
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.c50
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.h10
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos_io.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c8
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_llvm.c14
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_ppc.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c13
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_varient.c32
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_const.h18
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_conv.c59
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_debug.c141
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_debug.cpp357
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_debug.h10
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_aos.c81
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.c60
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_misc.cpp62
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c25
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi.h8
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c73
-rw-r--r--src/gallium/auxiliary/os/os_thread.h3
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c11
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr.h4
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c17
-rw-r--r--src/gallium/auxiliary/rbug/rbug_context.c4
-rw-r--r--src/gallium/auxiliary/rbug/rbug_context.h2
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.c3
-rw-r--r--src/gallium/auxiliary/target-helpers/inline_noop_helper.h51
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.c59
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c61
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.h17
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c307
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h14
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.c15
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h17
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.c4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.h6
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ppc.c4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c3
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.h6
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c19
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.h4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c79
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_text.c187
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_text.h6
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c107
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h101
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_util.c7
-rw-r--r--src/gallium/auxiliary/translate/translate.c2
-rw-r--r--src/gallium/auxiliary/translate/translate_cache.c4
-rw-r--r--src/gallium/auxiliary/util/dbghelp.h1265
-rw-r--r--src/gallium/auxiliary/util/u_blit.c165
-rw-r--r--src/gallium/auxiliary/util/u_blitter.c135
-rw-r--r--src/gallium/auxiliary/util/u_blitter.h34
-rw-r--r--src/gallium/auxiliary/util/u_cache.c170
-rw-r--r--src/gallium/auxiliary/util/u_cache.h4
-rw-r--r--src/gallium/auxiliary/util/u_caps.c3
-rw-r--r--src/gallium/auxiliary/util/u_debug.c85
-rw-r--r--src/gallium/auxiliary/util/u_debug.h37
-rw-r--r--src/gallium/auxiliary/util/u_debug_refcnt.c12
-rw-r--r--src/gallium/auxiliary/util/u_debug_symbol.c95
-rw-r--r--src/gallium/auxiliary/util/u_draw_quad.c18
-rw-r--r--src/gallium/auxiliary/util/u_draw_quad.h5
-rw-r--r--src/gallium/auxiliary/util/u_dump_state.c1
-rw-r--r--src/gallium/auxiliary/util/u_format.csv10
-rw-r--r--src/gallium/auxiliary/util/u_format.h42
-rw-r--r--src/gallium/auxiliary/util/u_format_latc.c328
-rw-r--r--src/gallium/auxiliary/util/u_format_latc.h108
-rw-r--r--src/gallium/auxiliary/util/u_format_pack.py2
-rw-r--r--src/gallium/auxiliary/util/u_format_rgtc.c464
-rw-r--r--src/gallium/auxiliary/util/u_format_rgtc.h114
-rwxr-xr-xsrc/gallium/auxiliary/util/u_format_table.py4
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c131
-rw-r--r--src/gallium/auxiliary/util/u_index_modify.c127
-rw-r--r--src/gallium/auxiliary/util/u_index_modify.h27
-rw-r--r--src/gallium/auxiliary/util/u_inlines.h59
-rw-r--r--src/gallium/auxiliary/util/u_math.h25
-rw-r--r--src/gallium/auxiliary/util/u_pstipple.c434
-rw-r--r--src/gallium/auxiliary/util/u_pstipple.h (renamed from src/gallium/targets/dri-noop/swrast_drm_api.c)59
-rw-r--r--src/gallium/auxiliary/util/u_resource.c10
-rw-r--r--src/gallium/auxiliary/util/u_simple_screen.h3
-rw-r--r--src/gallium/auxiliary/util/u_surface.c2
-rw-r--r--src/gallium/auxiliary/util/u_tile.c81
-rw-r--r--src/gallium/auxiliary/util/u_tile.h24
-rw-r--r--src/gallium/auxiliary/util/u_transfer.c14
-rw-r--r--src/gallium/auxiliary/util/u_transfer.h24
-rw-r--r--src/gallium/auxiliary/util/u_upload_mgr.c168
-rw-r--r--src/gallium/auxiliary/util/u_upload_mgr.h54
-rw-r--r--src/gallium/auxiliary/util/u_vbuf_mgr.c609
-rw-r--r--src/gallium/auxiliary/util/u_vbuf_mgr.h121
-rw-r--r--src/gallium/docs/d3d11ddi.txt5
-rw-r--r--src/gallium/docs/source/context.rst125
-rw-r--r--src/gallium/docs/source/screen.rst3
-rw-r--r--src/gallium/docs/source/tgsi.rst239
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.c20
-rw-r--r--src/gallium/drivers/cell/ppu/cell_fence.c6
-rw-r--r--src/gallium/drivers/cell/ppu/cell_fence.h9
-rw-r--r--src/gallium/drivers/cell/ppu/cell_flush.c7
-rw-r--r--src/gallium/drivers/cell/ppu/cell_screen.c3
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_vertex.c7
-rw-r--r--src/gallium/drivers/failover/fo_context.c23
-rw-r--r--src/gallium/drivers/failover/fo_state.c8
-rw-r--r--src/gallium/drivers/galahad/glhd_context.c36
-rw-r--r--src/gallium/drivers/galahad/glhd_screen.c20
-rw-r--r--src/gallium/drivers/i915/TODO25
-rw-r--r--src/gallium/drivers/i915/i915_batch.h7
-rw-r--r--src/gallium/drivers/i915/i915_batchbuffer.h26
-rw-r--r--src/gallium/drivers/i915/i915_blit.c23
-rw-r--r--src/gallium/drivers/i915/i915_clear.c105
-rw-r--r--src/gallium/drivers/i915/i915_context.c76
-rw-r--r--src/gallium/drivers/i915/i915_context.h85
-rw-r--r--src/gallium/drivers/i915/i915_debug.c19
-rw-r--r--src/gallium/drivers/i915/i915_debug.h1
-rw-r--r--src/gallium/drivers/i915/i915_flush.c29
-rw-r--r--src/gallium/drivers/i915/i915_fpc_translate.c23
-rw-r--r--src/gallium/drivers/i915/i915_prim_emit.c5
-rw-r--r--src/gallium/drivers/i915/i915_prim_vbuf.c31
-rw-r--r--src/gallium/drivers/i915/i915_reg.h1
-rw-r--r--src/gallium/drivers/i915/i915_resource.c1
-rw-r--r--src/gallium/drivers/i915/i915_resource_buffer.c40
-rw-r--r--src/gallium/drivers/i915/i915_resource_texture.c49
-rw-r--r--src/gallium/drivers/i915/i915_screen.c114
-rw-r--r--src/gallium/drivers/i915/i915_screen.h20
-rw-r--r--src/gallium/drivers/i915/i915_state.c159
-rw-r--r--src/gallium/drivers/i915/i915_state.h1
-rw-r--r--src/gallium/drivers/i915/i915_state_derived.c1
-rw-r--r--src/gallium/drivers/i915/i915_state_dynamic.c69
-rw-r--r--src/gallium/drivers/i915/i915_state_emit.c702
-rw-r--r--src/gallium/drivers/i915/i915_state_fpc.c2
-rw-r--r--src/gallium/drivers/i915/i915_state_immediate.c68
-rw-r--r--src/gallium/drivers/i915/i915_state_static.c154
-rw-r--r--src/gallium/drivers/i915/i915_surface.c152
-rw-r--r--src/gallium/drivers/i915/i915_winsys.h16
-rw-r--r--src/gallium/drivers/i965/brw_batchbuffer.c4
-rw-r--r--src/gallium/drivers/i965/brw_batchbuffer.h5
-rw-r--r--src/gallium/drivers/i965/brw_clip.c6
-rw-r--r--src/gallium/drivers/i965/brw_clip.h2
-rw-r--r--src/gallium/drivers/i965/brw_clip_line.c14
-rw-r--r--src/gallium/drivers/i965/brw_clip_state.c4
-rw-r--r--src/gallium/drivers/i965/brw_clip_tri.c8
-rw-r--r--src/gallium/drivers/i965/brw_clip_util.c12
-rw-r--r--src/gallium/drivers/i965/brw_context.c9
-rw-r--r--src/gallium/drivers/i965/brw_context.h15
-rw-r--r--src/gallium/drivers/i965/brw_defines.h361
-rw-r--r--src/gallium/drivers/i965/brw_disasm.c179
-rw-r--r--src/gallium/drivers/i965/brw_disasm.h6
-rw-r--r--src/gallium/drivers/i965/brw_draw_upload.c14
-rw-r--r--src/gallium/drivers/i965/brw_eu_emit.c158
-rw-r--r--src/gallium/drivers/i965/brw_gs.c4
-rw-r--r--src/gallium/drivers/i965/brw_gs_state.c2
-rw-r--r--src/gallium/drivers/i965/brw_misc_state.c24
-rw-r--r--src/gallium/drivers/i965/brw_pipe_flush.c1
-rw-r--r--src/gallium/drivers/i965/brw_pipe_rast.c2
-rw-r--r--src/gallium/drivers/i965/brw_pipe_surface.c21
-rw-r--r--src/gallium/drivers/i965/brw_pipe_vertex.c24
-rw-r--r--src/gallium/drivers/i965/brw_reg.h52
-rw-r--r--src/gallium/drivers/i965/brw_resource_buffer.c20
-rw-r--r--src/gallium/drivers/i965/brw_resource_texture.c45
-rw-r--r--src/gallium/drivers/i965/brw_resource_texture_layout.c2
-rw-r--r--src/gallium/drivers/i965/brw_screen.c74
-rw-r--r--src/gallium/drivers/i965/brw_screen.h6
-rw-r--r--src/gallium/drivers/i965/brw_sf_emit.c4
-rw-r--r--src/gallium/drivers/i965/brw_sf_state.c4
-rw-r--r--src/gallium/drivers/i965/brw_state.h2
-rw-r--r--src/gallium/drivers/i965/brw_state_upload.c2
-rw-r--r--src/gallium/drivers/i965/brw_structs.h238
-rw-r--r--src/gallium/drivers/i965/brw_structs_dump.c2
-rw-r--r--src/gallium/drivers/i965/brw_urb.c4
-rw-r--r--src/gallium/drivers/i965/brw_vs.h1
-rw-r--r--src/gallium/drivers/i965/brw_vs_emit.c17
-rw-r--r--src/gallium/drivers/i965/brw_vs_state.c8
-rw-r--r--src/gallium/drivers/i965/brw_winsys.h4
-rw-r--r--src/gallium/drivers/i965/brw_winsys_debug.c6
-rw-r--r--src/gallium/drivers/i965/brw_wm_emit.c12
-rw-r--r--src/gallium/drivers/i965/brw_wm_fp.c2
-rw-r--r--src/gallium/drivers/i965/brw_wm_glsl.c6
-rw-r--r--src/gallium/drivers/i965/brw_wm_state.c10
-rw-r--r--src/gallium/drivers/i965/intel_decode.c563
-rw-r--r--src/gallium/drivers/i965/intel_decode.h4
-rw-r--r--src/gallium/drivers/i965/intel_structs.h2
-rw-r--r--src/gallium/drivers/identity/id_context.c34
-rw-r--r--src/gallium/drivers/identity/id_screen.c20
-rw-r--r--src/gallium/drivers/llvmpipe/README23
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_alpha.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_fence.c3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.c47
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_perf.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_query.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene.c1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene_queue.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c24
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c22
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c21
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_setup.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_vertex.c9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_surface.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_main.c13
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.h10
-rw-r--r--src/gallium/drivers/noop/noop_pipe.c90
-rw-r--r--src/gallium/drivers/noop/noop_public.h5
-rw-r--r--src/gallium/drivers/noop/noop_state.c2
-rw-r--r--src/gallium/drivers/nouveau/Makefile6
-rw-r--r--src/gallium/drivers/nouveau/nouveau_buffer.c487
-rw-r--r--src/gallium/drivers/nouveau/nouveau_buffer.h139
-rw-r--r--src/gallium/drivers/nouveau/nouveau_context.h26
-rw-r--r--src/gallium/drivers/nouveau/nouveau_fence.c223
-rw-r--r--src/gallium/drivers/nouveau/nouveau_fence.h59
-rw-r--r--src/gallium/drivers/nouveau/nouveau_mm.c288
-rw-r--r--src/gallium/drivers/nouveau/nouveau_mm.h32
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.c45
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.h25
-rw-r--r--src/gallium/drivers/nouveau/nouveau_stateobj.h316
-rw-r--r--src/gallium/drivers/nouveau/nouveau_winsys.h8
-rw-r--r--src/gallium/drivers/nouveau/nv_object.xml.h57
-rw-r--r--src/gallium/drivers/nv50/Makefile11
-rw-r--r--src/gallium/drivers/nv50/SConscript6
-rw-r--r--src/gallium/drivers/nv50/nv50_2d.xml.h393
-rw-r--r--src/gallium/drivers/nv50/nv50_3d.xml.h2084
-rw-r--r--src/gallium/drivers/nv50/nv50_3ddefs.xml.h98
-rw-r--r--src/gallium/drivers/nv50/nv50_buffer.c151
-rw-r--r--src/gallium/drivers/nv50/nv50_clear.c74
-rw-r--r--src/gallium/drivers/nv50/nv50_context.c195
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h397
-rw-r--r--src/gallium/drivers/nv50/nv50_defs.xml.h142
-rw-r--r--src/gallium/drivers/nv50/nv50_draw.c37
-rw-r--r--src/gallium/drivers/nv50/nv50_formats.c314
-rw-r--r--src/gallium/drivers/nv50/nv50_miptree.c480
-rw-r--r--src/gallium/drivers/nv50/nv50_pc.c5
-rw-r--r--src/gallium/drivers/nv50/nv50_pc.h2
-rw-r--r--src/gallium/drivers/nv50/nv50_pc_emit.c3
-rw-r--r--src/gallium/drivers/nv50/nv50_pc_optimize.c8
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c34
-rw-r--r--src/gallium/drivers/nv50/nv50_program.h8
-rw-r--r--src/gallium/drivers/nv50/nv50_push.c493
-rw-r--r--src/gallium/drivers/nv50/nv50_query.c364
-rw-r--r--src/gallium/drivers/nv50/nv50_reg.h1827
-rw-r--r--src/gallium/drivers/nv50/nv50_resource.c68
-rw-r--r--src/gallium/drivers/nv50/nv50_resource.h101
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c1121
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.h149
-rw-r--r--src/gallium/drivers/nv50/nv50_shader_state.c703
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c1311
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c687
-rw-r--r--src/gallium/drivers/nv50/nv50_stateobj.h55
-rw-r--r--src/gallium/drivers/nv50/nv50_stateobj_tex.h34
-rw-r--r--src/gallium/drivers/nv50/nv50_surface.c570
-rw-r--r--src/gallium/drivers/nv50/nv50_tex.c465
-rw-r--r--src/gallium/drivers/nv50/nv50_texture.h197
-rw-r--r--src/gallium/drivers/nv50/nv50_texture.xml.h279
-rw-r--r--src/gallium/drivers/nv50/nv50_tgsi_to_nc.c27
-rw-r--r--src/gallium/drivers/nv50/nv50_transfer.c636
-rw-r--r--src/gallium/drivers/nv50/nv50_transfer.h39
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c1122
-rw-r--r--src/gallium/drivers/nv50/nv50_winsys.h106
-rw-r--r--src/gallium/drivers/nvc0/Makefile34
-rw-r--r--src/gallium/drivers/nvc0/SConscript33
-rw-r--r--src/gallium/drivers/nvc0/nvc0_2d.xml.h380
-rw-r--r--src/gallium/drivers/nvc0/nvc0_3d.xml.h1244
-rw-r--r--src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h98
-rw-r--r--src/gallium/drivers/nvc0/nvc0_context.c208
-rw-r--r--src/gallium/drivers/nvc0/nvc0_context.h244
-rw-r--r--src/gallium/drivers/nvc0/nvc0_draw.c88
-rw-r--r--src/gallium/drivers/nvc0/nvc0_formats.c523
-rw-r--r--src/gallium/drivers/nvc0/nvc0_graph_macros.h235
-rw-r--r--src/gallium/drivers/nvc0/nvc0_m2mf.xml.h138
-rw-r--r--src/gallium/drivers/nvc0/nvc0_miptree.c319
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc.c716
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc.h650
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc_emit.c1020
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc_optimize.c1420
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc_print.c381
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc_regalloc.c1051
-rw-r--r--src/gallium/drivers/nvc0/nvc0_program.c728
-rw-r--r--src/gallium/drivers/nvc0/nvc0_program.h92
-rw-r--r--src/gallium/drivers/nvc0/nvc0_push.c354
-rw-r--r--src/gallium/drivers/nvc0/nvc0_push2.c333
-rw-r--r--src/gallium/drivers/nvc0/nvc0_query.c341
-rw-r--r--src/gallium/drivers/nvc0/nvc0_resource.c51
-rw-r--r--src/gallium/drivers/nvc0/nvc0_resource.h75
-rw-r--r--src/gallium/drivers/nvc0/nvc0_screen.c676
-rw-r--r--src/gallium/drivers/nvc0/nvc0_screen.h148
-rw-r--r--src/gallium/drivers/nvc0/nvc0_shader_state.c249
-rw-r--r--src/gallium/drivers/nvc0/nvc0_state.c860
-rw-r--r--src/gallium/drivers/nvc0/nvc0_state_validate.c483
-rw-r--r--src/gallium/drivers/nvc0/nvc0_stateobj.h60
-rw-r--r--src/gallium/drivers/nvc0/nvc0_surface.c451
-rw-r--r--src/gallium/drivers/nvc0/nvc0_tex.c302
-rw-r--r--src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c2023
-rw-r--r--src/gallium/drivers/nvc0/nvc0_transfer.c385
-rw-r--r--src/gallium/drivers/nvc0/nvc0_transfer.h44
-rw-r--r--src/gallium/drivers/nvc0/nvc0_vbo.c653
-rw-r--r--src/gallium/drivers/nvc0/nvc0_winsys.h120
-rw-r--r--src/gallium/drivers/nvfx/Makefile1
-rw-r--r--src/gallium/drivers/nvfx/nv04_2d.c2
-rw-r--r--src/gallium/drivers/nvfx/nv30_fragtex.c3
-rw-r--r--src/gallium/drivers/nvfx/nv40_fragtex.c5
-rw-r--r--src/gallium/drivers/nvfx/nvfx_context.c13
-rw-r--r--src/gallium/drivers/nvfx/nvfx_context.h14
-rw-r--r--src/gallium/drivers/nvfx/nvfx_draw.c14
-rw-r--r--src/gallium/drivers/nvfx/nvfx_fragprog.c16
-rw-r--r--src/gallium/drivers/nvfx/nvfx_fragtex.c4
-rw-r--r--src/gallium/drivers/nvfx/nvfx_miptree.c6
-rw-r--r--src/gallium/drivers/nvfx/nvfx_push.c57
-rw-r--r--src/gallium/drivers/nvfx/nvfx_query.c12
-rw-r--r--src/gallium/drivers/nvfx/nvfx_resource.c9
-rw-r--r--src/gallium/drivers/nvfx/nvfx_screen.c72
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state.c2
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state_emit.c70
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state_fb.c39
-rw-r--r--src/gallium/drivers/nvfx/nvfx_surface.c8
-rw-r--r--src/gallium/drivers/nvfx/nvfx_vbo.c42
-rw-r--r--src/gallium/drivers/nvfx/nvfx_vertprog.c14
-rw-r--r--src/gallium/drivers/r300/r300_blit.c303
-rw-r--r--src/gallium/drivers/r300/r300_cb.h54
-rw-r--r--src/gallium/drivers/r300/r300_chipset.c10
-rw-r--r--src/gallium/drivers/r300/r300_chipset.h15
-rw-r--r--src/gallium/drivers/r300/r300_context.c165
-rw-r--r--src/gallium/drivers/r300/r300_context.h250
-rw-r--r--src/gallium/drivers/r300/r300_cs.h51
-rw-r--r--src/gallium/drivers/r300/r300_debug.c4
-rw-r--r--src/gallium/drivers/r300/r300_emit.c555
-rw-r--r--src/gallium/drivers/r300/r300_emit.h5
-rw-r--r--src/gallium/drivers/r300/r300_flush.c62
-rw-r--r--src/gallium/drivers/r300/r300_fs.c135
-rw-r--r--src/gallium/drivers/r300/r300_fs.h9
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.c310
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.h35
-rw-r--r--src/gallium/drivers/r300/r300_query.c23
-rw-r--r--src/gallium/drivers/r300/r300_reg.h17
-rw-r--r--src/gallium/drivers/r300/r300_render.c641
-rw-r--r--src/gallium/drivers/r300/r300_render_translate.c233
-rw-r--r--src/gallium/drivers/r300/r300_resource.c18
-rw-r--r--src/gallium/drivers/r300/r300_screen.c94
-rw-r--r--src/gallium/drivers/r300/r300_screen.h5
-rw-r--r--src/gallium/drivers/r300/r300_screen_buffer.c267
-rw-r--r--src/gallium/drivers/r300/r300_screen_buffer.h51
-rw-r--r--src/gallium/drivers/r300/r300_state.c559
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c170
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.h30
-rw-r--r--src/gallium/drivers/r300/r300_state_inlines.h53
-rw-r--r--src/gallium/drivers/r300/r300_texture.c300
-rw-r--r--src/gallium/drivers/r300/r300_texture.h13
-rw-r--r--src/gallium/drivers/r300/r300_texture_desc.c439
-rw-r--r--src/gallium/drivers/r300/r300_texture_desc.h16
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c9
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.h3
-rw-r--r--src/gallium/drivers/r300/r300_transfer.c83
-rw-r--r--src/gallium/drivers/r300/r300_vs.c7
-rw-r--r--src/gallium/drivers/r300/r300_winsys.h139
-rw-r--r--src/gallium/drivers/r600/Android.mk43
-rw-r--r--src/gallium/drivers/r600/SConscript2
-rw-r--r--src/gallium/drivers/r600/eg_asm.c51
-rw-r--r--src/gallium/drivers/r600/eg_state_inlines.h176
-rw-r--r--src/gallium/drivers/r600/eg_states_inc.h458
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c520
-rw-r--r--src/gallium/drivers/r600/evergreend.h22
-rw-r--r--src/gallium/drivers/r600/r600.h25
-rw-r--r--src/gallium/drivers/r600/r600_asm.c1833
-rw-r--r--src/gallium/drivers/r600/r600_asm.h33
-rw-r--r--src/gallium/drivers/r600/r600_blit.c160
-rw-r--r--src/gallium/drivers/r600/r600_buffer.c343
-rw-r--r--src/gallium/drivers/r600/r600_opcodes.h4
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c148
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h92
-rw-r--r--src/gallium/drivers/r600/r600_query.c30
-rw-r--r--src/gallium/drivers/r600/r600_resource.c1
-rw-r--r--src/gallium/drivers/r600/r600_resource.h70
-rw-r--r--src/gallium/drivers/r600/r600_shader.c1321
-rw-r--r--src/gallium/drivers/r600/r600_shader.h3
-rw-r--r--src/gallium/drivers/r600/r600_sq.h6
-rw-r--r--src/gallium/drivers/r600/r600_state.c594
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c358
-rw-r--r--src/gallium/drivers/r600/r600_state_inlines.h188
-rw-r--r--src/gallium/drivers/r600/r600_states_inc.h543
-rw-r--r--src/gallium/drivers/r600/r600_texture.c405
-rw-r--r--src/gallium/drivers/r600/r600_translate.c185
-rw-r--r--src/gallium/drivers/r600/r600d.h35
-rw-r--r--src/gallium/drivers/r600/r700_asm.c20
-rw-r--r--src/gallium/drivers/rbug/rbug_context.c248
-rw-r--r--src/gallium/drivers/rbug/rbug_core.c2
-rw-r--r--src/gallium/drivers/rbug/rbug_objects.c6
-rw-r--r--src/gallium/drivers/rbug/rbug_screen.c20
-rw-r--r--src/gallium/drivers/softpipe/sp_context.c70
-rw-r--r--src/gallium/drivers/softpipe/sp_context.h27
-rw-r--r--src/gallium/drivers/softpipe/sp_fence.c13
-rw-r--r--src/gallium/drivers/softpipe/sp_flush.c84
-rw-r--r--src/gallium/drivers/softpipe/sp_flush.h9
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_blend.c14
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c10
-rw-r--r--src/gallium/drivers/softpipe/sp_setup.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_state.h2
-rw-r--r--src/gallium/drivers/softpipe/sp_state_derived.c10
-rw-r--r--src/gallium/drivers/softpipe/sp_state_sampler.c127
-rw-r--r--src/gallium/drivers/softpipe/sp_state_shader.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_state_vertex.c8
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.c503
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.h37
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_tile_cache.c65
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_tile_cache.h9
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.c37
-rw-r--r--src/gallium/drivers/softpipe/sp_tile_cache.c22
-rw-r--r--src/gallium/drivers/svga/svga_cmd.c5
-rw-r--r--src/gallium/drivers/svga/svga_cmd.h3
-rw-r--r--src/gallium/drivers/svga/svga_context.c74
-rw-r--r--src/gallium/drivers/svga/svga_context.h28
-rw-r--r--src/gallium/drivers/svga/svga_draw.c8
-rw-r--r--src/gallium/drivers/svga/svga_draw_arrays.c5
-rw-r--r--src/gallium/drivers/svga/svga_draw_elements.c14
-rw-r--r--src/gallium/drivers/svga/svga_pipe_blit.c4
-rw-r--r--src/gallium/drivers/svga/svga_pipe_draw.c8
-rw-r--r--src/gallium/drivers/svga/svga_pipe_flush.c40
-rw-r--r--src/gallium/drivers/svga/svga_pipe_misc.c2
-rw-r--r--src/gallium/drivers/svga/svga_pipe_rasterizer.c37
-rw-r--r--src/gallium/drivers/svga/svga_pipe_sampler.c16
-rw-r--r--src/gallium/drivers/svga/svga_pipe_vertex.c1
-rw-r--r--src/gallium/drivers/svga/svga_resource.c2
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer.c168
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer.h6
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer_upload.c91
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer_upload.h4
-rw-r--r--src/gallium/drivers/svga/svga_resource_texture.c104
-rw-r--r--src/gallium/drivers/svga/svga_sampler_view.c26
-rw-r--r--src/gallium/drivers/svga/svga_sampler_view.h6
-rw-r--r--src/gallium/drivers/svga/svga_screen.c41
-rw-r--r--src/gallium/drivers/svga/svga_screen.h2
-rw-r--r--src/gallium/drivers/svga/svga_state.h4
-rw-r--r--src/gallium/drivers/svga/svga_state_constants.c9
-rw-r--r--src/gallium/drivers/svga/svga_state_framebuffer.c49
-rw-r--r--src/gallium/drivers/svga/svga_state_fs.c2
-rw-r--r--src/gallium/drivers/svga/svga_state_need_swtnl.c31
-rw-r--r--src/gallium/drivers/svga/svga_state_tss.c78
-rw-r--r--src/gallium/drivers/svga/svga_state_vdecl.c9
-rw-r--r--src/gallium/drivers/svga/svga_state_vs.c6
-rw-r--r--src/gallium/drivers/svga/svga_surface.c30
-rw-r--r--src/gallium/drivers/svga/svga_surface.h5
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_backend.c14
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_draw.c16
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_state.c2
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_insn.c236
-rw-r--r--src/gallium/drivers/svga/svga_winsys.h7
-rw-r--r--src/gallium/drivers/svga/svgadump/svga_shader_op.c4
-rw-r--r--src/gallium/drivers/trace/tr_context.c53
-rw-r--r--src/gallium/drivers/trace/tr_dump.c16
-rw-r--r--src/gallium/drivers/trace/tr_dump.h5
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.c1
-rw-r--r--src/gallium/drivers/trace/tr_screen.c28
-rw-r--r--src/gallium/include/pipe/p_compiler.h12
-rw-r--r--src/gallium/include/pipe/p_context.h32
-rw-r--r--src/gallium/include/pipe/p_defines.h60
-rw-r--r--src/gallium/include/pipe/p_format.h21
-rw-r--r--src/gallium/include/pipe/p_screen.h20
-rw-r--r--src/gallium/include/pipe/p_shader_tokens.h33
-rw-r--r--src/gallium/include/pipe/p_state.h10
-rw-r--r--src/gallium/include/state_tracker/st_api.h5
-rw-r--r--src/gallium/state_trackers/Android.mk1
-rw-r--r--src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp13
-rw-r--r--src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h3
-rw-r--r--src/gallium/state_trackers/d3d1x/gd3d11/d3d11_screen.h20
-rwxr-xr-xsrc/gallium/state_trackers/d3d1x/progs/bin/d3d10tri.exebin11776 -> 0 bytes
-rwxr-xr-xsrc/gallium/state_trackers/d3d1x/progs/bin/d3d11gears.exebin27136 -> 0 bytes
-rwxr-xr-xsrc/gallium/state_trackers/d3d1x/progs/bin/d3d11spikysphere.exebin17408 -> 0 bytes
-rwxr-xr-xsrc/gallium/state_trackers/d3d1x/progs/bin/d3d11tex.exebin22016 -> 0 bytes
-rwxr-xr-xsrc/gallium/state_trackers/d3d1x/progs/bin/d3d11tri.exebin11776 -> 0 bytes
-rw-r--r--src/gallium/state_trackers/dri/common/dri_context.c22
-rw-r--r--src/gallium/state_trackers/dri/common/dri_drawable.c2
-rw-r--r--src/gallium/state_trackers/dri/common/dri_drawable.h3
-rw-r--r--src/gallium/state_trackers/dri/common/dri_screen.c30
-rw-r--r--src/gallium/state_trackers/dri/drm/dri2.c95
-rw-r--r--src/gallium/state_trackers/dri/drm/dri2_buffer.h22
-rw-r--r--src/gallium/state_trackers/dri/sw/drisw.c3
-rw-r--r--src/gallium/state_trackers/egl/Makefile15
-rw-r--r--src/gallium/state_trackers/egl/SConscript38
-rw-r--r--src/gallium/state_trackers/egl/android/native_android.cpp162
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d.c173
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d.h1
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d_api.c87
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d_api.h3
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d_image.c18
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d_sync.c4
-rw-r--r--src/gallium/state_trackers/egl/common/native.h40
-rw-r--r--src/gallium/state_trackers/egl/common/native_helper.c150
-rw-r--r--src/gallium/state_trackers/egl/common/native_helper.h36
-rw-r--r--src/gallium/state_trackers/egl/drm/modeset.c48
-rw-r--r--src/gallium/state_trackers/egl/drm/native_drm.c21
-rw-r--r--src/gallium/state_trackers/egl/drm/native_drm.h2
-rw-r--r--src/gallium/state_trackers/egl/fbdev/native_fbdev.c18
-rw-r--r--src/gallium/state_trackers/egl/gdi/native_gdi.c19
-rw-r--r--src/gallium/state_trackers/egl/wayland/native_wayland.c626
-rw-r--r--src/gallium/state_trackers/egl/wayland/native_wayland.h97
-rw-r--r--src/gallium/state_trackers/egl/x11/native_dri2.c91
-rw-r--r--src/gallium/state_trackers/egl/x11/native_x11.c28
-rw-r--r--src/gallium/state_trackers/egl/x11/native_ximage.c83
-rw-r--r--src/gallium/state_trackers/glx/xlib/glx_api.c85
-rw-r--r--src/gallium/state_trackers/glx/xlib/xm_api.c15
-rw-r--r--src/gallium/state_trackers/python/p_context.i10
-rw-r--r--src/gallium/state_trackers/python/p_device.i10
-rw-r--r--src/gallium/state_trackers/vega/Makefile26
-rw-r--r--src/gallium/state_trackers/vega/SConscript23
-rw-r--r--src/gallium/state_trackers/vega/api_context.c7
-rw-r--r--src/gallium/state_trackers/vega/api_filters.c29
-rw-r--r--src/gallium/state_trackers/vega/api_images.c48
-rw-r--r--src/gallium/state_trackers/vega/api_masks.c35
-rw-r--r--src/gallium/state_trackers/vega/api_paint.c56
-rw-r--r--src/gallium/state_trackers/vega/api_params.c178
-rw-r--r--src/gallium/state_trackers/vega/api_path.c48
-rw-r--r--src/gallium/state_trackers/vega/api_text.c29
-rw-r--r--src/gallium/state_trackers/vega/handle.c93
-rw-r--r--src/gallium/state_trackers/vega/handle.h171
-rw-r--r--src/gallium/state_trackers/vega/image.c12
-rw-r--r--src/gallium/state_trackers/vega/mask.c2
-rw-r--r--src/gallium/state_trackers/vega/paint.c7
-rw-r--r--src/gallium/state_trackers/vega/paint.h1
-rw-r--r--src/gallium/state_trackers/vega/polygon.c2
-rw-r--r--src/gallium/state_trackers/vega/renderer.c24
-rw-r--r--src/gallium/state_trackers/vega/shader.c4
-rw-r--r--src/gallium/state_trackers/vega/text.c1
-rw-r--r--src/gallium/state_trackers/vega/vg_context.c29
-rw-r--r--src/gallium/state_trackers/vega/vg_context.h17
-rw-r--r--src/gallium/state_trackers/vega/vg_manager.c10
-rw-r--r--src/gallium/state_trackers/vega/vgu.c13
-rw-r--r--src/gallium/state_trackers/wgl/SConscript3
-rw-r--r--src/gallium/state_trackers/wgl/stw_context.c8
-rw-r--r--src/gallium/state_trackers/wgl/stw_device.c23
-rw-r--r--src/gallium/state_trackers/wgl/stw_pixelformat.c4
-rw-r--r--src/gallium/state_trackers/xorg/xorg_crtc.c13
-rw-r--r--src/gallium/state_trackers/xorg/xorg_dri2.c11
-rw-r--r--src/gallium/state_trackers/xorg/xorg_driver.c73
-rw-r--r--src/gallium/state_trackers/xorg/xorg_exa.c21
-rw-r--r--src/gallium/state_trackers/xorg/xorg_exa.h2
-rw-r--r--src/gallium/state_trackers/xorg/xorg_renderer.c14
-rw-r--r--src/gallium/state_trackers/xorg/xorg_tracker.h1
-rw-r--r--src/gallium/state_trackers/xorg/xorg_xv.c16
-rw-r--r--src/gallium/targets/Android.mk148
-rw-r--r--src/gallium/targets/SConscript.dri1
-rw-r--r--src/gallium/targets/dri-noop/Makefile34
-rw-r--r--src/gallium/targets/dri-noop/SConscript31
-rw-r--r--src/gallium/targets/dri-nouveau/Makefile1
-rw-r--r--src/gallium/targets/dri-r300/Makefile2
-rw-r--r--src/gallium/targets/dri-r300/SConscript2
-rw-r--r--src/gallium/targets/dri-r600/Makefile5
-rw-r--r--src/gallium/targets/dri-r600/SConscript2
-rw-r--r--src/gallium/targets/dri-r600/target.c2
-rw-r--r--src/gallium/targets/dri-vmwgfx/Makefile1
-rw-r--r--src/gallium/targets/dri-vmwgfx/SConscript8
-rw-r--r--src/gallium/targets/dri-vmwgfx/target.c3
-rw-r--r--src/gallium/targets/egl-gdi/SConscript55
-rw-r--r--src/gallium/targets/egl-static/SConscript134
-rw-r--r--src/gallium/targets/egl-static/egl.c (renamed from src/gallium/targets/egl-gdi/egl-static.c)127
-rw-r--r--src/gallium/targets/egl-static/egl_pipe.c215
-rw-r--r--src/gallium/targets/egl-static/egl_pipe.h40
-rw-r--r--src/gallium/targets/egl-static/egl_st.c105
-rw-r--r--src/gallium/targets/egl-static/egl_st.h40
-rw-r--r--src/gallium/targets/egl/Makefile37
-rw-r--r--src/gallium/targets/egl/egl.c38
-rw-r--r--src/gallium/targets/egl/st_GLESv1_CM.c8
-rw-r--r--src/gallium/targets/egl/st_GLESv2.c8
-rw-r--r--src/gallium/targets/graw-gdi/graw_gdi.c4
-rw-r--r--src/gallium/targets/graw-null/graw_util.c1
-rw-r--r--src/gallium/targets/graw-xlib/graw_xlib.c20
-rw-r--r--src/gallium/targets/libgl-gdi/SConscript7
-rw-r--r--src/gallium/targets/libgl-xlib/Makefile4
-rw-r--r--src/gallium/targets/libgl-xlib/SConscript7
-rw-r--r--src/gallium/targets/libgl-xlib/xlib.c1
-rw-r--r--src/gallium/targets/xorg-radeon/Makefile2
-rw-r--r--src/gallium/tests/graw/SConscript3
-rw-r--r--src/gallium/tests/graw/clear.c18
-rw-r--r--src/gallium/tests/graw/fs-test.c18
-rw-r--r--src/gallium/tests/graw/gs-test.c19
-rw-r--r--src/gallium/tests/graw/quad-sample.c414
-rw-r--r--src/gallium/tests/graw/quad-tex.c19
-rw-r--r--src/gallium/tests/graw/shader-leak.c18
-rw-r--r--src/gallium/tests/graw/tri-gs.c17
-rw-r--r--src/gallium/tests/graw/tri-instanced.c17
-rw-r--r--src/gallium/tests/graw/tri.c16
-rw-r--r--src/gallium/tests/graw/vs-test.c16
-rw-r--r--src/gallium/tests/trivial/quad-tex.c5
-rw-r--r--src/gallium/tests/trivial/tri.c5
-rw-r--r--src/gallium/tests/unit/SConscript2
-rw-r--r--src/gallium/winsys/SConscript4
-rw-r--r--src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c57
-rw-r--r--src/gallium/winsys/i915/drm/i915_drm_winsys.c1
-rw-r--r--src/gallium/winsys/i915/drm/i915_drm_winsys.h1
-rw-r--r--src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c29
-rw-r--r--src/gallium/winsys/i915/sw/i915_sw_buffer.c2
-rw-r--r--src/gallium/winsys/i915/sw/i915_sw_winsys.c2
-rw-r--r--src/gallium/winsys/i965/drm/i965_drm_buffer.c4
-rw-r--r--src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c3
-rw-r--r--src/gallium/winsys/r600/Android.mk38
-rw-r--r--src/gallium/winsys/r600/drm/Makefile5
-rw-r--r--src/gallium/winsys/r600/drm/SConscript10
-rw-r--r--src/gallium/winsys/r600/drm/bof.c2
-rw-r--r--src/gallium/winsys/r600/drm/evergreen_hw_context.c76
-rw-r--r--src/gallium/winsys/r600/drm/r600.c175
-rw-r--r--src/gallium/winsys/r600/drm/r600_bo.c174
-rw-r--r--src/gallium/winsys/r600/drm/r600_bomgr.c161
-rw-r--r--src/gallium/winsys/r600/drm/r600_drm.c280
-rw-r--r--src/gallium/winsys/r600/drm/r600_hw_context.c238
-rw-r--r--src/gallium/winsys/r600/drm/r600_priv.h125
-rw-r--r--src/gallium/winsys/r600/drm/r600d.h18
-rw-r--r--src/gallium/winsys/r600/drm/radeon_bo.c31
-rw-r--r--src/gallium/winsys/r600/drm/radeon_bo_pb.c260
-rw-r--r--src/gallium/winsys/r600/drm/radeon_pciid.c114
-rw-r--r--src/gallium/winsys/radeon/drm/Makefile6
-rw-r--r--src/gallium/winsys/radeon/drm/SConscript11
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_bo.c602
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_bo.h (renamed from src/gallium/winsys/radeon/drm/radeon_drm_buffer.h)54
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_buffer.c564
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_common.c129
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_cs.c446
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_cs.h102
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_r300.c266
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_winsys.h40
-rw-r--r--src/gallium/winsys/svga/drm/vmw_buffer.c3
-rw-r--r--src/gallium/winsys/svga/drm/vmw_context.c2
-rw-r--r--src/gallium/winsys/svga/drm/vmw_screen.c2
-rw-r--r--src/gallium/winsys/sw/dri/SConscript2
-rw-r--r--src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c9
-rw-r--r--src/gallium/winsys/sw/xlib/SConscript2
647 files changed, 50758 insertions, 20931 deletions
diff --git a/src/gallium/SConscript b/src/gallium/SConscript
index 0efab834f6..31580556f0 100644
--- a/src/gallium/SConscript
+++ b/src/gallium/SConscript
@@ -42,6 +42,7 @@ if env['drm']:
# SConscript([
# 'drivers/nouveau/SConscript',
# 'drivers/nv50/SConscript',
+ # 'drivers/nvc0/SConscript',
# 'drivers/nvfx/SConscript',
# ])
@@ -55,6 +56,7 @@ SConscript('winsys/sw/null/SConscript')
SConscript('state_trackers/python/SConscript')
if env['platform'] != 'embedded':
SConscript('state_trackers/vega/SConscript')
+ SConscript('state_trackers/egl/SConscript')
if env['x11']:
SConscript('state_trackers/glx/xlib/SConscript')
@@ -66,10 +68,7 @@ if env['platform'] != 'embedded':
SConscript('state_trackers/xorg/SConscript')
if env['platform'] == 'windows':
- SConscript([
- 'state_trackers/egl/SConscript',
- 'state_trackers/wgl/SConscript',
- ])
+ SConscript('state_trackers/wgl/SConscript')
#
# Winsys
@@ -85,6 +84,11 @@ SConscript([
'targets/graw-null/SConscript',
])
+if env['platform'] != 'embedded':
+ SConscript([
+ 'targets/egl-static/SConscript'
+ ])
+
if env['x11']:
SConscript([
'targets/graw-xlib/SConscript',
@@ -95,7 +99,6 @@ if env['platform'] == 'windows':
SConscript([
'targets/graw-gdi/SConscript',
'targets/libgl-gdi/SConscript',
- #'egl-gdi/SConscript',
])
if env['dri']:
diff --git a/src/gallium/auxiliary/Android.mk b/src/gallium/auxiliary/Android.mk
index 7c333051b4..f5513794ee 100644
--- a/src/gallium/auxiliary/Android.mk
+++ b/src/gallium/auxiliary/Android.mk
@@ -108,7 +108,9 @@ C_SOURCES = \
util/u_draw_quad.c \
util/u_format.c \
util/u_format_other.c \
+ util/u_format_latc.c \
util/u_format_s3tc.c \
+ util/u_format_rgtc.c \
util/u_format_srgb.c \
util/u_format_table.c \
util/u_format_tests.c \
@@ -127,6 +129,7 @@ C_SOURCES = \
util/u_network.c \
util/u_math.c \
util/u_mm.c \
+ util/u_pstipple.c \
util/u_rect.c \
util/u_ringbuffer.c \
util/u_sampler.c \
@@ -140,7 +143,8 @@ C_SOURCES = \
util/u_tile.c \
util/u_transfer.c \
util/u_resource.c \
- util/u_upload_mgr.c
+ util/u_upload_mgr.c \
+ util/u_vbuf_mgr.c
GENERATED_SOURCES = \
indices/u_indices_gen.c \
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index ff355c4783..c7654046a5 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -110,7 +110,9 @@ C_SOURCES = \
util/u_draw_quad.c \
util/u_format.c \
util/u_format_other.c \
+ util/u_format_latc.c \
util/u_format_s3tc.c \
+ util/u_format_rgtc.c \
util/u_format_srgb.c \
util/u_format_table.c \
util/u_format_tests.c \
@@ -129,6 +131,7 @@ C_SOURCES = \
util/u_network.c \
util/u_math.c \
util/u_mm.c \
+ util/u_pstipple.c \
util/u_rect.c \
util/u_ringbuffer.c \
util/u_sampler.c \
@@ -142,7 +145,8 @@ C_SOURCES = \
util/u_tile.c \
util/u_transfer.c \
util/u_resource.c \
- util/u_upload_mgr.c
+ util/u_upload_mgr.c \
+ util/u_vbuf_mgr.c
# Disabling until pipe-video branch gets merged in
#vl/vl_bitstream_parser.c \
@@ -157,7 +161,6 @@ GALLIVM_SOURCES = \
gallivm/lp_bld_bitarit.c \
gallivm/lp_bld_const.c \
gallivm/lp_bld_conv.c \
- gallivm/lp_bld_debug.c \
gallivm/lp_bld_flow.c \
gallivm/lp_bld_format_aos.c \
gallivm/lp_bld_format_soa.c \
@@ -185,6 +188,7 @@ GALLIVM_SOURCES = \
draw/draw_pt_fetch_shade_pipeline_llvm.c
GALLIVM_CPP_SOURCES = \
+ gallivm/lp_bld_debug.cpp \
gallivm/lp_bld_misc.cpp
GENERATED_SOURCES = \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index fca7e5fd11..8e422b2c11 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -157,7 +157,9 @@ source = [
'util/u_draw_quad.c',
'util/u_format.c',
'util/u_format_other.c',
+ 'util/u_format_latc.c',
'util/u_format_s3tc.c',
+ 'util/u_format_rgtc.c',
'util/u_format_srgb.c',
'util/u_format_table.c',
'util/u_format_tests.c',
@@ -176,6 +178,7 @@ source = [
'util/u_network.c',
'util/u_math.c',
'util/u_mm.c',
+ 'util/u_pstipple.c',
'util/u_rect.c',
'util/u_resource.c',
'util/u_ringbuffer.c',
@@ -190,6 +193,7 @@ source = [
'util/u_tile.c',
'util/u_transfer.c',
'util/u_upload_mgr.c',
+ 'util/u_vbuf_mgr.c',
# Disabling until pipe-video branch gets merged in
#'vl/vl_bitstream_parser.c',
#'vl/vl_mpeg12_mc_renderer.c',
@@ -199,16 +203,13 @@ source = [
]
if env['llvm']:
- if env['UDIS86']:
- env.Append(CPPDEFINES = [('HAVE_UDIS86', '1')])
-
source += [
'gallivm/lp_bld_arit.c',
'gallivm/lp_bld_assert.c',
'gallivm/lp_bld_bitarit.c',
'gallivm/lp_bld_const.c',
'gallivm/lp_bld_conv.c',
- 'gallivm/lp_bld_debug.c',
+ 'gallivm/lp_bld_debug.cpp',
'gallivm/lp_bld_flow.c',
'gallivm/lp_bld_format_aos.c',
'gallivm/lp_bld_format_soa.c',
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 58b022d531..fdd40fca12 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -47,41 +47,45 @@
#include "cso_cache/cso_hash.h"
#include "cso_context.h"
-struct cso_context {
- struct pipe_context *pipe;
- struct cso_cache *cache;
+/**
+ * Info related to samplers and sampler views.
+ * We have one of these for fragment samplers and another for vertex samplers.
+ */
+struct sampler_info
+{
struct {
void *samplers[PIPE_MAX_SAMPLERS];
unsigned nr_samplers;
-
- void *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS];
- unsigned nr_vertex_samplers;
} hw;
void *samplers[PIPE_MAX_SAMPLERS];
unsigned nr_samplers;
- void *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS];
- unsigned nr_vertex_samplers;
-
- unsigned nr_samplers_saved;
void *samplers_saved[PIPE_MAX_SAMPLERS];
+ unsigned nr_samplers_saved;
+
+ struct pipe_sampler_view *views[PIPE_MAX_SAMPLERS];
+ unsigned nr_views;
+
+ struct pipe_sampler_view *views_saved[PIPE_MAX_SAMPLERS];
+ unsigned nr_views_saved;
+};
+
- unsigned nr_vertex_samplers_saved;
- void *vertex_samplers_saved[PIPE_MAX_VERTEX_SAMPLERS];
- uint nr_fragment_sampler_views;
- struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
+struct cso_context {
+ struct pipe_context *pipe;
+ struct cso_cache *cache;
- uint nr_vertex_sampler_views;
- struct pipe_sampler_view *vertex_sampler_views[PIPE_MAX_VERTEX_SAMPLERS];
+ struct sampler_info fragment_samplers;
+ struct sampler_info vertex_samplers;
- uint nr_fragment_sampler_views_saved;
- struct pipe_sampler_view *fragment_sampler_views_saved[PIPE_MAX_SAMPLERS];
+ uint nr_vertex_buffers;
+ struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
- uint nr_vertex_sampler_views_saved;
- struct pipe_sampler_view *vertex_sampler_views_saved[PIPE_MAX_VERTEX_SAMPLERS];
+ uint nr_vertex_buffers_saved;
+ struct pipe_vertex_buffer vertex_buffers_saved[PIPE_MAX_ATTRIBS];
/** Current and saved state.
* The saved state is used as a 1-deep stack.
@@ -252,6 +256,8 @@ struct cso_context *cso_create_context( struct pipe_context *pipe )
if (ctx == NULL)
goto out;
+ assert(PIPE_MAX_SAMPLERS == PIPE_MAX_VERTEX_SAMPLERS);
+
ctx->cache = cso_cache_create();
if (ctx->cache == NULL)
goto out;
@@ -278,7 +284,8 @@ out:
void cso_release_all( struct cso_context *ctx )
{
unsigned i;
-
+ struct sampler_info *info;
+
if (ctx->pipe) {
ctx->pipe->bind_blend_state( ctx->pipe, NULL );
ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL );
@@ -294,19 +301,30 @@ void cso_release_all( struct cso_context *ctx )
ctx->pipe->set_vertex_sampler_views(ctx->pipe, 0, NULL);
}
+ /* free fragment samplers, views */
+ info = &ctx->fragment_samplers;
for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
- pipe_sampler_view_reference(&ctx->fragment_sampler_views[i], NULL);
- pipe_sampler_view_reference(&ctx->fragment_sampler_views_saved[i], NULL);
+ pipe_sampler_view_reference(&info->views[i], NULL);
+ pipe_sampler_view_reference(&info->views_saved[i], NULL);
}
- for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
- pipe_sampler_view_reference(&ctx->vertex_sampler_views[i], NULL);
- pipe_sampler_view_reference(&ctx->vertex_sampler_views_saved[i], NULL);
+ /* free vertex samplers, views */
+ info = &ctx->vertex_samplers;
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ pipe_sampler_view_reference(&info->views[i], NULL);
+ pipe_sampler_view_reference(&info->views_saved[i], NULL);
}
util_unreference_framebuffer_state(&ctx->fb);
util_unreference_framebuffer_state(&ctx->fb_saved);
+ util_copy_vertex_buffers(ctx->vertex_buffers,
+ &ctx->nr_vertex_buffers,
+ NULL, 0);
+ util_copy_vertex_buffers(ctx->vertex_buffers_saved,
+ &ctx->nr_vertex_buffers_saved,
+ NULL, 0);
+
if (ctx->cache) {
cso_cache_delete( ctx->cache );
ctx->cache = NULL;
@@ -395,233 +413,6 @@ void cso_restore_blend(struct cso_context *ctx)
-enum pipe_error cso_single_sampler(struct cso_context *ctx,
- unsigned idx,
- const struct pipe_sampler_state *templ)
-{
- void *handle = NULL;
-
- if (templ != NULL) {
- unsigned key_size = sizeof(struct pipe_sampler_state);
- unsigned hash_key = cso_construct_key((void*)templ, key_size);
- struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
- hash_key, CSO_SAMPLER,
- (void*)templ, key_size);
-
- if (cso_hash_iter_is_null(iter)) {
- struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler));
- if (!cso)
- return PIPE_ERROR_OUT_OF_MEMORY;
-
- memcpy(&cso->state, templ, sizeof(*templ));
- cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state);
- cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state;
- cso->context = ctx->pipe;
-
- iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso);
- if (cso_hash_iter_is_null(iter)) {
- FREE(cso);
- return PIPE_ERROR_OUT_OF_MEMORY;
- }
-
- handle = cso->data;
- }
- else {
- handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data;
- }
- }
-
- ctx->samplers[idx] = handle;
- return PIPE_OK;
-}
-
-enum pipe_error
-cso_single_vertex_sampler(struct cso_context *ctx,
- unsigned idx,
- const struct pipe_sampler_state *templ)
-{
- void *handle = NULL;
-
- if (templ != NULL) {
- unsigned key_size = sizeof(struct pipe_sampler_state);
- unsigned hash_key = cso_construct_key((void*)templ, key_size);
- struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
- hash_key, CSO_SAMPLER,
- (void*)templ, key_size);
-
- if (cso_hash_iter_is_null(iter)) {
- struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler));
- if (!cso)
- return PIPE_ERROR_OUT_OF_MEMORY;
-
- memcpy(&cso->state, templ, sizeof(*templ));
- cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state);
- cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state;
- cso->context = ctx->pipe;
-
- iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso);
- if (cso_hash_iter_is_null(iter)) {
- FREE(cso);
- return PIPE_ERROR_OUT_OF_MEMORY;
- }
-
- handle = cso->data;
- }
- else {
- handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data;
- }
- }
-
- ctx->vertex_samplers[idx] = handle;
- return PIPE_OK;
-}
-
-void cso_single_sampler_done( struct cso_context *ctx )
-{
- unsigned i;
-
- /* find highest non-null sampler */
- for (i = PIPE_MAX_SAMPLERS; i > 0; i--) {
- if (ctx->samplers[i - 1] != NULL)
- break;
- }
-
- ctx->nr_samplers = i;
-
- if (ctx->hw.nr_samplers != ctx->nr_samplers ||
- memcmp(ctx->hw.samplers,
- ctx->samplers,
- ctx->nr_samplers * sizeof(void *)) != 0)
- {
- memcpy(ctx->hw.samplers, ctx->samplers, ctx->nr_samplers * sizeof(void *));
- ctx->hw.nr_samplers = ctx->nr_samplers;
-
- ctx->pipe->bind_fragment_sampler_states(ctx->pipe, ctx->nr_samplers, ctx->samplers);
- }
-}
-
-void
-cso_single_vertex_sampler_done(struct cso_context *ctx)
-{
- unsigned i;
-
- /* find highest non-null sampler */
- for (i = PIPE_MAX_VERTEX_SAMPLERS; i > 0; i--) {
- if (ctx->vertex_samplers[i - 1] != NULL)
- break;
- }
-
- ctx->nr_vertex_samplers = i;
-
- if (ctx->hw.nr_vertex_samplers != ctx->nr_vertex_samplers ||
- memcmp(ctx->hw.vertex_samplers,
- ctx->vertex_samplers,
- ctx->nr_vertex_samplers * sizeof(void *)) != 0)
- {
- memcpy(ctx->hw.vertex_samplers,
- ctx->vertex_samplers,
- ctx->nr_vertex_samplers * sizeof(void *));
- ctx->hw.nr_vertex_samplers = ctx->nr_vertex_samplers;
-
- ctx->pipe->bind_vertex_sampler_states(ctx->pipe,
- ctx->nr_vertex_samplers,
- ctx->vertex_samplers);
- }
-}
-
-/*
- * If the function encouters any errors it will return the
- * last one. Done to always try to set as many samplers
- * as possible.
- */
-enum pipe_error cso_set_samplers( struct cso_context *ctx,
- unsigned nr,
- const struct pipe_sampler_state **templates )
-{
- unsigned i;
- enum pipe_error temp, error = PIPE_OK;
-
- /* TODO: fastpath
- */
-
- for (i = 0; i < nr; i++) {
- temp = cso_single_sampler( ctx, i, templates[i] );
- if (temp != PIPE_OK)
- error = temp;
- }
-
- for ( ; i < ctx->nr_samplers; i++) {
- temp = cso_single_sampler( ctx, i, NULL );
- if (temp != PIPE_OK)
- error = temp;
- }
-
- cso_single_sampler_done( ctx );
-
- return error;
-}
-
-void cso_save_samplers(struct cso_context *ctx)
-{
- ctx->nr_samplers_saved = ctx->nr_samplers;
- memcpy(ctx->samplers_saved, ctx->samplers, sizeof(ctx->samplers));
-}
-
-void cso_restore_samplers(struct cso_context *ctx)
-{
- ctx->nr_samplers = ctx->nr_samplers_saved;
- memcpy(ctx->samplers, ctx->samplers_saved, sizeof(ctx->samplers));
- cso_single_sampler_done( ctx );
-}
-
-/*
- * If the function encouters any errors it will return the
- * last one. Done to always try to set as many samplers
- * as possible.
- */
-enum pipe_error cso_set_vertex_samplers(struct cso_context *ctx,
- unsigned nr,
- const struct pipe_sampler_state **templates)
-{
- unsigned i;
- enum pipe_error temp, error = PIPE_OK;
-
- /* TODO: fastpath
- */
-
- for (i = 0; i < nr; i++) {
- temp = cso_single_vertex_sampler( ctx, i, templates[i] );
- if (temp != PIPE_OK)
- error = temp;
- }
-
- for ( ; i < ctx->nr_samplers; i++) {
- temp = cso_single_vertex_sampler( ctx, i, NULL );
- if (temp != PIPE_OK)
- error = temp;
- }
-
- cso_single_vertex_sampler_done( ctx );
-
- return error;
-}
-
-void
-cso_save_vertex_samplers(struct cso_context *ctx)
-{
- ctx->nr_vertex_samplers_saved = ctx->nr_vertex_samplers;
- memcpy(ctx->vertex_samplers_saved, ctx->vertex_samplers, sizeof(ctx->vertex_samplers));
-}
-
-void
-cso_restore_vertex_samplers(struct cso_context *ctx)
-{
- ctx->nr_vertex_samplers = ctx->nr_vertex_samplers_saved;
- memcpy(ctx->vertex_samplers, ctx->vertex_samplers_saved, sizeof(ctx->vertex_samplers));
- cso_single_vertex_sampler_done(ctx);
-}
-
-
enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx,
const struct pipe_depth_stencil_alpha_state *templ)
{
@@ -1143,121 +934,361 @@ void cso_restore_vertex_elements(struct cso_context *ctx)
ctx->velements_saved = NULL;
}
-/* fragment sampler view state */
+/* vertex buffers */
-void
-cso_set_fragment_sampler_views(struct cso_context *cso,
- uint count,
- struct pipe_sampler_view **views)
+void cso_set_vertex_buffers(struct cso_context *ctx,
+ unsigned count,
+ const struct pipe_vertex_buffer *buffers)
{
- uint i;
-
- for (i = 0; i < count; i++) {
- pipe_sampler_view_reference(&cso->fragment_sampler_views[i], views[i]);
+ if (count != ctx->nr_vertex_buffers ||
+ memcmp(buffers, ctx->vertex_buffers,
+ sizeof(struct pipe_vertex_buffer) * count) != 0) {
+ util_copy_vertex_buffers(ctx->vertex_buffers, &ctx->nr_vertex_buffers,
+ buffers, count);
+ ctx->pipe->set_vertex_buffers(ctx->pipe, count, buffers);
}
- for (; i < cso->nr_fragment_sampler_views; i++) {
- pipe_sampler_view_reference(&cso->fragment_sampler_views[i], NULL);
+}
+
+void cso_save_vertex_buffers(struct cso_context *ctx)
+{
+ util_copy_vertex_buffers(ctx->vertex_buffers_saved,
+ &ctx->nr_vertex_buffers_saved,
+ ctx->vertex_buffers,
+ ctx->nr_vertex_buffers);
+}
+
+void cso_restore_vertex_buffers(struct cso_context *ctx)
+{
+ util_copy_vertex_buffers(ctx->vertex_buffers,
+ &ctx->nr_vertex_buffers,
+ ctx->vertex_buffers_saved,
+ ctx->nr_vertex_buffers_saved);
+ ctx->pipe->set_vertex_buffers(ctx->pipe, ctx->nr_vertex_buffers,
+ ctx->vertex_buffers);
+}
+
+
+/**************** fragment/vertex sampler view state *************************/
+
+static enum pipe_error
+single_sampler(struct cso_context *ctx,
+ struct sampler_info *info,
+ unsigned idx,
+ const struct pipe_sampler_state *templ)
+{
+ void *handle = NULL;
+
+ if (templ != NULL) {
+ unsigned key_size = sizeof(struct pipe_sampler_state);
+ unsigned hash_key = cso_construct_key((void*)templ, key_size);
+ struct cso_hash_iter iter =
+ cso_find_state_template(ctx->cache,
+ hash_key, CSO_SAMPLER,
+ (void *) templ, key_size);
+
+ if (cso_hash_iter_is_null(iter)) {
+ struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler));
+ if (!cso)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ memcpy(&cso->state, templ, sizeof(*templ));
+ cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state);
+ cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state;
+ cso->context = ctx->pipe;
+
+ iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso);
+ if (cso_hash_iter_is_null(iter)) {
+ FREE(cso);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
+ handle = cso->data;
+ }
+ else {
+ handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data;
+ }
}
- cso->pipe->set_fragment_sampler_views(cso->pipe,
- MAX2(count, cso->nr_fragment_sampler_views),
- cso->fragment_sampler_views);
+ info->samplers[idx] = handle;
- cso->nr_fragment_sampler_views = count;
+ return PIPE_OK;
}
-void
-cso_save_fragment_sampler_views(struct cso_context *cso)
+enum pipe_error
+cso_single_sampler(struct cso_context *ctx,
+ unsigned idx,
+ const struct pipe_sampler_state *templ)
{
- uint i;
+ return single_sampler(ctx, &ctx->fragment_samplers, idx, templ);
+}
+
+enum pipe_error
+cso_single_vertex_sampler(struct cso_context *ctx,
+ unsigned idx,
+ const struct pipe_sampler_state *templ)
+{
+ return single_sampler(ctx, &ctx->vertex_samplers, idx, templ);
+}
+
- cso->nr_fragment_sampler_views_saved = cso->nr_fragment_sampler_views;
- for (i = 0; i < cso->nr_fragment_sampler_views; i++) {
- assert(!cso->fragment_sampler_views_saved[i]);
+static void
+single_sampler_done(struct cso_context *ctx,
+ struct sampler_info *info)
+{
+ unsigned i;
- pipe_sampler_view_reference(&cso->fragment_sampler_views_saved[i],
- cso->fragment_sampler_views[i]);
+ /* find highest non-null sampler */
+ for (i = PIPE_MAX_SAMPLERS; i > 0; i--) {
+ if (info->samplers[i - 1] != NULL)
+ break;
+ }
+
+ info->nr_samplers = i;
+
+ if (info->hw.nr_samplers != info->nr_samplers ||
+ memcmp(info->hw.samplers,
+ info->samplers,
+ info->nr_samplers * sizeof(void *)) != 0)
+ {
+ memcpy(info->hw.samplers,
+ info->samplers,
+ info->nr_samplers * sizeof(void *));
+ info->hw.nr_samplers = info->nr_samplers;
+
+ if (info == &ctx->fragment_samplers) {
+ ctx->pipe->bind_fragment_sampler_states(ctx->pipe,
+ info->nr_samplers,
+ info->samplers);
+ }
+ else if (info == &ctx->vertex_samplers) {
+ ctx->pipe->bind_vertex_sampler_states(ctx->pipe,
+ info->nr_samplers,
+ info->samplers);
+ }
+ else {
+ assert(0);
+ }
}
}
void
-cso_restore_fragment_sampler_views(struct cso_context *cso)
+cso_single_sampler_done( struct cso_context *ctx )
{
- uint i;
+ single_sampler_done(ctx, &ctx->fragment_samplers);
+}
+
+void
+cso_single_vertex_sampler_done(struct cso_context *ctx)
+{
+ single_sampler_done(ctx, &ctx->vertex_samplers);
+}
- for (i = 0; i < cso->nr_fragment_sampler_views_saved; i++) {
- pipe_sampler_view_reference(&cso->fragment_sampler_views[i], cso->fragment_sampler_views_saved[i]);
- pipe_sampler_view_reference(&cso->fragment_sampler_views_saved[i], NULL);
+
+/*
+ * If the function encouters any errors it will return the
+ * last one. Done to always try to set as many samplers
+ * as possible.
+ */
+static enum pipe_error
+set_samplers(struct cso_context *ctx,
+ struct sampler_info *info,
+ unsigned nr,
+ const struct pipe_sampler_state **templates)
+{
+ unsigned i;
+ enum pipe_error temp, error = PIPE_OK;
+
+ /* TODO: fastpath
+ */
+
+ for (i = 0; i < nr; i++) {
+ temp = single_sampler(ctx, info, i, templates[i]);
+ if (temp != PIPE_OK)
+ error = temp;
}
- for (; i < cso->nr_fragment_sampler_views; i++) {
- pipe_sampler_view_reference(&cso->fragment_sampler_views[i], NULL);
+
+ for ( ; i < info->nr_samplers; i++) {
+ temp = single_sampler(ctx, info, i, NULL);
+ if (temp != PIPE_OK)
+ error = temp;
}
- cso->pipe->set_fragment_sampler_views(cso->pipe,
- MAX2(cso->nr_fragment_sampler_views, cso->nr_fragment_sampler_views_saved),
- cso->fragment_sampler_views);
+ single_sampler_done(ctx, info);
+
+ return error;
+}
+
+enum pipe_error
+cso_set_samplers(struct cso_context *ctx,
+ unsigned nr,
+ const struct pipe_sampler_state **templates)
+{
+ return set_samplers(ctx, &ctx->fragment_samplers, nr, templates);
+}
- cso->nr_fragment_sampler_views = cso->nr_fragment_sampler_views_saved;
- cso->nr_fragment_sampler_views_saved = 0;
+enum pipe_error
+cso_set_vertex_samplers(struct cso_context *ctx,
+ unsigned nr,
+ const struct pipe_sampler_state **templates)
+{
+ return set_samplers(ctx, &ctx->vertex_samplers, nr, templates);
}
-/* vertex sampler view state */
+
+static void
+save_samplers(struct cso_context *ctx, struct sampler_info *info)
+{
+ info->nr_samplers_saved = info->nr_samplers;
+ memcpy(info->samplers_saved, info->samplers, sizeof(info->samplers));
+}
void
-cso_set_vertex_sampler_views(struct cso_context *cso,
- uint count,
- struct pipe_sampler_view **views)
+cso_save_samplers(struct cso_context *ctx)
+{
+ save_samplers(ctx, &ctx->fragment_samplers);
+}
+
+void
+cso_save_vertex_samplers(struct cso_context *ctx)
+{
+ save_samplers(ctx, &ctx->vertex_samplers);
+}
+
+
+
+static void
+restore_samplers(struct cso_context *ctx, struct sampler_info *info)
+{
+ info->nr_samplers = info->nr_samplers_saved;
+ memcpy(info->samplers, info->samplers_saved, sizeof(info->samplers));
+ single_sampler_done(ctx, info);
+}
+
+void
+cso_restore_samplers(struct cso_context *ctx)
+{
+ restore_samplers(ctx, &ctx->fragment_samplers);
+}
+
+void
+cso_restore_vertex_samplers(struct cso_context *ctx)
+{
+ restore_samplers(ctx, &ctx->vertex_samplers);
+}
+
+
+
+static void
+set_sampler_views(struct cso_context *ctx,
+ struct sampler_info *info,
+ void (*set_views)(struct pipe_context *,
+ unsigned num_views,
+ struct pipe_sampler_view **),
+ uint count,
+ struct pipe_sampler_view **views)
{
uint i;
+ /* reference new views */
for (i = 0; i < count; i++) {
- pipe_sampler_view_reference(&cso->vertex_sampler_views[i], views[i]);
+ pipe_sampler_view_reference(&info->views[i], views[i]);
}
- for (; i < cso->nr_vertex_sampler_views; i++) {
- pipe_sampler_view_reference(&cso->vertex_sampler_views[i], NULL);
+ /* unref extra old views, if any */
+ for (; i < info->nr_views; i++) {
+ pipe_sampler_view_reference(&info->views[i], NULL);
}
- cso->pipe->set_vertex_sampler_views(cso->pipe,
- MAX2(count, cso->nr_vertex_sampler_views),
- cso->vertex_sampler_views);
+ info->nr_views = count;
- cso->nr_vertex_sampler_views = count;
+ /* bind the new sampler views */
+ set_views(ctx->pipe, count, info->views);
}
void
-cso_save_vertex_sampler_views(struct cso_context *cso)
+cso_set_fragment_sampler_views(struct cso_context *ctx,
+ uint count,
+ struct pipe_sampler_view **views)
{
- uint i;
+ set_sampler_views(ctx, &ctx->fragment_samplers,
+ ctx->pipe->set_fragment_sampler_views,
+ count, views);
+}
- cso->nr_vertex_sampler_views_saved = cso->nr_vertex_sampler_views;
+void
+cso_set_vertex_sampler_views(struct cso_context *ctx,
+ uint count,
+ struct pipe_sampler_view **views)
+{
+ set_sampler_views(ctx, &ctx->vertex_samplers,
+ ctx->pipe->set_vertex_sampler_views,
+ count, views);
+}
- for (i = 0; i < cso->nr_vertex_sampler_views; i++) {
- assert(!cso->vertex_sampler_views_saved[i]);
- pipe_sampler_view_reference(&cso->vertex_sampler_views_saved[i],
- cso->vertex_sampler_views[i]);
+
+static void
+save_sampler_views(struct cso_context *ctx,
+ struct sampler_info *info)
+{
+ uint i;
+
+ info->nr_views_saved = info->nr_views;
+
+ for (i = 0; i < info->nr_views; i++) {
+ assert(!info->views_saved[i]);
+ pipe_sampler_view_reference(&info->views_saved[i], info->views[i]);
}
}
void
-cso_restore_vertex_sampler_views(struct cso_context *cso)
+cso_save_fragment_sampler_views(struct cso_context *ctx)
+{
+ save_sampler_views(ctx, &ctx->fragment_samplers);
+}
+
+void
+cso_save_vertex_sampler_views(struct cso_context *ctx)
+{
+ save_sampler_views(ctx, &ctx->vertex_samplers);
+}
+
+
+static void
+restore_sampler_views(struct cso_context *ctx,
+ struct sampler_info *info,
+ void (*set_views)(struct pipe_context *,
+ unsigned num_views,
+ struct pipe_sampler_view **))
{
uint i;
- for (i = 0; i < cso->nr_vertex_sampler_views_saved; i++) {
- pipe_sampler_view_reference(&cso->vertex_sampler_views[i], cso->vertex_sampler_views_saved[i]);
- pipe_sampler_view_reference(&cso->vertex_sampler_views_saved[i], NULL);
+ for (i = 0; i < info->nr_views_saved; i++) {
+ pipe_sampler_view_reference(&info->views[i], info->views_saved[i]);
+ pipe_sampler_view_reference(&info->views_saved[i], NULL);
}
- for (; i < cso->nr_vertex_sampler_views; i++) {
- pipe_sampler_view_reference(&cso->vertex_sampler_views[i], NULL);
+ for (; i < info->nr_views; i++) {
+ pipe_sampler_view_reference(&info->views[i], NULL);
}
- cso->pipe->set_vertex_sampler_views(cso->pipe,
- MAX2(cso->nr_vertex_sampler_views, cso->nr_vertex_sampler_views_saved),
- cso->vertex_sampler_views);
+ /* bind the old/saved sampler views */
+ set_views(ctx->pipe, info->nr_views_saved, info->views);
+
+ info->nr_views = info->nr_views_saved;
+ info->nr_views_saved = 0;
+}
+
+void
+cso_restore_fragment_sampler_views(struct cso_context *ctx)
+{
+ restore_sampler_views(ctx, &ctx->fragment_samplers,
+ ctx->pipe->set_fragment_sampler_views);
+}
- cso->nr_vertex_sampler_views = cso->nr_vertex_sampler_views_saved;
- cso->nr_vertex_sampler_views_saved = 0;
+void
+cso_restore_vertex_sampler_views(struct cso_context *ctx)
+{
+ restore_sampler_views(ctx, &ctx->vertex_samplers,
+ ctx->pipe->set_vertex_sampler_views);
}
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index f0b07f7376..00edc9f8dd 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -110,6 +110,13 @@ void cso_save_vertex_elements(struct cso_context *ctx);
void cso_restore_vertex_elements(struct cso_context *ctx);
+void cso_set_vertex_buffers(struct cso_context *ctx,
+ unsigned count,
+ const struct pipe_vertex_buffer *buffers);
+void cso_save_vertex_buffers(struct cso_context *ctx);
+void cso_restore_vertex_buffers(struct cso_context *ctx);
+
+
/* These aren't really sensible -- most of the time the api provides
* object semantics for shaders anyway, and the cases where it doesn't
* (eg mesa's internall-generated texenv programs), it will be up to
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 73d5b6e403..95d9671987 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -35,6 +35,7 @@
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_cpu_detect.h"
+#include "util/u_inlines.h"
#include "draw_context.h"
#include "draw_vs.h"
#include "draw_gs.h"
@@ -87,8 +88,14 @@ draw_create_gallivm(struct pipe_context *pipe, struct gallivm_state *gallivm)
goto fail;
#if HAVE_LLVM
- if (draw_get_option_use_llvm() && gallivm) {
- draw->llvm = draw_llvm_create(draw, gallivm);
+ if (draw_get_option_use_llvm()) {
+ if (!gallivm) {
+ gallivm = gallivm_create();
+ draw->own_gallivm = gallivm;
+ }
+
+ if (gallivm)
+ draw->llvm = draw_llvm_create(draw, gallivm);
}
#endif
@@ -164,6 +171,10 @@ void draw_destroy( struct draw_context *draw )
}
}
+ for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
+ pipe_resource_reference(&draw->pt.vertex_buffer[i].buffer, NULL);
+ }
+
/* Not so fast -- we're just borrowing this at the moment.
*
if (draw->render)
@@ -175,8 +186,11 @@ void draw_destroy( struct draw_context *draw )
draw_vs_destroy( draw );
draw_gs_destroy( draw );
#ifdef HAVE_LLVM
- if(draw->llvm)
+ if (draw->llvm)
draw_llvm_destroy( draw->llvm );
+
+ if (draw->own_gallivm)
+ gallivm_destroy(draw->own_gallivm);
#endif
FREE( draw );
@@ -307,8 +321,9 @@ draw_set_vertex_buffers(struct draw_context *draw,
{
assert(count <= PIPE_MAX_ATTRIBS);
- memcpy(draw->pt.vertex_buffer, buffers, count * sizeof(buffers[0]));
- draw->pt.nr_vertex_buffers = count;
+ util_copy_vertex_buffers(draw->pt.vertex_buffer,
+ &draw->pt.nr_vertex_buffers,
+ buffers, count);
}
@@ -395,7 +410,7 @@ void
draw_wide_line_threshold(struct draw_context *draw, float threshold)
{
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->pipeline.wide_line_threshold = threshold;
+ draw->pipeline.wide_line_threshold = roundf(threshold);
}
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 3b8286f05d..a5217c1d4e 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -214,13 +214,12 @@ static LLVMTypeRef
create_jit_vertex_buffer_type(struct gallivm_state *gallivm)
{
LLVMTargetDataRef target = gallivm->target;
- LLVMTypeRef elem_types[4];
+ LLVMTypeRef elem_types[3];
LLVMTypeRef vb_type;
elem_types[0] =
- elem_types[1] =
- elem_types[2] = LLVMInt32TypeInContext(gallivm->context);
- elem_types[3] = LLVMPointerType(LLVMOpaqueTypeInContext(gallivm->context), 0); /* vs_constants */
+ elem_types[1] = LLVMInt32TypeInContext(gallivm->context);
+ elem_types[2] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* vs_constants */
vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
Elements(elem_types), 0);
@@ -230,7 +229,7 @@ create_jit_vertex_buffer_type(struct gallivm_state *gallivm)
LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
target, vb_type, 0);
LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
- target, vb_type, 2);
+ target, vb_type, 1);
LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type);
@@ -437,6 +436,7 @@ generate_vs(struct draw_llvm *llvm,
LLVMBuilderRef builder,
LLVMValueRef (*outputs)[NUM_CHANNELS],
const LLVMValueRef (*inputs)[NUM_CHANNELS],
+ LLVMValueRef system_values_array,
LLVMValueRef context_ptr,
struct lp_build_sampler_soa *draw_sampler)
{
@@ -468,6 +468,7 @@ generate_vs(struct draw_llvm *llvm,
vs_type,
NULL /*struct lp_build_mask_context *mask*/,
consts_ptr,
+ system_values_array,
NULL /*pos*/,
inputs,
outputs,
@@ -509,9 +510,7 @@ generate_fetch(struct gallivm_state *gallivm,
LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
&indices, 1, "");
LLVMValueRef vb_stride = draw_jit_vbuffer_stride(gallivm, vbuf);
- LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(gallivm, vbuf);
LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vbuf);
- LLVMValueRef cond;
LLVMValueRef stride;
if (velem->instance_divisor) {
@@ -521,10 +520,6 @@ generate_fetch(struct gallivm_state *gallivm,
"instance_divisor");
}
- /* limit index to min(index, vb_max_index) */
- cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
- index = LLVMBuildSelect(builder, cond, index, vb_max_index, "");
-
stride = LLVMBuildMul(builder, vb_stride, index, "");
vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer");
@@ -1118,7 +1113,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
LLVMValueRef start, end, count, stride, step, io_itr;
LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
LLVMValueRef instance_id;
+ LLVMValueRef system_values_array;
struct draw_context *draw = llvm->draw;
+ const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
unsigned i, j;
struct lp_build_context bld;
struct lp_build_loop_state lp_loop;
@@ -1179,6 +1176,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
lp_build_context_init(&bld, llvm->gallivm, lp_type_int(32));
+ system_values_array = lp_build_system_values_array(gallivm, vs_info,
+ instance_id, NULL);
+
end = lp_build_add(&bld, start, count);
step = lp_build_const_int32(gallivm, max_vertices);
@@ -1233,6 +1233,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
builder,
outputs,
ptr_aos,
+ system_values_array,
context_ptr,
sampler);
@@ -1263,8 +1264,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
/* store clipmask in vertex header and positions in data */
convert_to_aos(gallivm, io, outputs, clipmask,
- draw->vs.vertex_shader->info.num_outputs,
- max_vertices);
+ vs_info->num_outputs, max_vertices);
}
lp_build_loop_end_cond(&lp_loop, end, step, LLVMIntUGE);
@@ -1315,7 +1315,9 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
LLVMValueRef instance_id;
+ LLVMValueRef system_values_array;
struct draw_context *draw = llvm->draw;
+ const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
unsigned i, j;
struct lp_build_context bld;
struct lp_build_loop_state lp_loop;
@@ -1376,6 +1378,10 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
lp_build_context_init(&bld, gallivm, lp_type_int(32));
+ system_values_array = lp_build_system_values_array(gallivm, vs_info,
+ instance_id, NULL);
+
+
step = lp_build_const_int32(gallivm, max_vertices);
/* code generated texture sampling */
@@ -1438,6 +1444,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
builder,
outputs,
ptr_aos,
+ system_values_array,
context_ptr,
sampler);
@@ -1471,8 +1478,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
* and transformed positions in data
*/
convert_to_aos(gallivm, io, outputs, clipmask,
- draw->vs.vertex_shader->info.num_outputs,
- max_vertices);
+ vs_info->num_outputs, max_vertices);
}
lp_build_loop_end_cond(&lp_loop, fetch_count, step, LLVMIntUGE);
@@ -1610,16 +1616,14 @@ draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
struct draw_llvm *llvm = variant->llvm;
if (variant->function_elts) {
- if (variant->function_elts)
- LLVMFreeMachineCodeForFunction(llvm->gallivm->engine,
- variant->function_elts);
+ LLVMFreeMachineCodeForFunction(llvm->gallivm->engine,
+ variant->function_elts);
LLVMDeleteFunction(variant->function_elts);
}
if (variant->function) {
- if (variant->function)
- LLVMFreeMachineCodeForFunction(llvm->gallivm->engine,
- variant->function);
+ LLVMFreeMachineCodeForFunction(llvm->gallivm->engine,
+ variant->function);
LLVMDeleteFunction(variant->function);
}
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
index 73c1d9251e..e8623e7bcd 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -133,11 +133,8 @@ struct draw_jit_context
#define draw_jit_vbuffer_stride(_gallivm, _ptr) \
lp_build_struct_get(_gallivm, _ptr, 0, "stride")
-#define draw_jit_vbuffer_max_index(_gallivm, _ptr) \
- lp_build_struct_get(_gallivm, _ptr, 1, "max_index")
-
#define draw_jit_vbuffer_offset(_gallivm, _ptr) \
- lp_build_struct_get(_gallivm, _ptr, 2, "buffer_offset")
+ lp_build_struct_get(_gallivm, _ptr, 1, "buffer_offset")
typedef int
@@ -229,7 +226,6 @@ struct draw_llvm_variant
/* key is variable-sized, must be last */
struct draw_llvm_variant_key key;
- /* key is variable-sized, must be last */
};
struct llvm_vertex_shader {
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index 6206197dae..f1b0171f52 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -64,8 +64,8 @@ boolean draw_pipeline_init( struct draw_context *draw )
return FALSE;
/* these defaults are oriented toward the needs of softpipe */
- draw->pipeline.wide_point_threshold = 1000000.0; /* infinity */
- draw->pipeline.wide_line_threshold = 1.0;
+ draw->pipeline.wide_point_threshold = 1000000.0f; /* infinity */
+ draw->pipeline.wide_line_threshold = 1.0f;
draw->pipeline.wide_point_sprites = FALSE;
draw->pipeline.line_stipple = TRUE;
draw->pipeline.point_sprite = TRUE;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 0851b9acc0..32af29ae14 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -55,9 +55,16 @@
/**
+ * Size for the alpha texture used for antialiasing
+ */
+#define TEXTURE_SIZE_LOG2 5 /* 32 x 32 */
+
+/**
* Max texture level for the alpha texture used for antialiasing
+ *
+ * Don't use the 1x1 and 2x2 mipmap levels.
*/
-#define MAX_TEXTURE_LEVEL 5 /* 32 x 32 */
+#define MAX_TEXTURE_LEVEL (TEXTURE_SIZE_LOG2 - 2)
/**
@@ -403,8 +410,8 @@ aaline_create_texture(struct aaline_stage *aaline)
texTemp.target = PIPE_TEXTURE_2D;
texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
texTemp.last_level = MAX_TEXTURE_LEVEL;
- texTemp.width0 = 1 << MAX_TEXTURE_LEVEL;
- texTemp.height0 = 1 << MAX_TEXTURE_LEVEL;
+ texTemp.width0 = 1 << TEXTURE_SIZE_LOG2;
+ texTemp.height0 = 1 << TEXTURE_SIZE_LOG2;
texTemp.depth0 = 1;
texTemp.array_size = 1;
texTemp.bind = PIPE_BIND_SAMPLER_VIEW;
@@ -461,7 +468,7 @@ aaline_create_texture(struct aaline_stage *aaline)
d = 200; /* tuneable */
}
else if (i == 0 || j == 0 || i == size - 1 || j == size - 1) {
- d = 0;
+ d = 35; /* edge texel */
}
else {
d = 255;
@@ -498,8 +505,7 @@ aaline_create_sampler(struct aaline_stage *aaline)
sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
sampler.normalized_coords = 1;
sampler.min_lod = 0.0f;
- /* avoid using the 1x1 and 2x2 mipmap levels */
- sampler.max_lod = MAX_TEXTURE_LEVEL - 2;
+ sampler.max_lod = MAX_TEXTURE_LEVEL;
aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler);
if (aaline->sampler_cso == NULL)
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 5ea552f51c..60f6380c50 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -874,6 +874,8 @@ draw_install_aapoint_stage(struct draw_context *draw,
{
struct aapoint_stage *aapoint;
+ pipe->draw = (void *) draw;
+
/*
* Create / install AA point drawing / prim stage
*/
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index f5515c1df7..fe3627be86 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -27,8 +27,9 @@
/**
* Polygon stipple stage: implement polygon stipple with texture map and
- * fragment program. The fragment program samples the texture and does
- * a fragment kill for the stipple-failing fragments.
+ * fragment program. The fragment program samples the texture using the
+ * fragment window coordinate register and does a fragment kill for the
+ * stipple-failing fragments.
*
* Authors: Brian Paul
*/
@@ -114,7 +115,8 @@ struct pstip_stage
/**
* Subclass of tgsi_transform_context, used for transforming the
- * user's fragment shader to add the special AA instructions.
+ * user's fragment shader to add the extra texture sample and fragment kill
+ * instructions.
*/
struct pstip_transform_context {
struct tgsi_transform_context base;
@@ -386,13 +388,6 @@ pstip_update_texture(struct pstip_stage *pstip)
uint i, j;
ubyte *data;
- /* XXX: want to avoid flushing just because we use stipple:
- *
- * Flush should no longer be necessary if driver is properly
- * interleaving drawing and transfers on a given context:
- */
- pipe->flush( pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL );
-
transfer = pipe_get_transfer(pipe, pstip->texture, 0, 0,
PIPE_TRANSFER_WRITE, 0, 0, 32, 32);
data = pipe->transfer_map(pipe, transfer);
@@ -658,16 +653,16 @@ pstip_create_fs_state(struct pipe_context *pipe,
const struct pipe_shader_state *fs)
{
struct pstip_stage *pstip = pstip_stage_from_pipe(pipe);
- struct pstip_fragment_shader *aafs = CALLOC_STRUCT(pstip_fragment_shader);
+ struct pstip_fragment_shader *pstipfs = CALLOC_STRUCT(pstip_fragment_shader);
- if (aafs) {
- aafs->state = *fs;
+ if (pstipfs) {
+ pstipfs->state = *fs;
/* pass-through */
- aafs->driver_fs = pstip->driver_create_fs_state(pstip->pipe, fs);
+ pstipfs->driver_fs = pstip->driver_create_fs_state(pstip->pipe, fs);
}
- return aafs;
+ return pstipfs;
}
@@ -675,12 +670,12 @@ static void
pstip_bind_fs_state(struct pipe_context *pipe, void *fs)
{
struct pstip_stage *pstip = pstip_stage_from_pipe(pipe);
- struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs;
+ struct pstip_fragment_shader *pstipfs = (struct pstip_fragment_shader *) fs;
/* save current */
- pstip->fs = aafs;
+ pstip->fs = pstipfs;
/* pass-through */
pstip->driver_bind_fs_state(pstip->pipe,
- (aafs ? aafs->driver_fs : NULL));
+ (pstipfs ? pstipfs->driver_fs : NULL));
}
@@ -688,14 +683,14 @@ static void
pstip_delete_fs_state(struct pipe_context *pipe, void *fs)
{
struct pstip_stage *pstip = pstip_stage_from_pipe(pipe);
- struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs;
+ struct pstip_fragment_shader *pstipfs = (struct pstip_fragment_shader *) fs;
/* pass-through */
- pstip->driver_delete_fs_state(pstip->pipe, aafs->driver_fs);
+ pstip->driver_delete_fs_state(pstip->pipe, pstipfs->driver_fs);
- if (aafs->pstip_fs)
- pstip->driver_delete_fs_state(pstip->pipe, aafs->pstip_fs);
+ if (pstipfs->pstip_fs)
+ pstip->driver_delete_fs_state(pstip->pipe, pstipfs->pstip_fs);
- FREE(aafs);
+ FREE(pstipfs);
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c
index c575a8ac7c..27afba5af3 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_validate.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c
@@ -29,6 +29,7 @@
*/
#include "util/u_memory.h"
+#include "util/u_math.h"
#include "pipe/p_defines.h"
#include "draw_private.h"
#include "draw_pipe.h"
@@ -86,7 +87,7 @@ draw_need_pipeline(const struct draw_context *draw,
return TRUE;
/* wide lines */
- if (rasterizer->line_width > draw->pipeline.wide_line_threshold)
+ if (roundf(rasterizer->line_width) > draw->pipeline.wide_line_threshold)
return TRUE;
/* AA lines */
@@ -169,7 +170,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
stage->next = next;
/* drawing wide lines? */
- wide_lines = (rast->line_width > draw->pipeline.wide_line_threshold
+ wide_lines = (roundf(rast->line_width) > draw->pipeline.wide_line_threshold
&& !rast->line_smooth);
/* drawing large/sprite points (but not AA points)? */
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 06ed4d60ef..db2e3c5410 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -286,6 +286,7 @@ struct draw_context
#ifdef HAVE_LLVM
struct draw_llvm *llvm;
+ struct gallivm_state *own_gallivm;
#endif
struct pipe_sampler_view *sampler_views[PIPE_MAX_VERTEX_SAMPLERS];
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 4078b2a07d..c3d7e871f7 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -459,10 +459,9 @@ draw_vbo(struct draw_context *draw,
}
debug_printf("Buffers:\n");
for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
- debug_printf(" %u: stride=%u maxindex=%u offset=%u ptr=%p\n",
+ debug_printf(" %u: stride=%u offset=%u ptr=%p\n",
i,
draw->pt.vertex_buffer[i].stride,
- draw->pt.vertex_buffer[i].max_index,
draw->pt.vertex_buffer[i].buffer_offset,
draw->pt.user.vbuffer[i]);
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index ae12ee24bd..4fa3b265e1 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -139,7 +139,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
draw->pt.vertex_buffer[i].stride,
- draw->pt.vertex_buffer[i].max_index);
+ draw->pt.user.max_index);
}
translate->run_elts( translate,
@@ -166,7 +166,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
draw->pt.vertex_buffer[i].stride,
- draw->pt.vertex_buffer[i].max_index);
+ draw->pt.user.max_index);
}
translate->run( translate,
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index e706b7796f..51043102a6 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -186,7 +186,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
draw->pt.vertex_buffer[i].stride,
- draw->pt.vertex_buffer[i].max_index);
+ draw->pt.user.max_index);
}
*max_vertices = (draw->render->max_vertex_buffer_bytes /
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index 7c198c6026..1e926fb028 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -58,8 +58,8 @@ struct fetch_shade_emit {
const ubyte *src[PIPE_MAX_ATTRIBS];
unsigned prim;
- struct draw_vs_varient_key key;
- struct draw_vs_varient *active;
+ struct draw_vs_variant_key key;
+ struct draw_vs_variant *active;
const struct vertex_info *vinfo;
@@ -150,7 +150,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
}
- fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader,
+ fse->active = draw_vs_lookup_variant( draw->vs.vertex_shader,
&fse->key );
if (!fse->active) {
@@ -169,7 +169,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
((const ubyte *) draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
draw->pt.vertex_buffer[i].stride,
- draw->pt.vertex_buffer[i].max_index );
+ draw->pt.user.max_index );
}
*max_vertices = (draw->render->max_vertex_buffer_bytes /
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
index 3f66f962e1..75dba8c39a 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
@@ -258,9 +258,10 @@ vsplit_segment_fan_linear(struct vsplit_frontend *vsplit, unsigned flags,
boolean use_spoken = ((flags & DRAW_SPLIT_BEFORE) != 0);
unsigned nr = 0, i;
- assert(icount + !!use_spoken <= vsplit->segment_size);
+ assert(icount <= vsplit->segment_size);
if (use_spoken) {
+ /* replace istart by i0 */
vsplit->fetch_elts[nr++] = i0;
for (i = 1 ; i < icount; i++)
vsplit->fetch_elts[nr++] = istart + i;
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index fb665b08ff..7caad6f005 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -165,10 +165,10 @@ draw_delete_vertex_shader(struct draw_context *draw,
{
unsigned i;
- for (i = 0; i < dvs->nr_varients; i++)
- dvs->varient[i]->destroy( dvs->varient[i] );
+ for (i = 0; i < dvs->nr_variants; i++)
+ dvs->variant[i]->destroy( dvs->variant[i] );
- dvs->nr_varients = 0;
+ dvs->nr_variants = 0;
dvs->delete( dvs );
}
@@ -225,40 +225,40 @@ draw_vs_destroy( struct draw_context *draw )
}
-struct draw_vs_varient *
-draw_vs_lookup_varient( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key )
+struct draw_vs_variant *
+draw_vs_lookup_variant( struct draw_vertex_shader *vs,
+ const struct draw_vs_variant_key *key )
{
- struct draw_vs_varient *varient;
+ struct draw_vs_variant *variant;
unsigned i;
- /* Lookup existing varient:
+ /* Lookup existing variant:
*/
- for (i = 0; i < vs->nr_varients; i++)
- if (draw_vs_varient_key_compare(key, &vs->varient[i]->key) == 0)
- return vs->varient[i];
+ for (i = 0; i < vs->nr_variants; i++)
+ if (draw_vs_variant_key_compare(key, &vs->variant[i]->key) == 0)
+ return vs->variant[i];
/* Else have to create a new one:
*/
- varient = vs->create_varient( vs, key );
- if (varient == NULL)
+ variant = vs->create_variant( vs, key );
+ if (variant == NULL)
return NULL;
/* Add it to our list, could be smarter:
*/
- if (vs->nr_varients < Elements(vs->varient)) {
- vs->varient[vs->nr_varients++] = varient;
+ if (vs->nr_variants < Elements(vs->variant)) {
+ vs->variant[vs->nr_variants++] = variant;
}
else {
- vs->last_varient++;
- vs->last_varient %= Elements(vs->varient);
- vs->varient[vs->last_varient]->destroy(vs->varient[vs->last_varient]);
- vs->varient[vs->last_varient] = varient;
+ vs->last_variant++;
+ vs->last_variant %= Elements(vs->variant);
+ vs->variant[vs->last_variant]->destroy(vs->variant[vs->last_variant]);
+ vs->variant[vs->last_variant] = variant;
}
/* Done
*/
- return varient;
+ return variant;
}
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index f9a038788f..e6d187e977 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -33,12 +33,13 @@
#include "draw_context.h"
#include "draw_private.h"
+#include "draw_vertex.h"
struct draw_context;
struct pipe_shader_state;
-struct draw_varient_input
+struct draw_variant_input
{
enum pipe_format format;
unsigned buffer;
@@ -46,19 +47,19 @@ struct draw_varient_input
unsigned instance_divisor;
};
-struct draw_varient_output
+struct draw_variant_output
{
- enum pipe_format format; /* output format */
+ enum attrib_emit format; /* output format */
unsigned vs_output:8; /* which vertex shader output is this? */
unsigned offset:24; /* offset into output vertex */
};
-struct draw_varient_element {
- struct draw_varient_input in;
- struct draw_varient_output out;
+struct draw_variant_element {
+ struct draw_variant_input in;
+ struct draw_variant_output out;
};
-struct draw_vs_varient_key {
+struct draw_vs_variant_key {
unsigned output_stride;
unsigned nr_elements:8; /* max2(nr_inputs, nr_outputs) */
unsigned nr_inputs:8;
@@ -66,34 +67,34 @@ struct draw_vs_varient_key {
unsigned viewport:1;
unsigned clip:1;
unsigned const_vbuffers:5;
- struct draw_varient_element element[PIPE_MAX_ATTRIBS];
+ struct draw_variant_element element[PIPE_MAX_ATTRIBS];
};
-struct draw_vs_varient;
+struct draw_vs_variant;
-struct draw_vs_varient {
- struct draw_vs_varient_key key;
+struct draw_vs_variant {
+ struct draw_vs_variant_key key;
struct draw_vertex_shader *vs;
- void (*set_buffer)( struct draw_vs_varient *,
+ void (*set_buffer)( struct draw_vs_variant *,
unsigned i,
const void *ptr,
unsigned stride,
unsigned max_stride );
- void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader,
+ void (PIPE_CDECL *run_linear)( struct draw_vs_variant *shader,
unsigned start,
unsigned count,
void *output_buffer );
- void (PIPE_CDECL *run_elts)( struct draw_vs_varient *shader,
+ void (PIPE_CDECL *run_elts)( struct draw_vs_variant *shader,
const unsigned *elts,
unsigned count,
void *output_buffer );
- void (*destroy)( struct draw_vs_varient * );
+ void (*destroy)( struct draw_vs_variant * );
};
@@ -117,11 +118,11 @@ struct draw_vertex_shader {
/*
*/
- struct draw_vs_varient *varient[16];
- unsigned nr_varients;
- unsigned last_varient;
- struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader,
- const struct draw_vs_varient_key *key );
+ struct draw_vs_variant *variant[16];
+ unsigned nr_variants;
+ unsigned last_variant;
+ struct draw_vs_variant *(*create_variant)( struct draw_vertex_shader *shader,
+ const struct draw_vs_variant_key *key );
void (*prepare)( struct draw_vertex_shader *shader,
@@ -144,9 +145,9 @@ struct draw_vertex_shader {
};
-struct draw_vs_varient *
-draw_vs_lookup_varient( struct draw_vertex_shader *base,
- const struct draw_vs_varient_key *key );
+struct draw_vs_variant *
+draw_vs_lookup_variant( struct draw_vertex_shader *base,
+ const struct draw_vs_variant_key *key );
/********************************************************************************
@@ -166,12 +167,12 @@ draw_create_vs_ppc(struct draw_context *draw,
const struct pipe_shader_state *templ);
-struct draw_vs_varient_key;
+struct draw_vs_variant_key;
struct draw_vertex_shader;
-struct draw_vs_varient *
-draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key );
+struct draw_vs_variant *
+draw_vs_create_variant_aos_sse( struct draw_vertex_shader *vs,
+ const struct draw_vs_variant_key *key );
#if HAVE_LLVM
struct draw_vertex_shader *
@@ -181,7 +182,7 @@ draw_create_vs_llvm(struct draw_context *draw,
/********************************************************************************
- * Helpers for vs implementations that don't do their own fetch/emit varients.
+ * Helpers for vs implementations that don't do their own fetch/emit variants.
* Means these can be shared between shaders.
*/
struct translate;
@@ -194,21 +195,21 @@ struct translate *draw_vs_get_fetch( struct draw_context *draw,
struct translate *draw_vs_get_emit( struct draw_context *draw,
struct translate_key *key );
-struct draw_vs_varient *
-draw_vs_create_varient_generic( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key );
+struct draw_vs_variant *
+draw_vs_create_variant_generic( struct draw_vertex_shader *vs,
+ const struct draw_vs_variant_key *key );
-static INLINE int draw_vs_varient_keysize( const struct draw_vs_varient_key *key )
+static INLINE int draw_vs_variant_keysize( const struct draw_vs_variant_key *key )
{
- return 2 * sizeof(int) + key->nr_elements * sizeof(struct draw_varient_element);
+ return 2 * sizeof(int) + key->nr_elements * sizeof(struct draw_variant_element);
}
-static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key *a,
- const struct draw_vs_varient_key *b )
+static INLINE int draw_vs_variant_key_compare( const struct draw_vs_variant_key *a,
+ const struct draw_vs_variant_key *b )
{
- int keysize = draw_vs_varient_keysize(a);
+ int keysize = draw_vs_variant_keysize(a);
return memcmp(a, b, keysize);
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 19f49e34c8..7b90dba0cd 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -1918,7 +1918,7 @@ static void find_last_write_outputs( struct aos_compilation *cp )
#define ARG_OUTBUF 4
-static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
+static boolean build_vertex_program( struct draw_vs_variant_aos_sse *variant,
boolean linear )
{
struct tgsi_parse_context parse;
@@ -1927,14 +1927,14 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
util_init_math();
- tgsi_parse_init( &parse, varient->base.vs->state.tokens );
+ tgsi_parse_init( &parse, variant->base.vs->state.tokens );
memset(&cp, 0, sizeof(cp));
cp.insn_counter = 1;
- cp.vaos = varient;
+ cp.vaos = variant;
cp.have_sse2 = 1;
- cp.func = &varient->func[ linear ? 0 : 1 ];
+ cp.func = &variant->func[ linear ? 0 : 1 ];
cp.tmp_EAX = x86_make_reg(file_REG32, reg_AX);
cp.idx_EBX = x86_make_reg(file_REG32, reg_BX);
@@ -2090,20 +2090,20 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
/** cast wrapper */
-static INLINE struct draw_vs_varient_aos_sse *
-draw_vs_varient_aos_sse(struct draw_vs_varient *varient)
+static INLINE struct draw_vs_variant_aos_sse *
+draw_vs_variant_aos_sse(struct draw_vs_variant *variant)
{
- return (struct draw_vs_varient_aos_sse *) varient;
+ return (struct draw_vs_variant_aos_sse *) variant;
}
-static void vaos_set_buffer( struct draw_vs_varient *varient,
+static void vaos_set_buffer( struct draw_vs_variant *variant,
unsigned buf,
const void *ptr,
unsigned stride,
unsigned max_stride)
{
- struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient);
+ struct draw_vs_variant_aos_sse *vaos = draw_vs_variant_aos_sse(variant);
if (buf < vaos->nr_vb) {
vaos->buffer[buf].base_ptr = (char *)ptr;
@@ -2115,12 +2115,12 @@ static void vaos_set_buffer( struct draw_vs_varient *varient,
-static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
+static void PIPE_CDECL vaos_run_elts( struct draw_vs_variant *variant,
const unsigned *elts,
unsigned count,
void *output_buffer )
{
- struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient);
+ struct draw_vs_variant_aos_sse *vaos = draw_vs_variant_aos_sse(variant);
struct aos_machine *machine = vaos->draw->vs.aos_machine;
unsigned i;
@@ -2139,12 +2139,12 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
output_buffer );
}
-static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
+static void PIPE_CDECL vaos_run_linear( struct draw_vs_variant *variant,
unsigned start,
unsigned count,
void *output_buffer )
{
- struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient);
+ struct draw_vs_variant_aos_sse *vaos = draw_vs_variant_aos_sse(variant);
struct aos_machine *machine = vaos->draw->vs.aos_machine;
unsigned i;
@@ -2171,9 +2171,9 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
-static void vaos_destroy( struct draw_vs_varient *varient )
+static void vaos_destroy( struct draw_vs_variant *variant )
{
- struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient);
+ struct draw_vs_variant_aos_sse *vaos = draw_vs_variant_aos_sse(variant);
FREE( vaos->buffer );
@@ -2185,11 +2185,11 @@ static void vaos_destroy( struct draw_vs_varient *varient )
-static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key )
+static struct draw_vs_variant *variant_aos_sse( struct draw_vertex_shader *vs,
+ const struct draw_vs_variant_key *key )
{
unsigned i;
- struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse);
+ struct draw_vs_variant_aos_sse *vaos = CALLOC_STRUCT(draw_vs_variant_aos_sse);
if (!vaos)
goto fail;
@@ -2249,17 +2249,17 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
}
-struct draw_vs_varient *
-draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key )
+struct draw_vs_variant *
+draw_vs_create_variant_aos_sse( struct draw_vertex_shader *vs,
+ const struct draw_vs_variant_key *key )
{
- struct draw_vs_varient *varient = varient_aos_sse( vs, key );
+ struct draw_vs_variant *variant = variant_aos_sse( vs, key );
- if (varient == NULL) {
- varient = draw_vs_create_varient_generic( vs, key );
+ if (variant == NULL) {
+ variant = draw_vs_create_variant_generic( vs, key );
}
- return varient;
+ return variant;
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
index 68e8295b5e..55e63d8b9f 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.h
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -98,9 +98,9 @@ struct aos_buffer {
-/* This is the temporary storage used by all the aos_sse vs varients.
+/* This is the temporary storage used by all the aos_sse vs variants.
* Create one per context and reuse by passing a pointer in at
- * vs_varient creation??
+ * vs_variant creation??
*/
struct aos_machine {
float input [MAX_INPUTS ][4];
@@ -134,7 +134,7 @@ struct aos_machine {
struct aos_compilation {
struct x86_function *func;
- struct draw_vs_varient_aos_sse *vaos;
+ struct draw_vs_variant_aos_sse *vaos;
unsigned insn_counter;
unsigned num_immediates;
@@ -234,8 +234,8 @@ typedef void (PIPE_CDECL *vaos_run_linear_func)( struct aos_machine *,
void *output_buffer);
-struct draw_vs_varient_aos_sse {
- struct draw_vs_varient base;
+struct draw_vs_variant_aos_sse {
+ struct draw_vs_variant base;
struct draw_context *draw;
struct aos_buffer *buffer;
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
index 8f8bbe7cb8..f1dd448773 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
@@ -384,7 +384,7 @@ static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp,
static boolean emit_output( struct aos_compilation *cp,
struct x86_reg ptr,
struct x86_reg dataXMM,
- unsigned format )
+ enum attrib_emit format )
{
switch (format) {
case EMIT_1F:
@@ -422,7 +422,7 @@ boolean aos_emit_outputs( struct aos_compilation *cp )
unsigned i;
for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) {
- unsigned format = cp->vaos->base.key.element[i].out.format;
+ enum attrib_emit format = cp->vaos->base.key.element[i].out.format;
unsigned offset = cp->vaos->base.key.element[i].out.offset;
unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output;
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index dab3eb1ca8..c41d7c42a0 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -99,6 +99,12 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS,
constants, const_size);
+ if (shader->info.uses_instanceid) {
+ unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_INSTANCEID];
+ assert(i < Elements(machine->SystemValue));
+ machine->SystemValue[i][0] = shader->draw->instance_id;
+ }
+
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
@@ -203,7 +209,7 @@ draw_create_vs_exec(struct draw_context *draw,
vs->base.prepare = vs_exec_prepare;
vs->base.run_linear = vs_exec_run_linear;
vs->base.delete = vs_exec_delete;
- vs->base.create_varient = draw_vs_create_varient_generic;
+ vs->base.create_variant = draw_vs_create_variant_generic;
vs->machine = draw->vs.machine;
return &vs->base;
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index fa9992db78..54a5574f73 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -65,19 +65,7 @@ static void
vs_llvm_delete( struct draw_vertex_shader *dvs )
{
struct llvm_vertex_shader *shader = llvm_vertex_shader(dvs);
- struct pipe_fence_handle *fence = NULL;
struct draw_llvm_variant_list_item *li;
- struct pipe_context *pipe = dvs->draw->pipe;
-
- /*
- * XXX: This might be not neccessary at all.
- */
- pipe->flush(pipe, 0, &fence);
- if (fence) {
- pipe->screen->fence_finish(pipe->screen, fence, 0);
- pipe->screen->fence_reference(pipe->screen, &fence, NULL);
- }
-
li = first_elem(&shader->variants);
while(!at_end(&shader->variants, li)) {
@@ -119,7 +107,7 @@ draw_create_vs_llvm(struct draw_context *draw,
vs->base.prepare = vs_llvm_prepare;
vs->base.run_linear = vs_llvm_run_linear;
vs->base.delete = vs_llvm_delete;
- vs->base.create_varient = draw_vs_create_varient_generic;
+ vs->base.create_variant = draw_vs_create_variant_generic;
make_empty_list(&vs->variants);
diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c
index 5df84916c5..cf894bbe8a 100644
--- a/src/gallium/auxiliary/draw/draw_vs_ppc.c
+++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c
@@ -187,10 +187,10 @@ draw_create_vs_ppc(struct draw_context *draw,
vs->base.draw = draw;
#if 0
if (1)
- vs->base.create_varient = draw_vs_varient_aos_ppc;
+ vs->base.create_variant = draw_vs_variant_aos_ppc;
else
#endif
- vs->base.create_varient = draw_vs_create_varient_generic;
+ vs->base.create_variant = draw_vs_create_variant_generic;
vs->base.prepare = vs_ppc_prepare;
vs->base.run_linear = vs_ppc_run_linear;
vs->base.delete = vs_ppc_delete;
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 0b0c6077c6..d918579bda 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -71,6 +71,12 @@ vs_sse_prepare( struct draw_vertex_shader *base,
struct tgsi_exec_machine *machine = shader->machine;
machine->Samplers = draw->vs.samplers;
+
+ if (base->info.uses_instanceid) {
+ unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_INSTANCEID];
+ assert(i < Elements(machine->SystemValue));
+ machine->SystemValue[i][0] = base->draw->instance_id;
+ }
}
@@ -166,9 +172,9 @@ draw_create_vs_sse(struct draw_context *draw,
vs->base.draw = draw;
if (1)
- vs->base.create_varient = draw_vs_create_varient_aos_sse;
+ vs->base.create_variant = draw_vs_create_variant_aos_sse;
else
- vs->base.create_varient = draw_vs_create_varient_generic;
+ vs->base.create_variant = draw_vs_create_variant_generic;
vs->base.prepare = vs_sse_prepare;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
@@ -194,7 +200,8 @@ draw_create_vs_sse(struct draw_context *draw,
return &vs->base;
fail:
- debug_error("tgsi_emit_sse2() failed, falling back to interpreter\n");
+ if (0)
+ debug_warning("tgsi_emit_sse2() failed, falling back to interpreter\n");
x86_release_func( &vs->sse2_program );
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index eacd160187..d8f030f61e 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -41,8 +41,8 @@
/* A first pass at incorporating vertex fetch/emit functionality into
*/
-struct draw_vs_varient_generic {
- struct draw_vs_varient base;
+struct draw_vs_variant_generic {
+ struct draw_vs_variant base;
struct draw_vertex_shader *shader;
struct draw_context *draw;
@@ -63,13 +63,13 @@ struct draw_vs_varient_generic {
-static void vsvg_set_buffer( struct draw_vs_varient *varient,
+static void vsvg_set_buffer( struct draw_vs_variant *variant,
unsigned buffer,
const void *ptr,
unsigned stride,
unsigned max_index )
{
- struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
+ struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;
vsvg->fetch->set_buffer(vsvg->fetch,
buffer,
@@ -81,7 +81,7 @@ static void vsvg_set_buffer( struct draw_vs_varient *varient,
/* Mainly for debug at this stage:
*/
-static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg,
+static void do_rhw_viewport( struct draw_vs_variant_generic *vsvg,
unsigned count,
void *output_buffer )
{
@@ -104,7 +104,7 @@ static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg,
}
}
-static void do_viewport( struct draw_vs_varient_generic *vsvg,
+static void do_viewport( struct draw_vs_variant_generic *vsvg,
unsigned count,
void *output_buffer )
{
@@ -126,12 +126,12 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg,
}
-static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
+static void PIPE_CDECL vsvg_run_elts( struct draw_vs_variant *variant,
const unsigned *elts,
unsigned count,
void *output_buffer)
{
- struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
+ struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;
unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride );
@@ -193,12 +193,12 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
}
-static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
+static void PIPE_CDECL vsvg_run_linear( struct draw_vs_variant *variant,
unsigned start,
unsigned count,
void *output_buffer )
{
- struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
+ struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;
unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride );
@@ -259,20 +259,20 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
-static void vsvg_destroy( struct draw_vs_varient *varient )
+static void vsvg_destroy( struct draw_vs_variant *variant )
{
- FREE(varient);
+ FREE(variant);
}
-struct draw_vs_varient *
-draw_vs_create_varient_generic( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key )
+struct draw_vs_variant *
+draw_vs_create_variant_generic( struct draw_vertex_shader *vs,
+ const struct draw_vs_variant_key *key )
{
unsigned i;
struct translate_key fetch, emit;
- struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic );
+ struct draw_vs_variant_generic *vsvg = CALLOC_STRUCT( draw_vs_variant_generic );
if (vsvg == NULL)
return NULL;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h
index c749a7a315..69718eb4b3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h
@@ -125,4 +125,22 @@ lp_build_const_float(struct gallivm_state *gallivm, float x)
}
+/** Return constant-valued pointer to int */
+static INLINE LLVMValueRef
+lp_build_const_int_pointer(struct gallivm_state *gallivm, const void *ptr)
+{
+ LLVMTypeRef int_type;
+ LLVMValueRef v;
+
+ /* int type large enough to hold a pointer */
+ int_type = LLVMIntTypeInContext(gallivm->context, 8 * sizeof(void *));
+ v = LLVMConstInt(int_type, (uintptr_t) ptr, 0);
+ v = LLVMBuildIntToPtr(gallivm->builder, v,
+ LLVMPointerType(int_type, 0),
+ "cast int to ptr");
+ return v;
+}
+
+
+
#endif /* !LP_BLD_CONST_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index c43ee8ac63..c261d76116 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -231,44 +231,53 @@ lp_build_unsigned_norm_to_float(struct gallivm_state *gallivm,
assert(dst_type.floating);
- /* Special-case int8->float, though most cases could be handled
- * this way:
- */
- if (src_width == 8) {
- scale = 1.0/255.0;
+ mantissa = lp_mantissa(dst_type);
+
+ if (src_width <= (mantissa + 1)) {
+ /*
+ * The source width matches fits what can be represented in floating
+ * point (i.e., mantissa + 1 bits). So do a straight multiplication
+ * followed by casting. No further rounding is necessary.
+ */
+
+ scale = 1.0/(double)((1ULL << src_width) - 1);
res = LLVMBuildSIToFP(builder, src, vec_type, "");
res = LLVMBuildFMul(builder, res,
lp_build_const_vec(gallivm, dst_type, scale), "");
return res;
}
+ else {
+ /*
+ * The source width exceeds what can be represented in floating
+ * point. So truncate the incoming values.
+ */
- mantissa = lp_mantissa(dst_type);
-
- n = MIN2(mantissa, src_width);
+ n = MIN2(mantissa, src_width);
- ubound = ((unsigned long long)1 << n);
- mask = ubound - 1;
- scale = (double)ubound/mask;
- bias = (double)((unsigned long long)1 << (mantissa - n));
+ ubound = ((unsigned long long)1 << n);
+ mask = ubound - 1;
+ scale = (double)ubound/mask;
+ bias = (double)((unsigned long long)1 << (mantissa - n));
- res = src;
+ res = src;
- if(src_width > mantissa) {
- int shift = src_width - mantissa;
- res = LLVMBuildLShr(builder, res,
- lp_build_const_int_vec(gallivm, dst_type, shift), "");
- }
+ if (src_width > mantissa) {
+ int shift = src_width - mantissa;
+ res = LLVMBuildLShr(builder, res,
+ lp_build_const_int_vec(gallivm, dst_type, shift), "");
+ }
- bias_ = lp_build_const_vec(gallivm, dst_type, bias);
+ bias_ = lp_build_const_vec(gallivm, dst_type, bias);
- res = LLVMBuildOr(builder,
- res,
- LLVMBuildBitCast(builder, bias_, int_vec_type, ""), "");
+ res = LLVMBuildOr(builder,
+ res,
+ LLVMBuildBitCast(builder, bias_, int_vec_type, ""), "");
- res = LLVMBuildBitCast(builder, res, vec_type, "");
+ res = LLVMBuildBitCast(builder, res, vec_type, "");
- res = LLVMBuildFSub(builder, res, bias_, "");
- res = LLVMBuildFMul(builder, res, lp_build_const_vec(gallivm, dst_type, scale), "");
+ res = LLVMBuildFSub(builder, res, bias_, "");
+ res = LLVMBuildFMul(builder, res, lp_build_const_vec(gallivm, dst_type, scale), "");
+ }
return res;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.c b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
deleted file mode 100644
index 93e56553d7..0000000000
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.c
+++ /dev/null
@@ -1,141 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifdef HAVE_UDIS86
-#include <udis86.h>
-#endif
-
-#include "util/u_math.h"
-#include "util/u_debug.h"
-#include "lp_bld_debug.h"
-
-
-/**
- * Check alignment.
- *
- * It is important that this check is not implemented as a macro or inlined
- * function, as the compiler assumptions in respect to alignment of global
- * and stack variables would often make the check a no op, defeating the
- * whole purpose of the exercise.
- */
-boolean
-lp_check_alignment(const void *ptr, unsigned alignment)
-{
- assert(util_is_power_of_two(alignment));
- return ((uintptr_t)ptr & (alignment - 1)) == 0;
-}
-
-
-void
-lp_disassemble(const void* func)
-{
-#ifdef HAVE_UDIS86
- ud_t ud_obj;
- uint64_t max_jmp_pc;
- uint inst_no;
- boolean emit_addrs = TRUE, emit_line_nos = FALSE;
-
- ud_init(&ud_obj);
-
- ud_set_input_buffer(&ud_obj, (void*)func, 0xffff);
-
- max_jmp_pc = (uint64_t) (uintptr_t) func;
- ud_set_pc(&ud_obj, max_jmp_pc);
-
-#ifdef PIPE_ARCH_X86
- ud_set_mode(&ud_obj, 32);
-#endif
-#ifdef PIPE_ARCH_X86_64
- ud_set_mode(&ud_obj, 64);
-#endif
-
- ud_set_syntax(&ud_obj, UD_SYN_ATT);
-
- while (ud_disassemble(&ud_obj)) {
-
- if (emit_addrs) {
-#ifdef PIPE_ARCH_X86
- debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj));
-#endif
-#ifdef PIPE_ARCH_X86_64
- debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj));
-#endif
- }
- else if (emit_line_nos) {
- debug_printf("%6d:\t", inst_no);
- inst_no++;
- }
-#if 0
- debug_printf("%-16s ", ud_insn_hex(&ud_obj));
-#endif
-
- debug_printf("%s\n", ud_insn_asm(&ud_obj));
-
- if(ud_obj.mnemonic != UD_Icall) {
- unsigned i;
- for(i = 0; i < 3; ++i) {
- const struct ud_operand *op = &ud_obj.operand[i];
- if (op->type == UD_OP_JIMM){
- uint64_t pc = ud_obj.pc;
-
- switch (op->size) {
- case 8:
- pc += op->lval.sbyte;
- break;
- case 16:
- pc += op->lval.sword;
- break;
- case 32:
- pc += op->lval.sdword;
- break;
- default:
- break;
- }
- if(pc > max_jmp_pc)
- max_jmp_pc = pc;
- }
- }
- }
-
- if (ud_obj.mnemonic == UD_Iinvalid ||
- (ud_insn_off(&ud_obj) >= max_jmp_pc &&
- (ud_obj.mnemonic == UD_Iret ||
- ud_obj.mnemonic == UD_Ijmp)))
- break;
- }
-
-#if 0
- /* Print GDB command, useful to verify udis86 output */
- debug_printf("disassemble %p %p\n", func, (void*)(uintptr_t)ud_obj.pc);
-#endif
-
- debug_printf("\n");
-#else
- (void)func;
-#endif
-}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
new file mode 100644
index 0000000000..bb2c82fe0e
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
@@ -0,0 +1,357 @@
+/**************************************************************************
+ *
+ * Copyright 2009-2011 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <llvm-c/Core.h>
+#include <llvm/Target/TargetMachine.h>
+#include <llvm/Target/TargetRegistry.h>
+#include <llvm/Target/TargetSelect.h>
+#include <llvm/Target/TargetInstrInfo.h>
+#include <llvm/Support/raw_ostream.h>
+#include <llvm/Support/MemoryObject.h>
+#include <llvm/System/Host.h>
+
+#if HAVE_LLVM >= 0x0207
+#include <llvm/MC/MCDisassembler.h>
+#include <llvm/MC/MCAsmInfo.h>
+#include <llvm/MC/MCInst.h>
+#include <llvm/MC/MCInstPrinter.h>
+#endif /* HAVE_LLVM >= 0x0207 */
+
+#include "util/u_math.h"
+#include "util/u_debug.h"
+
+#include "lp_bld_debug.h"
+
+
+
+/**
+ * Check alignment.
+ *
+ * It is important that this check is not implemented as a macro or inlined
+ * function, as the compiler assumptions in respect to alignment of global
+ * and stack variables would often make the check a no op, defeating the
+ * whole purpose of the exercise.
+ */
+extern "C" boolean
+lp_check_alignment(const void *ptr, unsigned alignment)
+{
+ assert(util_is_power_of_two(alignment));
+ return ((uintptr_t)ptr & (alignment - 1)) == 0;
+}
+
+
+class raw_debug_ostream :
+ public llvm::raw_ostream
+{
+ uint64_t pos;
+
+ void write_impl(const char *Ptr, size_t Size);
+ uint64_t current_pos() { return pos; }
+ uint64_t current_pos() const { return pos; }
+
+#if HAVE_LLVM >= 0x207
+ uint64_t preferred_buffer_size() { return 512; }
+#else
+ size_t preferred_buffer_size() { return 512; }
+#endif
+};
+
+
+void
+raw_debug_ostream::write_impl(const char *Ptr, size_t Size)
+{
+ if (Size > 0) {
+ char *lastPtr = (char *)&Ptr[Size];
+ char last = *lastPtr;
+ *lastPtr = 0;
+ _debug_printf("%*s", Size, Ptr);
+ *lastPtr = last;
+ pos += Size;
+ }
+}
+
+
+/**
+ * Same as LLVMDumpValue, but through our debugging channels.
+ */
+extern "C" void
+lp_debug_dump_value(LLVMValueRef value)
+{
+#if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || defined(PIPE_OS_EMBDDED)
+ raw_debug_ostream os;
+ llvm::unwrap(value)->print(os);
+ os.flush();
+#else
+ LLVMDumpValue(value);
+#endif
+}
+
+
+#if HAVE_LLVM >= 0x0207
+/*
+ * MemoryObject wrapper around a buffer of memory, to be used by MC
+ * disassembler.
+ */
+class BufferMemoryObject:
+ public llvm::MemoryObject
+{
+private:
+ const uint8_t *Bytes;
+ uint64_t Length;
+public:
+ BufferMemoryObject(const uint8_t *bytes, uint64_t length) :
+ Bytes(bytes), Length(length)
+ {
+ }
+
+ uint64_t getBase() const
+ {
+ return 0;
+ }
+
+ uint64_t getExtent() const
+ {
+ return Length;
+ }
+
+ int readByte(uint64_t addr, uint8_t *byte) const
+ {
+ if (addr > getExtent())
+ return -1;
+ *byte = Bytes[addr];
+ return 0;
+ }
+};
+#endif /* HAVE_LLVM >= 0x0207 */
+
+
+/*
+ * Disassemble a function, using the LLVM MC disassembler.
+ *
+ * See also:
+ * - http://blog.llvm.org/2010/01/x86-disassembler.html
+ * - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html
+ */
+extern "C" void
+lp_disassemble(const void* func)
+{
+#if HAVE_LLVM >= 0x0207
+ using namespace llvm;
+
+ const uint8_t *bytes = (const uint8_t *)func;
+
+ /*
+ * Limit disassembly to this extent
+ */
+ const uint64_t extent = 0x10000;
+
+ uint64_t max_pc = 0;
+
+ /*
+ * Initialize all used objects.
+ */
+
+ std::string Triple = sys::getHostTriple();
+
+ std::string Error;
+ const Target *T = TargetRegistry::lookupTarget(Triple, Error);
+
+#if HAVE_LLVM >= 0x0208
+ InitializeNativeTargetAsmPrinter();
+#else
+ InitializeAllAsmPrinters();
+#endif
+
+ InitializeAllDisassemblers();
+
+ OwningPtr<const MCAsmInfo> AsmInfo(T->createAsmInfo(Triple));
+
+ if (!AsmInfo) {
+ debug_printf("error: no assembly info for target %s\n", Triple.c_str());
+ return;
+ }
+
+ OwningPtr<const MCDisassembler> DisAsm(T->createMCDisassembler());
+ if (!DisAsm) {
+ debug_printf("error: no disassembler for target %s\n", Triple.c_str());
+ return;
+ }
+
+ raw_debug_ostream Out;
+
+ int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
+#if HAVE_LLVM >= 0x0208
+ OwningPtr<MCInstPrinter> Printer(
+ T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo));
+#else
+ OwningPtr<MCInstPrinter> Printer(
+ T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, Out));
+#endif
+ if (!Printer) {
+ debug_printf("error: no instruction printer for target %s\n", Triple.c_str());
+ return;
+ }
+
+ TargetMachine *TM = T->createTargetMachine(Triple, "");
+
+ const TargetInstrInfo *TII = TM->getInstrInfo();
+
+ /*
+ * Wrap the data in a MemoryObject
+ */
+ BufferMemoryObject memoryObject((const uint8_t *)bytes, extent);
+
+ uint64_t pc;
+ pc = 0;
+ while (true) {
+ MCInst Inst;
+ uint64_t Size;
+
+ /*
+ * Print address. We use addresses relative to the start of the function,
+ * so that between runs.
+ */
+
+ debug_printf("%6lu:\t", (unsigned long)pc);
+
+ if (!DisAsm->getInstruction(Inst, Size, memoryObject,
+ pc,
+ nulls())) {
+ debug_printf("invalid\n");
+ pc += 1;
+ }
+
+ /*
+ * Output the bytes in hexidecimal format.
+ */
+
+ if (0) {
+ unsigned i;
+ for (i = 0; i < Size; ++i) {
+ debug_printf("%02x ", ((const uint8_t*)bytes)[pc + i]);
+ }
+ for (; i < 16; ++i) {
+ debug_printf(" ");
+ }
+ }
+
+ /*
+ * Print the instruction.
+ */
+
+#if HAVE_LLVM >= 0x208
+ Printer->printInst(&Inst, Out);
+#else
+ Printer->printInst(&Inst);
+#endif
+ Out.flush();
+
+ /*
+ * Advance.
+ */
+
+ pc += Size;
+
+ const TargetInstrDesc &TID = TII->get(Inst.getOpcode());
+
+ /*
+ * Keep track of forward jumps to a nearby address.
+ */
+
+ if (TID.isBranch()) {
+ for (unsigned i = 0; i < Inst.getNumOperands(); ++i) {
+ const MCOperand &operand = Inst.getOperand(i);
+ if (operand.isImm()) {
+ uint64_t jump;
+
+ /*
+ * FIXME: Handle both relative and absolute addresses correctly.
+ * EDInstInfo actually has this info, but operandTypes and
+ * operandFlags enums are not exposed in the public interface.
+ */
+
+ if (1) {
+ /*
+ * PC relative addr.
+ */
+
+ jump = pc + operand.getImm();
+ } else {
+ /*
+ * Absolute addr.
+ */
+
+ jump = (uint64_t)operand.getImm();
+ }
+
+ /*
+ * Output the address relative to the function start, given
+ * that MC will print the addresses relative the current pc.
+ */
+ debug_printf("\t\t; %lu", (unsigned long)jump);
+
+ /*
+ * Ignore far jumps given it could be actually a tail return to
+ * a random address.
+ */
+
+ if (jump > max_pc &&
+ jump < extent) {
+ max_pc = jump;
+ }
+ }
+ }
+ }
+
+ debug_printf("\n");
+
+ /*
+ * Stop disassembling on return statements, if there is no record of a
+ * jump to a successive address.
+ */
+
+ if (TID.isReturn()) {
+ if (pc > max_pc) {
+ break;
+ }
+ }
+ }
+
+ /*
+ * Print GDB command, useful to verify output.
+ */
+
+ if (0) {
+ debug_printf("disassemble %p %p\n", bytes, bytes + pc);
+ }
+
+ debug_printf("\n");
+#else /* HAVE_LLVM < 0x0207 */
+ (void)func;
+#endif /* HAVE_LLVM < 0x0207 */
+}
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
index 8a58f95b78..da873f30b2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
@@ -45,6 +45,11 @@
#define GALLIVM_DEBUG_GC (1 << 6)
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
#ifdef DEBUG
extern unsigned gallivm_debug;
#else
@@ -81,4 +86,9 @@ void
lp_disassemble(const void* func);
+#ifdef __cplusplus
+}
+#endif
+
+
#endif /* !LP_BLD_DEBUG_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index 75d2e666f0..82ab19eda1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -36,6 +36,7 @@
#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_pointer.h"
#include "util/u_string.h"
#include "lp_bld_arit.h"
@@ -511,8 +512,6 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
* or incentive to optimize.
*/
- LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
- char name[256];
LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
@@ -522,19 +521,20 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
LLVMValueRef res;
unsigned k;
- util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm",
- format_desc->short_name);
-
if (gallivm_debug & GALLIVM_DEBUG_PERF) {
- debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
+ debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n",
+ __FUNCTION__, format_desc->short_name);
}
/*
* Declare and bind format_desc->fetch_rgba_8unorm().
*/
- function = LLVMGetNamedFunction(module, name);
- if (!function) {
+ {
+ /*
+ * Function to call looks like:
+ * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+ */
LLVMTypeRef ret_type;
LLVMTypeRef arg_types[4];
LLVMTypeRef function_type;
@@ -542,17 +542,19 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
ret_type = LLVMVoidTypeInContext(gallivm->context);
arg_types[0] = pi8t;
arg_types[1] = pi8t;
- arg_types[3] = arg_types[2] = LLVMIntTypeInContext(gallivm->context, sizeof(unsigned) * 8);
- function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
- function = LLVMAddFunction(module, name, function_type);
-
- LLVMSetFunctionCallConv(function, LLVMCCallConv);
- LLVMSetLinkage(function, LLVMExternalLinkage);
-
- assert(LLVMIsDeclaration(function));
-
- LLVMAddGlobalMapping(gallivm->engine, function,
- func_to_pointer((func_pointer)format_desc->fetch_rgba_8unorm));
+ arg_types[2] = i32t;
+ arg_types[3] = i32t;
+ function_type = LLVMFunctionType(ret_type, arg_types,
+ Elements(arg_types), 0);
+
+ /* make const pointer for the C fetch_rgba_8unorm function */
+ function = lp_build_const_int_pointer(gallivm,
+ func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));
+
+ /* cast the callee pointer to the function's type */
+ function = LLVMBuildBitCast(builder, function,
+ LLVMPointerType(function_type, 0),
+ "cast callee");
}
tmp_ptr = lp_build_alloca(gallivm, i32t, "");
@@ -614,48 +616,55 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
* or incentive to optimize.
*/
- LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
- char name[256];
LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
+ LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
+ LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
LLVMValueRef function;
LLVMValueRef tmp_ptr;
LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
LLVMValueRef res;
unsigned k;
- util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
- format_desc->short_name);
-
if (gallivm_debug & GALLIVM_DEBUG_PERF) {
- debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
+ debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n",
+ __FUNCTION__, format_desc->short_name);
}
/*
* Declare and bind format_desc->fetch_rgba_float().
*/
- function = LLVMGetNamedFunction(module, name);
- if (!function) {
+ {
+ /*
+ * Function to call looks like:
+ * fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
+ */
LLVMTypeRef ret_type;
LLVMTypeRef arg_types[4];
LLVMTypeRef function_type;
ret_type = LLVMVoidTypeInContext(gallivm->context);
arg_types[0] = pf32t;
- arg_types[1] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
- arg_types[3] = arg_types[2] = LLVMIntTypeInContext(gallivm->context, sizeof(unsigned) * 8);
- function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
- function = LLVMAddFunction(module, name, function_type);
+ arg_types[1] = pi8t;
+ arg_types[2] = i32t;
+ arg_types[3] = i32t;
+ function_type = LLVMFunctionType(ret_type, arg_types,
+ Elements(arg_types), 0);
- LLVMSetFunctionCallConv(function, LLVMCCallConv);
- LLVMSetLinkage(function, LLVMExternalLinkage);
+ /* Note: we're using this casting here instead of LLVMAddGlobalMapping()
+ * to work around a bug in LLVM 2.6, and for efficiency/simplicity.
+ */
- assert(LLVMIsDeclaration(function));
+ /* make const pointer for the C fetch_rgba_float function */
+ function = lp_build_const_int_pointer(gallivm,
+ func_to_pointer((func_pointer) format_desc->fetch_rgba_float));
- LLVMAddGlobalMapping(gallivm->engine, function,
- func_to_pointer((func_pointer)format_desc->fetch_rgba_float));
+ /* cast the callee pointer to the function's type */
+ function = LLVMBuildBitCast(builder, function,
+ LLVMPointerType(function_type, 0),
+ "cast callee");
}
tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index efe8d38b8f..45addee8fa 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -30,6 +30,7 @@
#include "util/u_cpu_detect.h"
#include "util/u_debug.h"
#include "util/u_memory.h"
+#include "util/u_simple_list.h"
#include "lp_bld_debug.h"
#include "lp_bld_init.h"
@@ -221,11 +222,12 @@ free_gallivm_state(struct gallivm_state *gallivm)
static boolean
init_gallivm_state(struct gallivm_state *gallivm)
{
- assert(gallivm_initialized);
assert(!gallivm->context);
assert(!gallivm->module);
assert(!gallivm->provider);
+ lp_build_init();
+
gallivm->context = LLVMContextCreate();
if (!gallivm->context)
goto fail;
@@ -291,12 +293,12 @@ struct callback
{
garbage_collect_callback_func func;
void *cb_data;
+ struct callback *prev, *next;
};
-#define MAX_CALLBACKS 32
-static struct callback Callbacks[MAX_CALLBACKS];
-static unsigned NumCallbacks = 0;
+/** list of all garbage collector callbacks */
+static struct callback callback_list = {NULL, NULL, NULL, NULL};
/**
@@ -307,20 +309,24 @@ void
gallivm_register_garbage_collector_callback(garbage_collect_callback_func func,
void *cb_data)
{
- unsigned i;
+ struct callback *cb;
+
+ if (!callback_list.prev) {
+ make_empty_list(&callback_list);
+ }
- for (i = 0; i < NumCallbacks; i++) {
- if (Callbacks[i].func == func && Callbacks[i].cb_data == cb_data) {
- /* already in list: no-op */
+ /* see if already in list */
+ foreach(cb, &callback_list) {
+ if (cb->func == func && cb->cb_data == cb_data)
return;
- }
}
- assert(NumCallbacks < MAX_CALLBACKS);
- if (NumCallbacks < MAX_CALLBACKS) {
- Callbacks[NumCallbacks].func = func;
- Callbacks[NumCallbacks].cb_data = cb_data;
- NumCallbacks++;
+ /* add to list */
+ cb = CALLOC_STRUCT(callback);
+ if (cb) {
+ cb->func = func;
+ cb->cb_data = cb_data;
+ insert_at_head(&callback_list, cb);
}
}
@@ -332,15 +338,13 @@ void
gallivm_remove_garbage_collector_callback(garbage_collect_callback_func func,
void *cb_data)
{
- unsigned i;
-
- for (i = 0; i < NumCallbacks; i++) {
- if (Callbacks[i].func == func && Callbacks[i].cb_data == cb_data) {
- /* found, now remove it */
- NumCallbacks--;
- for ( ; i < NumCallbacks; i++) {
- Callbacks[i] = Callbacks[i + 1];
- }
+ struct callback *cb;
+
+ /* search list */
+ foreach(cb, &callback_list) {
+ if (cb->func == func && cb->cb_data == cb_data) {
+ /* found, remove it */
+ remove_from_list(cb);
return;
}
}
@@ -354,10 +358,9 @@ gallivm_remove_garbage_collector_callback(garbage_collect_callback_func func,
static void
call_garbage_collector_callbacks(void)
{
- unsigned i;
-
- for (i = 0; i < NumCallbacks; i++) {
- Callbacks[i].func(Callbacks[i].cb_data);
+ struct callback *cb;
+ foreach(cb, &callback_list) {
+ cb->func(cb->cb_data);
}
}
@@ -385,6 +388,9 @@ gallivm_garbage_collect(struct gallivm_state *gallivm)
void
lp_build_init(void)
{
+ if (gallivm_initialized)
+ return;
+
#ifdef DEBUG
gallivm_debug = debug_get_option_gallivm_debug();
#endif
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index f56ddee7fd..843a14a500 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -46,66 +46,6 @@
#include "util/u_debug.h"
-#if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || defined(PIPE_OS_EMBDDED)
-
-#include "llvm/Support/raw_ostream.h"
-
-class raw_debug_ostream :
- public llvm::raw_ostream
-{
- uint64_t pos;
-
- void write_impl(const char *Ptr, size_t Size);
- uint64_t current_pos() { return pos; }
- uint64_t current_pos() const { return pos; }
-
-#if HAVE_LLVM >= 0x207
- uint64_t preferred_buffer_size() { return 512; }
-#else
- size_t preferred_buffer_size() { return 512; }
-#endif
-};
-
-
-void
-raw_debug_ostream::write_impl(const char *Ptr, size_t Size)
-{
- if (Size > 0) {
- char *lastPtr = (char *)&Ptr[Size];
- char last = *lastPtr;
- *lastPtr = 0;
- _debug_printf("%*s", Size, Ptr);
- *lastPtr = last;
- pos += Size;
- }
-}
-
-
-/**
- * Same as LLVMDumpValue, but through our debugging channels.
- */
-extern "C" void
-lp_debug_dump_value(LLVMValueRef value)
-{
- raw_debug_ostream os;
- llvm::unwrap(value)->print(os);
- os.flush();
-}
-
-
-#else
-
-
-extern "C" void
-lp_debug_dump_value(LLVMValueRef value)
-{
- LLVMDumpValue(value);
-}
-
-
-#endif
-
-
/**
* Register the engine with oprofile.
*
@@ -144,6 +84,7 @@ lp_set_target_options(void)
llvm::UnsafeFPMath = true;
#endif
+#if HAVE_LLVM < 0x0209
/*
* LLVM will generate MMX instructions for vectors <= 64 bits, leading to
* innefficient code, and in 32bit systems, to the corruption of the FPU
@@ -162,6 +103,7 @@ lp_set_target_options(void)
llvm::cl::ParseCommandLineOptions(2, const_cast<char**>(options));
first = FALSE;
}
+#endif
/*
* By default LLVM adds a signal handler to output a pretty stack trace.
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index 991f6fa5ef..e61cf9541e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -1098,6 +1098,4 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
texel_out[2] = unswizzled[2];
texel_out[3] = unswizzled[3];
}
-
- apply_sampler_swizzle(bld, texel_out);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index cf46e2be83..9961ba08f3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -187,8 +187,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
border_chan_vec, texel_out[chan]);
}
}
-
- apply_sampler_swizzle(bld, texel_out);
}
@@ -834,14 +832,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
LLVMValueRef *colors_out)
{
LLVMBuilderRef builder = bld->gallivm->builder;
- LLVMValueRef size0;
- LLVMValueRef size1;
- LLVMValueRef row_stride0_vec;
- LLVMValueRef row_stride1_vec;
- LLVMValueRef img_stride0_vec;
- LLVMValueRef img_stride1_vec;
- LLVMValueRef data_ptr0;
- LLVMValueRef data_ptr1;
+ LLVMValueRef size0 = NULL;
+ LLVMValueRef size1 = NULL;
+ LLVMValueRef row_stride0_vec = NULL;
+ LLVMValueRef row_stride1_vec = NULL;
+ LLVMValueRef img_stride0_vec = NULL;
+ LLVMValueRef img_stride1_vec = NULL;
+ LLVMValueRef data_ptr0 = NULL;
+ LLVMValueRef data_ptr1 = NULL;
LLVMValueRef colors0[4], colors1[4];
unsigned chan;
@@ -1110,6 +1108,11 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
coord, tex);
}
+ /* Clamp p coords to [0,1] */
+ p = lp_build_clamp(&bld->coord_bld, p,
+ bld->coord_bld.zero,
+ bld->coord_bld.one);
+
/* result = (p FUNC texel) ? 1 : 0 */
res = lp_build_cmp(texel_bld, bld->static_state->compare_func,
p, texel[chan]);
@@ -1268,4 +1271,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
}
lp_build_sample_compare(&bld, r, texel_out);
+
+ apply_sampler_swizzle(&bld, texel_out);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 40186befb9..9713d10048 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -180,6 +180,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
struct lp_type type,
struct lp_build_mask_context *mask,
LLVMValueRef consts_ptr,
+ LLVMValueRef system_values_array,
const LLVMValueRef *pos,
const LLVMValueRef (*inputs)[4],
LLVMValueRef (*outputs)[4],
@@ -199,4 +200,11 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm,
const struct tgsi_shader_info *info);
+LLVMValueRef
+lp_build_system_values_array(struct gallivm_state *gallivm,
+ const struct tgsi_shader_info *info,
+ LLVMValueRef instance_id,
+ LLVMValueRef facing);
+
+
#endif /* LP_BLD_TGSI_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 1b5a8a5903..d1585c8e2b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -157,6 +157,8 @@ struct lp_build_tgsi_soa_context
*/
LLVMValueRef inputs_array;
+ LLVMValueRef system_values_array;
+
const struct tgsi_shader_info *info;
/** bitmask indicating which register files are accessed indirectly */
unsigned indirect_files;
@@ -759,6 +761,23 @@ emit_fetch(
}
break;
+ case TGSI_FILE_SYSTEM_VALUE:
+ assert(!reg->Register.Indirect);
+ {
+ LLVMValueRef index; /* index into the system value array */
+ LLVMValueRef scalar, scalar_ptr;
+
+ index = lp_build_const_int32(gallivm,
+ reg->Register.Index * 4 + swizzle);
+
+ scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array,
+ &index, 1, "");
+ scalar = LLVMBuildLoad(builder, scalar_ptr, "");
+
+ res = lp_build_broadcast_scalar(&bld->base, scalar);
+ }
+ break;
+
default:
assert(0 && "invalid src register in emit_fetch()");
return bld->base.undef;
@@ -2322,6 +2341,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
struct lp_type type,
struct lp_build_mask_context *mask,
LLVMValueRef consts_ptr,
+ LLVMValueRef system_values_array,
const LLVMValueRef *pos,
const LLVMValueRef (*inputs)[NUM_CHANNELS],
LLVMValueRef (*outputs)[NUM_CHANNELS],
@@ -2411,6 +2431,8 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
}
}
+ bld.system_values_array = system_values_array;
+
tgsi_parse_init( &parse, tokens );
while( !tgsi_parse_end_of_tokens( &parse ) ) {
@@ -2512,3 +2534,54 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
FREE( bld.instructions );
}
+
+/**
+ * Build up the system values array out of individual values such as
+ * the instance ID, front-face, primitive ID, etc. The shader info is
+ * used to determine which system values are needed and where to put
+ * them in the system values array.
+ *
+ * XXX only instance ID is implemented at this time.
+ *
+ * The system values register file is similar to the constants buffer.
+ * Example declaration:
+ * DCL SV[0], INSTANCEID
+ * Example instruction:
+ * MOVE foo, SV[0].xxxx;
+ *
+ * \return LLVM float array (interpreted as float [][4])
+ */
+LLVMValueRef
+lp_build_system_values_array(struct gallivm_state *gallivm,
+ const struct tgsi_shader_info *info,
+ LLVMValueRef instance_id,
+ LLVMValueRef facing)
+{
+ LLVMValueRef size = lp_build_const_int32(gallivm, 4 * info->num_system_values);
+ LLVMTypeRef float_t = LLVMFloatTypeInContext(gallivm->context);
+ LLVMValueRef array = lp_build_array_alloca(gallivm, float_t,
+ size, "sysvals_array");
+ unsigned i;
+
+ for (i = 0; i < info->num_system_values; i++) {
+ LLVMValueRef index = lp_build_const_int32(gallivm, i * 4);
+ LLVMValueRef ptr, value;
+
+ switch (info->system_value_semantic_name[i]) {
+ case TGSI_SEMANTIC_INSTANCEID:
+ /* convert instance ID from int to float */
+ value = LLVMBuildSIToFP(gallivm->builder, instance_id, float_t,
+ "sysval_instanceid");
+ break;
+ case TGSI_SEMANTIC_FACE:
+ /* fall-through */
+ default:
+ assert(0 && "unexpected semantic in build_system_values_array()");
+ }
+
+ ptr = LLVMBuildGEP(gallivm->builder, array, &index, 1, "");
+ LLVMBuildStore(gallivm->builder, value, ptr);
+ }
+
+ return array;
+}
diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h
index 64e3869a74..3ad7ce645d 100644
--- a/src/gallium/auxiliary/os/os_thread.h
+++ b/src/gallium/auxiliary/os/os_thread.h
@@ -152,8 +152,9 @@ static INLINE int pipe_thread_destroy( pipe_thread thread )
*/
typedef CRITICAL_SECTION pipe_mutex;
+/* http://locklessinc.com/articles/pthreads_on_windows/ */
#define pipe_static_mutex(mutex) \
- /*static*/ pipe_mutex mutex = {0,0,0,0,0,0}
+ static pipe_mutex mutex = {(PCRITICAL_SECTION_DEBUG)-1, -1, 0, 0, 0, 0}
#define pipe_mutex_init(mutex) \
InitializeCriticalSection(&mutex)
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
index c2322eed19..5754f47618 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
@@ -174,11 +174,20 @@ pb_malloc_bufmgr_destroy(struct pb_manager *mgr)
}
+static boolean
+pb_malloc_bufmgr_is_buffer_busy( struct pb_manager *mgr,
+ struct pb_buffer *buf )
+{
+ return FALSE;
+}
+
+
static struct pb_manager
pb_malloc_bufmgr = {
pb_malloc_bufmgr_destroy,
pb_malloc_bufmgr_create_buffer,
- pb_malloc_bufmgr_flush
+ pb_malloc_bufmgr_flush,
+ pb_malloc_bufmgr_is_buffer_busy
};
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
index 2ef02160f2..960068c494 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -82,6 +82,10 @@ struct pb_manager
*/
void
(*flush)( struct pb_manager *mgr );
+
+ boolean
+ (*is_buffer_busy)( struct pb_manager *mgr,
+ struct pb_buffer *buf );
};
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index a6eb403962..25accefa8d 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -227,8 +227,6 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
pb_size size,
const struct pb_desc *desc)
{
- void *map;
-
if(buf->base.base.size < size)
return 0;
@@ -242,13 +240,18 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
if(!pb_check_usage(desc->usage, buf->base.base.usage))
return 0;
- map = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL);
- if (!map) {
- return -1;
+ if (buf->mgr->provider->is_buffer_busy) {
+ if (buf->mgr->provider->is_buffer_busy(buf->mgr->provider, buf->buffer))
+ return -1;
+ } else {
+ void *ptr = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL);
+
+ if (!ptr)
+ return -1;
+
+ pb_unmap(buf->buffer);
}
- pb_unmap(buf->buffer);
-
return 1;
}
diff --git a/src/gallium/auxiliary/rbug/rbug_context.c b/src/gallium/auxiliary/rbug/rbug_context.c
index a3fd7e8430..36824058c7 100644
--- a/src/gallium/auxiliary/rbug/rbug_context.c
+++ b/src/gallium/auxiliary/rbug/rbug_context.c
@@ -288,7 +288,6 @@ int rbug_send_context_draw_rule(struct rbug_connection *__con,
int rbug_send_context_flush(struct rbug_connection *__con,
rbug_context_t context,
- int32_t flags,
uint32_t *__serial)
{
uint32_t __len = 0;
@@ -298,7 +297,6 @@ int rbug_send_context_flush(struct rbug_connection *__con,
LEN(8); /* header */
LEN(8); /* context */
- LEN(4); /* flags */
/* align */
PAD(__len, 8);
@@ -310,7 +308,6 @@ int rbug_send_context_flush(struct rbug_connection *__con,
WRITE(4, int32_t, ((int32_t)RBUG_OP_CONTEXT_FLUSH));
WRITE(4, uint32_t, ((uint32_t)(__len / 4)));
WRITE(8, rbug_context_t, context); /* context */
- WRITE(4, int32_t, flags); /* flags */
/* final pad */
PAD(__pos, 8);
@@ -663,7 +660,6 @@ struct rbug_proto_context_flush * rbug_demarshal_context_flush(struct rbug_proto
ret->header.opcode = header->opcode;
READ(8, rbug_context_t, context); /* context */
- READ(4, int32_t, flags); /* flags */
return ret;
}
diff --git a/src/gallium/auxiliary/rbug/rbug_context.h b/src/gallium/auxiliary/rbug/rbug_context.h
index 03126d6b12..4a865c25fc 100644
--- a/src/gallium/auxiliary/rbug/rbug_context.h
+++ b/src/gallium/auxiliary/rbug/rbug_context.h
@@ -96,7 +96,6 @@ struct rbug_proto_context_flush
{
struct rbug_header header;
rbug_context_t context;
- int32_t flags;
};
struct rbug_proto_context_list_reply
@@ -162,7 +161,6 @@ int rbug_send_context_draw_rule(struct rbug_connection *__con,
int rbug_send_context_flush(struct rbug_connection *__con,
rbug_context_t context,
- int32_t flags,
uint32_t *__serial);
int rbug_send_context_list_reply(struct rbug_connection *__con,
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 75b0f6a68e..b03dd3a0cf 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -2132,7 +2132,8 @@ struct x86_reg x86_fn_arg( struct x86_function *p,
return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
p->stack_offset + arg * 4); /* ??? */
default:
- abort();
+ assert(0 && "Unexpected x86 target ABI in x86_fn_arg");
+ return x86_make_reg(file_REG32, reg_CX); /* not used / silence warning */
}
}
diff --git a/src/gallium/auxiliary/target-helpers/inline_noop_helper.h b/src/gallium/auxiliary/target-helpers/inline_noop_helper.h
new file mode 100644
index 0000000000..77c7cfd0c2
--- /dev/null
+++ b/src/gallium/auxiliary/target-helpers/inline_noop_helper.h
@@ -0,0 +1,51 @@
+
+#ifndef INLINE_DEBUG_HELPER_H
+#define INLINE_DEBUG_HELPER_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+
+
+/* Helper function to wrap a screen with
+ * one or more debug driver: rbug, trace.
+ */
+
+#ifdef GALLIUM_TRACE
+#include "trace/tr_public.h"
+#endif
+
+#ifdef GALLIUM_RBUG
+#include "rbug/rbug_public.h"
+#endif
+
+#ifdef GALLIUM_GALAHAD
+#include "galahad/glhd_public.h"
+#endif
+
+#ifdef GALLIUM_NOOP
+#include "noop/noop_public.h"
+#endif
+
+static INLINE struct pipe_screen *
+debug_screen_wrap(struct pipe_screen *screen)
+{
+#if defined(GALLIUM_RBUG)
+ screen = rbug_screen_create(screen);
+#endif
+
+#if defined(GALLIUM_TRACE)
+ screen = trace_screen_create(screen);
+#endif
+
+#if defined(GALLIUM_GALAHAD)
+ screen = galahad_screen_create(screen);
+#endif
+
+#if defined(GALLIUM_NOOP)
+ screen = noop_screen_create(screen);
+#endif
+
+ return screen;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 16a205f206..d2cb98e84a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -26,6 +26,7 @@
**************************************************************************/
#include "util/u_debug.h"
+#include "pipe/p_format.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi_build.h"
#include "tgsi_parse.h"
@@ -226,6 +227,45 @@ tgsi_build_declaration_semantic(
return ds;
}
+
+static struct tgsi_declaration_resource
+tgsi_default_declaration_resource(void)
+{
+ struct tgsi_declaration_resource declaration_resource;
+
+ declaration_resource.Resource = TGSI_TEXTURE_UNKNOWN;
+ declaration_resource.ReturnTypeX = PIPE_TYPE_UNORM;
+ declaration_resource.ReturnTypeY = PIPE_TYPE_UNORM;
+ declaration_resource.ReturnTypeZ = PIPE_TYPE_UNORM;
+ declaration_resource.ReturnTypeW = PIPE_TYPE_UNORM;
+
+ return declaration_resource;
+}
+
+static struct tgsi_declaration_resource
+tgsi_build_declaration_resource(unsigned texture,
+ unsigned return_type_x,
+ unsigned return_type_y,
+ unsigned return_type_z,
+ unsigned return_type_w,
+ struct tgsi_declaration *declaration,
+ struct tgsi_header *header)
+{
+ struct tgsi_declaration_resource declaration_resource;
+
+ declaration_resource = tgsi_default_declaration_resource();
+ declaration_resource.Resource = texture;
+ declaration_resource.ReturnTypeX = return_type_x;
+ declaration_resource.ReturnTypeY = return_type_y;
+ declaration_resource.ReturnTypeZ = return_type_z;
+ declaration_resource.ReturnTypeW = return_type_w;
+
+ declaration_grow(declaration, header);
+
+ return declaration_resource;
+}
+
+
struct tgsi_full_declaration
tgsi_default_full_declaration( void )
{
@@ -235,6 +275,7 @@ tgsi_default_full_declaration( void )
full_declaration.Range = tgsi_default_declaration_range();
full_declaration.Semantic = tgsi_default_declaration_semantic();
full_declaration.ImmediateData.u = NULL;
+ full_declaration.Resource = tgsi_default_declaration_resource();
return full_declaration;
}
@@ -324,6 +365,24 @@ tgsi_build_full_declaration(
}
}
+ if (full_decl->Declaration.File == TGSI_FILE_RESOURCE) {
+ struct tgsi_declaration_resource *dr;
+
+ if (maxsize <= size) {
+ return 0;
+ }
+ dr = (struct tgsi_declaration_resource *)&tokens[size];
+ size++;
+
+ *dr = tgsi_build_declaration_resource(full_decl->Resource.Resource,
+ full_decl->Resource.ReturnTypeX,
+ full_decl->Resource.ReturnTypeY,
+ full_decl->Resource.ReturnTypeZ,
+ full_decl->Resource.ReturnTypeW,
+ declaration,
+ header);
+ }
+
return size;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 77bde86684..c12662076b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -104,7 +104,8 @@ tgsi_file_names[TGSI_FILE_COUNT] =
"PRED",
"SV",
"IMMX",
- "TEMPX"
+ "TEMPX",
+ "RES"
};
static const char *interpolate_names[] =
@@ -138,7 +139,7 @@ static const char *immediate_type_names[] =
};
const char *
-tgsi_swizzle_names[] =
+tgsi_swizzle_names[4] =
{
"x",
"y",
@@ -147,7 +148,7 @@ tgsi_swizzle_names[] =
};
const char *
-tgsi_texture_names[] =
+tgsi_texture_names[TGSI_TEXTURE_COUNT] =
{
"UNKNOWN",
"1D",
@@ -157,19 +158,31 @@ tgsi_texture_names[] =
"RECT",
"SHADOW1D",
"SHADOW2D",
- "SHADOWRECT"
+ "SHADOWRECT",
+ "1DARRAY",
+ "2DARRAY"
};
-static const char *property_names[] =
+const char *tgsi_property_names[TGSI_PROPERTY_COUNT] =
{
"GS_INPUT_PRIMITIVE",
"GS_OUTPUT_PRIMITIVE",
"GS_MAX_OUTPUT_VERTICES",
"FS_COORD_ORIGIN",
- "FS_COORD_PIXEL_CENTER"
+ "FS_COORD_PIXEL_CENTER",
+ "FS_COLOR0_WRITES_ALL_CBUFS",
};
-static const char *primitive_names[] =
+static const char *tgsi_type_names[] =
+{
+ "UNORM",
+ "SNORM",
+ "SINT",
+ "UINT",
+ "FLOAT"
+};
+
+const char *tgsi_primitive_names[PIPE_PRIM_MAX] =
{
"POINTS",
"LINES",
@@ -187,13 +200,13 @@ static const char *primitive_names[] =
"TRIANGLE_STRIP_ADJACENCY"
};
-static const char *fs_coord_origin_names[] =
+const char *tgsi_fs_coord_origin_names[2] =
{
"UPPER_LEFT",
"LOWER_LEFT"
};
-static const char *fs_coord_pixel_center_names[] =
+const char *tgsi_fs_coord_pixel_center_names[2] =
{
"HALF_INTEGER",
"INTEGER"
@@ -392,6 +405,26 @@ iter_declaration(
}
}
+ if (decl->Declaration.File == TGSI_FILE_RESOURCE) {
+ TXT(", ");
+ ENM(decl->Resource.Resource, tgsi_texture_names);
+ TXT(", ");
+ if ((decl->Resource.ReturnTypeX == decl->Resource.ReturnTypeY) &&
+ (decl->Resource.ReturnTypeX == decl->Resource.ReturnTypeZ) &&
+ (decl->Resource.ReturnTypeX == decl->Resource.ReturnTypeW)) {
+ ENM(decl->Resource.ReturnTypeX, tgsi_type_names);
+ } else {
+ ENM(decl->Resource.ReturnTypeX, tgsi_type_names);
+ TXT(", ");
+ ENM(decl->Resource.ReturnTypeY, tgsi_type_names);
+ TXT(", ");
+ ENM(decl->Resource.ReturnTypeZ, tgsi_type_names);
+ TXT(", ");
+ ENM(decl->Resource.ReturnTypeW, tgsi_type_names);
+ }
+
+ }
+
if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT &&
decl->Declaration.File == TGSI_FILE_INPUT)
{
@@ -484,10 +517,10 @@ iter_property(
int i;
struct dump_ctx *ctx = (struct dump_ctx *)iter;
- assert(Elements(property_names) == TGSI_PROPERTY_COUNT);
+ assert(Elements(tgsi_property_names) == TGSI_PROPERTY_COUNT);
TXT( "PROPERTY " );
- ENM(prop->Property.PropertyName, property_names);
+ ENM(prop->Property.PropertyName, tgsi_property_names);
if (prop->Property.NrTokens > 1)
TXT(" ");
@@ -496,13 +529,13 @@ iter_property(
switch (prop->Property.PropertyName) {
case TGSI_PROPERTY_GS_INPUT_PRIM:
case TGSI_PROPERTY_GS_OUTPUT_PRIM:
- ENM(prop->u[i].Data, primitive_names);
+ ENM(prop->u[i].Data, tgsi_primitive_names);
break;
case TGSI_PROPERTY_FS_COORD_ORIGIN:
- ENM(prop->u[i].Data, fs_coord_origin_names);
+ ENM(prop->u[i].Data, tgsi_fs_coord_origin_names);
break;
case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
- ENM(prop->u[i].Data, fs_coord_pixel_center_names);
+ ENM(prop->u[i].Data, tgsi_fs_coord_pixel_center_names);
break;
default:
SID( prop->u[i].Data );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h
index fc0429ad8d..2491e9198a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
@@ -29,6 +29,7 @@
#define TGSI_DUMP_H
#include "pipe/p_compiler.h"
+#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#if defined __cplusplus
@@ -39,10 +40,22 @@ extern const char *
tgsi_file_names[TGSI_FILE_COUNT];
extern const char *
-tgsi_swizzle_names[];
+tgsi_swizzle_names[4];
extern const char *
-tgsi_texture_names[];
+tgsi_texture_names[TGSI_TEXTURE_COUNT];
+
+extern const char *
+tgsi_property_names[TGSI_PROPERTY_COUNT];
+
+extern const char *
+tgsi_primitive_names[PIPE_PRIM_MAX];
+
+extern const char *
+tgsi_fs_coord_origin_names[2];
+
+extern const char *
+tgsi_fs_coord_pixel_center_names[2];
void
tgsi_dump_str(
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 7892a67f04..9cf74a838f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -672,6 +672,31 @@ tgsi_exec_machine_bind_shader(
mach->Processor = parse.FullHeader.Processor.Processor;
mach->ImmLimit = 0;
+ if (mach->Processor == TGSI_PROCESSOR_GEOMETRY &&
+ !mach->UsedGeometryShader) {
+ struct tgsi_exec_vector *inputs =
+ align_malloc(sizeof(struct tgsi_exec_vector) *
+ TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS,
+ 16);
+ struct tgsi_exec_vector *outputs =
+ align_malloc(sizeof(struct tgsi_exec_vector) *
+ TGSI_MAX_TOTAL_VERTICES, 16);
+
+ if (!inputs)
+ return;
+ if (!outputs) {
+ align_free(inputs);
+ return;
+ }
+
+ align_free(mach->Inputs);
+ align_free(mach->Outputs);
+
+ mach->Inputs = inputs;
+ mach->Outputs = outputs;
+ mach->UsedGeometryShader = TRUE;
+ }
+
declarations = (struct tgsi_full_declaration *)
MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
@@ -801,6 +826,11 @@ tgsi_exec_machine_create( void )
mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
+ mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16);
+ mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16);
+ if (!mach->Inputs || !mach->Outputs)
+ goto fail;
+
/* Setup constants needed by the SSE2 executor. */
for( i = 0; i < 4; i++ ) {
mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000;
@@ -824,7 +854,11 @@ tgsi_exec_machine_create( void )
return mach;
fail:
- align_free(mach);
+ if (mach) {
+ align_free(mach->Inputs);
+ align_free(mach->Outputs);
+ align_free(mach);
+ }
return NULL;
}
@@ -836,10 +870,13 @@ tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
if (mach->Instructions)
FREE(mach->Instructions);
if (mach->Declarations)
- FREE(mach->Declarations);
- }
+ FREE(mach->Declarations);
+
+ align_free(mach->Inputs);
+ align_free(mach->Outputs);
- align_free(mach);
+ align_free(mach);
+ }
}
static void
@@ -1038,7 +1075,6 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach,
break;
case TGSI_FILE_INPUT:
- case TGSI_FILE_SYSTEM_VALUE:
for (i = 0; i < QUAD_SIZE; i++) {
/*
if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
@@ -1048,11 +1084,20 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach,
}*/
int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
assert(pos >= 0);
- assert(pos < Elements(mach->Inputs));
+ assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
}
break;
+ case TGSI_FILE_SYSTEM_VALUE:
+ /* XXX no swizzling at this point. Will be needed if we put
+ * gl_FragCoord, for example, in a sys value register.
+ */
+ for (i = 0; i < QUAD_SIZE; i++) {
+ chan->f[i] = mach->SystemValue[index->i[i]][0];
+ }
+ break;
+
case TGSI_FILE_TEMPORARY:
for (i = 0; i < QUAD_SIZE; i++) {
assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
@@ -1742,6 +1787,36 @@ exec_tex(struct tgsi_exec_machine *mach,
&r[0], &r[1], &r[2], &r[3]); /* outputs */
break;
+ case TGSI_TEXTURE_1D_ARRAY:
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 0, CHAN_Y);
+
+ if (modifier == TEX_MODIFIER_PROJECTED) {
+ micro_div(&r[0], &r[0], &r[3]);
+ }
+
+ fetch_texel(mach->Samplers[unit],
+ &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */
+ control,
+ &r[0], &r[1], &r[2], &r[3]); /* outputs */
+ break;
+
+ case TGSI_TEXTURE_2D_ARRAY:
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 0, CHAN_Z);
+
+ if (modifier == TEX_MODIFIER_PROJECTED) {
+ micro_div(&r[0], &r[0], &r[3]);
+ micro_div(&r[1], &r[1], &r[3]);
+ }
+
+ fetch_texel(mach->Samplers[unit],
+ &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */
+ control,
+ &r[0], &r[1], &r[2], &r[3]); /* outputs */
+ break;
+
case TGSI_TEXTURE_3D:
case TGSI_TEXTURE_CUBE:
FETCH(&r[0], 0, CHAN_X);
@@ -1835,6 +1910,164 @@ exec_txd(struct tgsi_exec_machine *mach,
}
+
+static void
+exec_sample(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ uint modifier)
+{
+ const uint resource_unit = inst->Src[1].Register.Index;
+ const uint sampler_unit = inst->Src[2].Register.Index;
+ union tgsi_exec_channel r[4];
+ const union tgsi_exec_channel *lod = &ZeroVec;
+ enum tgsi_sampler_control control;
+ uint chan;
+
+ if (modifier != TEX_MODIFIER_NONE) {
+ if (modifier == TEX_MODIFIER_LOD_BIAS)
+ FETCH(&r[3], 3, CHAN_X);
+ else /*TEX_MODIFIER_LOD*/
+ FETCH(&r[3], 0, CHAN_W);
+
+ if (modifier != TEX_MODIFIER_PROJECTED) {
+ lod = &r[3];
+ }
+ }
+
+ if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
+ control = tgsi_sampler_lod_explicit;
+ } else {
+ control = tgsi_sampler_lod_bias;
+ }
+
+ switch (mach->Resources[resource_unit].Resource) {
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
+ FETCH(&r[0], 0, CHAN_X);
+
+ if (modifier == TEX_MODIFIER_PROJECTED) {
+ micro_div(&r[0], &r[0], &r[3]);
+ }
+
+ fetch_texel(mach->Samplers[sampler_unit],
+ &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */
+ control,
+ &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
+ break;
+
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 0, CHAN_Z);
+
+ if (modifier == TEX_MODIFIER_PROJECTED) {
+ micro_div(&r[0], &r[0], &r[3]);
+ micro_div(&r[1], &r[1], &r[3]);
+ micro_div(&r[2], &r[2], &r[3]);
+ }
+
+ fetch_texel(mach->Samplers[sampler_unit],
+ &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */
+ control,
+ &r[0], &r[1], &r[2], &r[3]); /* outputs */
+ break;
+
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 0, CHAN_Z);
+
+ if (modifier == TEX_MODIFIER_PROJECTED) {
+ micro_div(&r[0], &r[0], &r[3]);
+ micro_div(&r[1], &r[1], &r[3]);
+ micro_div(&r[2], &r[2], &r[3]);
+ }
+
+ fetch_texel(mach->Samplers[sampler_unit],
+ &r[0], &r[1], &r[2], lod,
+ control,
+ &r[0], &r[1], &r[2], &r[3]);
+ break;
+
+ default:
+ assert(0);
+ }
+
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+}
+
+static void
+exec_sample_d(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ const uint resource_unit = inst->Src[1].Register.Index;
+ const uint sampler_unit = inst->Src[2].Register.Index;
+ union tgsi_exec_channel r[4];
+ uint chan;
+ /*
+ * XXX: This is fake SAMPLE_D -- the derivatives are not taken into account, yet.
+ */
+
+ switch (mach->Resources[resource_unit].Resource) {
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
+
+ FETCH(&r[0], 0, CHAN_X);
+
+ fetch_texel(mach->Samplers[sampler_unit],
+ &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */
+ tgsi_sampler_lod_bias,
+ &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
+ break;
+
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 0, CHAN_Z);
+
+ fetch_texel(mach->Samplers[sampler_unit],
+ &r[0], &r[1], &r[2], &ZeroVec, /* inputs */
+ tgsi_sampler_lod_bias,
+ &r[0], &r[1], &r[2], &r[3]); /* outputs */
+ break;
+
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 0, CHAN_Z);
+
+ fetch_texel(mach->Samplers[sampler_unit],
+ &r[0], &r[1], &r[2], &ZeroVec,
+ tgsi_sampler_lod_bias,
+ &r[0], &r[1], &r[2], &r[3]);
+ break;
+
+ default:
+ assert(0);
+ }
+
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+}
+
+
/**
* Evaluate a constant-valued coefficient at the position of the
* current quad.
@@ -1906,9 +2139,13 @@ static void
exec_declaration(struct tgsi_exec_machine *mach,
const struct tgsi_full_declaration *decl)
{
+ if (decl->Declaration.File == TGSI_FILE_RESOURCE) {
+ mach->Resources[decl->Range.First] = decl->Resource;
+ return;
+ }
+
if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
- if (decl->Declaration.File == TGSI_FILE_INPUT ||
- decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
+ if (decl->Declaration.File == TGSI_FILE_INPUT) {
uint first, last, mask;
first = decl->Range.First;
@@ -1921,6 +2158,7 @@ exec_declaration(struct tgsi_exec_machine *mach,
* ureg code to emit the right UsageMask value (WRITEMASK_X).
* Then, we could remove the tgsi_exec_machine::Face field.
*/
+ /* XXX make FACE a system value */
if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
uint i;
@@ -1962,8 +2200,13 @@ exec_declaration(struct tgsi_exec_machine *mach,
}
}
}
+
+ if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
+ mach->SysSemanticToIndex[decl->Declaration.Semantic] = decl->Range.First;
+ }
}
+
typedef void (* micro_op)(union tgsi_exec_channel *dst);
static void
@@ -3675,6 +3918,54 @@ exec_instruction(
exec_endswitch(mach);
break;
+ case TGSI_OPCODE_LOAD:
+ assert(0);
+ break;
+
+ case TGSI_OPCODE_LOAD_MS:
+ assert(0);
+ break;
+
+ case TGSI_OPCODE_SAMPLE:
+ exec_sample(mach, inst, TEX_MODIFIER_NONE);
+ break;
+
+ case TGSI_OPCODE_SAMPLE_B:
+ exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS);
+ break;
+
+ case TGSI_OPCODE_SAMPLE_C:
+ exec_sample(mach, inst, TEX_MODIFIER_NONE);
+ break;
+
+ case TGSI_OPCODE_SAMPLE_C_LZ:
+ exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS);
+ break;
+
+ case TGSI_OPCODE_SAMPLE_D:
+ exec_sample_d(mach, inst);
+ break;
+
+ case TGSI_OPCODE_SAMPLE_L:
+ exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD);
+ break;
+
+ case TGSI_OPCODE_GATHER4:
+ assert(0);
+ break;
+
+ case TGSI_OPCODE_RESINFO:
+ assert(0);
+ break;
+
+ case TGSI_OPCODE_SAMPLE_POS:
+ assert(0);
+ break;
+
+ case TGSI_OPCODE_SAMPLE_INFO:
+ assert(0);
+ break;
+
default:
assert( 0 );
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index b5ebbfbfaa..33f33aa82c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -31,6 +31,7 @@
#include "pipe/p_compiler.h"
#include "pipe/p_state.h"
+#include "pipe/p_shader_tokens.h"
#if defined __cplusplus
extern "C" {
@@ -181,6 +182,8 @@ struct tgsi_sampler
/* The maximum total number of vertices */
#define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS)
+#define TGSI_MAX_MISC_INPUTS 8
+
/** function call/activation record */
struct tgsi_call_record
{
@@ -225,8 +228,12 @@ struct tgsi_exec_machine
float ImmArray[TGSI_EXEC_NUM_IMMEDIATES][4];
- struct tgsi_exec_vector Inputs[TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS];
- struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES];
+ struct tgsi_exec_vector *Inputs;
+ struct tgsi_exec_vector *Outputs;
+
+ /* System values */
+ unsigned SysSemanticToIndex[TGSI_SEMANTIC_COUNT];
+ float SystemValue[TGSI_MAX_MISC_INPUTS][4];
struct tgsi_exec_vector *Addrs;
struct tgsi_exec_vector *Predicates;
@@ -301,6 +308,9 @@ struct tgsi_exec_machine
struct tgsi_full_declaration *Declarations;
uint NumDeclarations;
+ struct tgsi_declaration_resource Resources[PIPE_MAX_SHADER_RESOURCES];
+
+ boolean UsedGeometryShader;
};
struct tgsi_exec_machine *
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index e59e964ffa..14ed56a1f6 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -175,7 +175,20 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{ 0, 1, 0, 0, 0, 0, "SWITCH", TGSI_OPCODE_SWITCH },
{ 0, 1, 0, 0, 0, 0, "CASE", TGSI_OPCODE_CASE },
{ 0, 0, 0, 0, 0, 0, "DEFAULT", TGSI_OPCODE_DEFAULT },
- { 0, 0, 0, 0, 0, 0, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH }
+ { 0, 0, 0, 0, 0, 0, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH },
+
+ { 1, 2, 0, 0, 0, 0, "LOAD", TGSI_OPCODE_LOAD },
+ { 1, 2, 0, 0, 0, 0, "LOAD_MS", TGSI_OPCODE_LOAD_MS },
+ { 1, 3, 0, 0, 0, 0, "SAMPLE", TGSI_OPCODE_SAMPLE },
+ { 1, 4, 0, 0, 0, 0, "SAMPLE_B", TGSI_OPCODE_SAMPLE_B },
+ { 1, 4, 0, 0, 0, 0, "SAMPLE_C", TGSI_OPCODE_SAMPLE_C },
+ { 1, 4, 0, 0, 0, 0, "SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ },
+ { 1, 5, 0, 0, 0, 0, "SAMPLE_D", TGSI_OPCODE_SAMPLE_D },
+ { 1, 3, 0, 0, 0, 0, "SAMPLE_L", TGSI_OPCODE_SAMPLE_L },
+ { 1, 3, 0, 0, 0, 0, "GATHER4", TGSI_OPCODE_GATHER4 },
+ { 1, 2, 0, 0, 0, 0, "RESINFO", TGSI_OPCODE_RESINFO },
+ { 1, 2, 0, 0, 0, 0, "SAMPLE_POS", TGSI_OPCODE_SAMPLE_POS },
+ { 1, 2, 0, 0, 0, 0, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO },
};
const struct tgsi_opcode_info *
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index b3123ed016..b5d4504425 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -168,6 +168,19 @@ OP01(CASE)
OP00(DEFAULT)
OP00(ENDSWITCH)
+OP12(LOAD)
+OP12(LOAD_MS)
+OP13(SAMPLE)
+OP14(SAMPLE_B)
+OP14(SAMPLE_C)
+OP14(SAMPLE_C_LZ)
+OP15(SAMPLE_D)
+OP13(SAMPLE_L)
+OP13(GATHER4)
+OP12(RESINFO)
+OP13(SAMPLE_POS)
+OP12(SAMPLE_INFO)
+
#undef OP00
#undef OP01
@@ -180,6 +193,10 @@ OP00(ENDSWITCH)
#undef OP14
#endif
+#ifdef OP15
+#undef OP15
+#endif
+
#undef OP00_LBL
#undef OP01_LBL
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 1891203abe..fb36f9de32 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -128,6 +128,10 @@ tgsi_parse_token(
}
}
+ if (decl->Declaration.File == TGSI_FILE_RESOURCE) {
+ next_token(ctx, &decl->Resource);
+ }
+
break;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index d4df585176..b7a3c9bc0e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -69,6 +69,7 @@ struct tgsi_full_declaration
struct tgsi_declaration_dimension Dim;
struct tgsi_declaration_semantic Semantic;
struct tgsi_immediate_array_data ImmediateData;
+ struct tgsi_declaration_resource Resource;
};
struct tgsi_full_immediate
@@ -84,7 +85,7 @@ struct tgsi_full_property
};
#define TGSI_FULL_MAX_DST_REGISTERS 2
-#define TGSI_FULL_MAX_SRC_REGISTERS 4 /* TXD has 4 */
+#define TGSI_FULL_MAX_SRC_REGISTERS 5 /* SAMPLE_D has 5 */
struct tgsi_full_instruction
{
@@ -136,7 +137,8 @@ tgsi_parse_token(
static INLINE unsigned
tgsi_num_tokens(const struct tgsi_token *tokens)
{
- struct tgsi_header header = *(const struct tgsi_header *) tokens;
+ struct tgsi_header header;
+ memcpy(&header, tokens, sizeof(header));
return header.HeaderSize + header.BodySize;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 3521847b61..537a0f6c5e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -294,7 +294,6 @@ emit_fetch(struct gen_context *gen,
case TGSI_SWIZZLE_W:
switch (reg->Register.File) {
case TGSI_FILE_INPUT:
- case TGSI_FILE_SYSTEM_VALUE:
{
int offset = (reg->Register.Index * 4 + swizzle) * 16;
int offset_reg = emit_li_offset(gen, offset);
@@ -302,6 +301,9 @@ emit_fetch(struct gen_context *gen,
ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg);
}
break;
+ case TGSI_FILE_SYSTEM_VALUE:
+ assert(!"unhandled system value in tgsi_ppc.c");
+ break;
case TGSI_FILE_TEMPORARY:
if (is_ppc_vec_temporary(reg)) {
/* use PPC vec register */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index acbff103ef..509c534683 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -258,7 +258,8 @@ static const char *file_names[TGSI_FILE_COUNT] =
"PRED",
"SV",
"IMMX",
- "TEMPX"
+ "TEMPX",
+ "RES"
};
static boolean
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h
index 73f0f414e3..707d11ec6d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h
@@ -28,12 +28,14 @@
#ifndef TGSI_SANITY_H
#define TGSI_SANITY_H
-#include "pipe/p_shader_tokens.h"
-
#if defined __cplusplus
extern "C" {
#endif
+#include "pipe/p_compiler.h"
+
+struct tgsi_token;
+
/* Check the given token stream for errors and common mistakes.
* Diagnostic messages are printed out to the debug output, and is
* controlled by the debug option TGSI_PRINT_SANITY (default false).
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 6585da3e83..83c6ac75e5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -143,7 +143,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->file_count[file]++;
info->file_max[file] = MAX2(info->file_max[file], (int)reg);
- if (file == TGSI_FILE_INPUT || file == TGSI_FILE_SYSTEM_VALUE) {
+ if (file == TGSI_FILE_INPUT) {
info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name;
info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index;
info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate;
@@ -151,6 +151,23 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Declaration.CylindricalWrap;
info->num_inputs++;
}
+ else if (file == TGSI_FILE_SYSTEM_VALUE) {
+ unsigned index = fulldecl->Range.First;
+ unsigned semName = fulldecl->Semantic.Name;
+
+ info->system_value_semantic_name[index] = semName;
+ info->num_system_values = MAX2(info->num_system_values,
+ index + 1);
+
+ /*
+ info->system_value_semantic_name[info->num_system_values++] =
+ fulldecl->Semantic.Name;
+ */
+
+ if (fulldecl->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
+ info->uses_instanceid = TRUE;
+ }
+ }
else if (file == TGSI_FILE_OUTPUT) {
info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name;
info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index 104097fbc0..53ab3d509d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -51,6 +51,9 @@ struct tgsi_shader_info
ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */
ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
+ ubyte num_system_values;
+ ubyte system_value_semantic_name[PIPE_MAX_SHADER_INPUTS];
+
uint file_mask[TGSI_FILE_COUNT]; /**< bitmask of declared registers */
uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */
int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */
@@ -64,6 +67,7 @@ struct tgsi_shader_info
boolean writes_stencil; /**< does fragment shader write stencil value? */
boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
boolean uses_kill; /**< KIL or KILP instruction used? */
+ boolean uses_instanceid;
/**
* Bitmask indicating which register files are accessed with
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 086d983a73..664946b00f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -28,7 +28,9 @@
#include "pipe/p_config.h"
-#if defined(PIPE_ARCH_X86)
+#include "tgsi/tgsi_sse2.h"
+
+#if defined(PIPE_ARCH_X86) && 0 /* See FIXME notes below */
#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
@@ -42,7 +44,6 @@
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
-#include "tgsi/tgsi_sse2.h"
#include "rtasm/rtasm_x86sse.h"
@@ -118,6 +119,7 @@ get_machine_base( void )
static struct x86_reg
get_input_base( void )
{
+ /* FIXME: tgsi_exec_machine::Inputs is a pointer now! */
return x86_make_disp(
get_machine_base(),
Offset(struct tgsi_exec_machine, Inputs) );
@@ -126,6 +128,7 @@ get_input_base( void )
static struct x86_reg
get_output_base( void )
{
+ /* FIXME: tgsi_exec_machine::Ouputs is a pointer now! */
return x86_make_disp(
get_machine_base(),
Offset(struct tgsi_exec_machine, Outputs) );
@@ -163,6 +166,14 @@ get_immediate_base( void )
reg_DX );
}
+static struct x86_reg
+get_system_value_base( void )
+{
+ return x86_make_disp(
+ get_machine_base(),
+ Offset(struct tgsi_exec_machine, SystemValue) );
+}
+
/**
* Data access helpers.
@@ -229,6 +240,16 @@ get_temp(
}
static struct x86_reg
+get_system_value(
+ unsigned vec,
+ unsigned chan )
+{
+ return x86_make_disp(
+ get_system_value_base(), /* base */
+ (vec * 4 + chan) * 4 ); /* byte offset from base */
+}
+
+static struct x86_reg
get_coef(
unsigned vec,
unsigned chan,
@@ -423,6 +444,30 @@ emit_tempf(
}
/**
+ * Copy a system value to xmm register
+ * \param xmm the destination xmm register
+ * \param vec the source system value register
+ * \param chan src channel to fetch (X, Y, Z or W)
+ */
+static void
+emit_system_value(
+ struct x86_function *func,
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
+{
+ sse_movss(
+ func,
+ make_xmm( xmm ),
+ get_system_value( vec, chan ) );
+ sse_shufps(
+ func,
+ make_xmm( xmm ),
+ make_xmm( xmm ),
+ SHUF( 0, 0, 0, 0 ) );
+}
+
+/**
* Load an xmm register with an input attrib coefficient (a0, dadx or dady)
* \param xmm the destination xmm register
* \param vec the src input/attribute coefficient index
@@ -1281,8 +1326,15 @@ emit_fetch(
swizzle );
break;
- case TGSI_FILE_INPUT:
case TGSI_FILE_SYSTEM_VALUE:
+ emit_system_value(
+ func,
+ xmm,
+ reg->Register.Index,
+ swizzle );
+ break;
+
+ case TGSI_FILE_INPUT:
emit_inputf(
func,
xmm,
@@ -1465,6 +1517,7 @@ emit_tex( struct x86_function *func,
break;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_1D_ARRAY:
count = 2;
break;
case TGSI_TEXTURE_SHADOW1D:
@@ -1472,6 +1525,7 @@ emit_tex( struct x86_function *func,
case TGSI_TEXTURE_SHADOWRECT:
case TGSI_TEXTURE_3D:
case TGSI_TEXTURE_CUBE:
+ case TGSI_TEXTURE_2D_ARRAY:
count = 3;
break;
default:
@@ -2636,8 +2690,7 @@ emit_declaration(
struct x86_function *func,
struct tgsi_full_declaration *decl )
{
- if( decl->Declaration.File == TGSI_FILE_INPUT ||
- decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE ) {
+ if( decl->Declaration.File == TGSI_FILE_INPUT ) {
unsigned first, last, mask;
unsigned i, j;
@@ -2710,6 +2763,7 @@ static void aos_to_soa( struct x86_function *func,
x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) );
x86_mov( func, soa_input, x86_fn_arg( func, arg_machine ) );
+ /* FIXME: tgsi_exec_machine::Inputs is a pointer now! */
x86_lea( func, soa_input,
x86_make_disp( soa_input,
Offset(struct tgsi_exec_machine, Inputs) ) );
@@ -2778,6 +2832,7 @@ static void soa_to_aos( struct x86_function *func,
x86_mov( func, aos_output, x86_fn_arg( func, arg_aos ) );
x86_mov( func, soa_output, x86_fn_arg( func, arg_machine ) );
+ /* FIXME: tgsi_exec_machine::Ouputs is a pointer now! */
x86_lea( func, soa_output,
x86_make_disp( soa_output,
Offset(struct tgsi_exec_machine, Outputs) ) );
@@ -3032,4 +3087,16 @@ tgsi_emit_sse2(
return ok;
}
-#endif /* PIPE_ARCH_X86 */
+#else /* !PIPE_ARCH_X86 */
+
+unsigned
+tgsi_emit_sse2(
+ const struct tgsi_token *tokens,
+ struct x86_function *func,
+ float (*immediates)[4],
+ boolean do_swizzles )
+{
+ return 0;
+}
+
+#endif /* !PIPE_ARCH_X86 */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 9a38c37979..57622a0dea 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -36,6 +36,7 @@
#include "tgsi_parse.h"
#include "tgsi_sanity.h"
#include "tgsi_util.h"
+#include "tgsi_dump.h"
static boolean is_alpha_underscore( const char *cur )
{
@@ -136,7 +137,7 @@ static boolean parse_identifier( const char **pcur, char *ret )
int i = 0;
if (is_alpha_underscore( cur )) {
ret[i++] = *cur++;
- while (is_alpha_underscore( cur ))
+ while (is_alpha_underscore( cur ) || is_digit( cur ))
ret[i++] = *cur++;
ret[i++] = '\0';
*pcur = cur;
@@ -282,7 +283,8 @@ static const char *file_names[TGSI_FILE_COUNT] =
"PRED",
"SV",
"IMMX",
- "TEMPX"
+ "TEMPX",
+ "RES"
};
static boolean
@@ -827,6 +829,15 @@ static const char *texture_names[TGSI_TEXTURE_COUNT] =
"SHADOWRECT"
};
+static const char *type_names[] =
+{
+ "UNORM",
+ "SNORM",
+ "SINT",
+ "UINT",
+ "FLOAT"
+};
+
static boolean
match_inst_mnemonic(const char **pcur,
const struct tgsi_opcode_info *info)
@@ -1103,42 +1114,113 @@ static boolean parse_declaration( struct translate_ctx *ctx )
cur = ctx->cur;
eat_opt_white( &cur );
if (*cur == ',' && !is_vs_input) {
- uint i;
+ uint i, j;
cur++;
eat_opt_white( &cur );
- for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) {
- if (str_match_no_case( &cur, semantic_names[i] )) {
- const char *cur2 = cur;
- uint index;
-
- if (is_digit_alpha_underscore( cur ))
- continue;
- eat_opt_white( &cur2 );
- if (*cur2 == '[') {
- cur2++;
- eat_opt_white( &cur2 );
- if (!parse_uint( &cur2, &index )) {
- report_error( ctx, "Expected literal integer" );
+ if (file == TGSI_FILE_RESOURCE) {
+ for (i = 0; i < TGSI_TEXTURE_COUNT; i++) {
+ if (str_match_no_case(&cur, texture_names[i])) {
+ if (!is_digit_alpha_underscore(cur)) {
+ decl.Resource.Resource = i;
+ break;
+ }
+ }
+ }
+ if (i == TGSI_TEXTURE_COUNT) {
+ report_error(ctx, "Expected texture target");
+ return FALSE;
+ }
+ eat_opt_white( &cur );
+ if (*cur != ',') {
+ report_error( ctx, "Expected `,'" );
+ return FALSE;
+ }
+ ++cur;
+ eat_opt_white( &cur );
+ for (j = 0; j < 4; ++j) {
+ for (i = 0; i < PIPE_TYPE_COUNT; ++i) {
+ if (str_match_no_case(&cur, type_names[i])) {
+ if (!is_digit_alpha_underscore(cur)) {
+ switch (j) {
+ case 0:
+ decl.Resource.ReturnTypeX = i;
+ break;
+ case 1:
+ decl.Resource.ReturnTypeY = i;
+ break;
+ case 2:
+ decl.Resource.ReturnTypeZ = i;
+ break;
+ case 3:
+ decl.Resource.ReturnTypeW = i;
+ break;
+ default:
+ assert(0);
+ }
+ break;
+ }
+ }
+ }
+ if (i == PIPE_TYPE_COUNT) {
+ if (j == 0 || j > 2) {
+ report_error(ctx, "Expected type name");
return FALSE;
}
+ break;
+ } else {
+ const char *cur2 = cur;
eat_opt_white( &cur2 );
- if (*cur2 != ']') {
- report_error( ctx, "Expected `]'" );
- return FALSE;
+ if (*cur2 == ',') {
+ cur2++;
+ eat_opt_white( &cur2 );
+ cur = cur2;
+ continue;
+ } else
+ break;
+ }
+ }
+ if (j < 4) {
+ decl.Resource.ReturnTypeY =
+ decl.Resource.ReturnTypeZ =
+ decl.Resource.ReturnTypeW =
+ decl.Resource.ReturnTypeX;
+ }
+ ctx->cur = cur;
+ } else {
+ for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) {
+ if (str_match_no_case( &cur, semantic_names[i] )) {
+ const char *cur2 = cur;
+ uint index;
+
+ if (is_digit_alpha_underscore( cur ))
+ continue;
+ eat_opt_white( &cur2 );
+ if (*cur2 == '[') {
+ cur2++;
+ eat_opt_white( &cur2 );
+ if (!parse_uint( &cur2, &index )) {
+ report_error( ctx, "Expected literal integer" );
+ return FALSE;
+ }
+ eat_opt_white( &cur2 );
+ if (*cur2 != ']') {
+ report_error( ctx, "Expected `]'" );
+ return FALSE;
+ }
+ cur2++;
+
+ decl.Semantic.Index = index;
+
+ cur = cur2;
}
- cur2++;
- decl.Semantic.Index = index;
+ decl.Declaration.Semantic = 1;
+ decl.Semantic.Name = i;
- cur = cur2;
+ ctx->cur = cur;
+ break;
}
-
- decl.Declaration.Semantic = 1;
- decl.Semantic.Name = i;
-
- ctx->cur = cur;
- break;
}
}
} else if (is_imm_array) {
@@ -1258,42 +1340,6 @@ static boolean parse_immediate( struct translate_ctx *ctx )
return TRUE;
}
-static const char *property_names[] =
-{
- "GS_INPUT_PRIMITIVE",
- "GS_OUTPUT_PRIMITIVE",
- "GS_MAX_OUTPUT_VERTICES",
- "FS_COORD_ORIGIN",
- "FS_COORD_PIXEL_CENTER"
-};
-
-static const char *primitive_names[] =
-{
- "POINTS",
- "LINES",
- "LINE_LOOP",
- "LINE_STRIP",
- "TRIANGLES",
- "TRIANGLE_STRIP",
- "TRIANGLE_FAN",
- "QUADS",
- "QUAD_STRIP",
- "POLYGON"
-};
-
-static const char *fs_coord_origin_names[] =
-{
- "UPPER_LEFT",
- "LOWER_LEFT"
-};
-
-static const char *fs_coord_pixel_center_names[] =
-{
- "HALF_INTEGER",
- "INTEGER"
-};
-
-
static boolean
parse_primitive( const char **pcur, uint *primitive )
{
@@ -1302,7 +1348,7 @@ parse_primitive( const char **pcur, uint *primitive )
for (i = 0; i < PIPE_PRIM_MAX; i++) {
const char *cur = *pcur;
- if (str_match_no_case( &cur, primitive_names[i])) {
+ if (str_match_no_case( &cur, tgsi_primitive_names[i])) {
*primitive = i;
*pcur = cur;
return TRUE;
@@ -1316,10 +1362,10 @@ parse_fs_coord_origin( const char **pcur, uint *fs_coord_origin )
{
uint i;
- for (i = 0; i < sizeof(fs_coord_origin_names) / sizeof(fs_coord_origin_names[0]); i++) {
+ for (i = 0; i < Elements(tgsi_fs_coord_origin_names); i++) {
const char *cur = *pcur;
- if (str_match_no_case( &cur, fs_coord_origin_names[i])) {
+ if (str_match_no_case( &cur, tgsi_fs_coord_origin_names[i])) {
*fs_coord_origin = i;
*pcur = cur;
return TRUE;
@@ -1333,10 +1379,10 @@ parse_fs_coord_pixel_center( const char **pcur, uint *fs_coord_pixel_center )
{
uint i;
- for (i = 0; i < sizeof(fs_coord_pixel_center_names) / sizeof(fs_coord_pixel_center_names[0]); i++) {
+ for (i = 0; i < Elements(tgsi_fs_coord_pixel_center_names); i++) {
const char *cur = *pcur;
- if (str_match_no_case( &cur, fs_coord_pixel_center_names[i])) {
+ if (str_match_no_case( &cur, tgsi_fs_coord_pixel_center_names[i])) {
*fs_coord_pixel_center = i;
*pcur = cur;
return TRUE;
@@ -1364,7 +1410,7 @@ static boolean parse_property( struct translate_ctx *ctx )
}
for (property_name = 0; property_name < TGSI_PROPERTY_COUNT;
++property_name) {
- if (streq_nocase_uprcase(property_names[property_name], id)) {
+ if (streq_nocase_uprcase(tgsi_property_names[property_name], id)) {
break;
}
}
@@ -1398,6 +1444,7 @@ static boolean parse_property( struct translate_ctx *ctx )
return FALSE;
}
break;
+ case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
default:
if (!parse_uint(&ctx->cur, &values[0] )) {
report_error( ctx, "Expected unsigned integer as property!" );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.h b/src/gallium/auxiliary/tgsi/tgsi_text.h
index 8eeeeef140..5fe53cf007 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.h
@@ -28,12 +28,14 @@
#ifndef TGSI_TEXT_H
#define TGSI_TEXT_H
-#include "pipe/p_shader_tokens.h"
-
#if defined __cplusplus
extern "C" {
#endif
+#include "pipe/p_compiler.h"
+
+struct tgsi_token;
+
boolean
tgsi_text_translate(
const char *text,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 7d13a17bdb..4564ab81f9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2009 VMware, Inc.
+ * Copyright 2009-2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -47,6 +47,7 @@ union tgsi_any_token {
struct tgsi_declaration_range decl_range;
struct tgsi_declaration_dimension decl_dim;
struct tgsi_declaration_semantic decl_semantic;
+ struct tgsi_declaration_resource decl_resource;
struct tgsi_immediate imm;
union tgsi_immediate_data imm_data;
struct tgsi_instruction insn;
@@ -137,6 +138,16 @@ struct ureg_program
struct ureg_src sampler[PIPE_MAX_SAMPLERS];
unsigned nr_samplers;
+ struct {
+ unsigned index;
+ unsigned target;
+ unsigned return_type_x;
+ unsigned return_type_y;
+ unsigned return_type_z;
+ unsigned return_type_w;
+ } resource[PIPE_MAX_SHADER_RESOURCES];
+ unsigned nr_resources;
+
unsigned temps_active[UREG_MAX_TEMP / 32];
unsigned nr_temps;
@@ -148,6 +159,7 @@ struct ureg_program
unsigned property_gs_max_vertices;
unsigned char property_fs_coord_origin; /* = TGSI_FS_COORD_ORIGIN_* */
unsigned char property_fs_coord_pixel_center; /* = TGSI_FS_COORD_PIXEL_CENTER_* */
+ unsigned char property_fs_color0_writes_all_cbufs; /* = TGSI_FS_COLOR0_WRITES_ALL_CBUFS * */
unsigned nr_addrs;
unsigned nr_preds;
@@ -284,7 +296,12 @@ ureg_property_fs_coord_pixel_center(struct ureg_program *ureg,
ureg->property_fs_coord_pixel_center = fs_coord_pixel_center;
}
-
+void
+ureg_property_fs_color0_writes_all_cbufs(struct ureg_program *ureg,
+ unsigned fs_color0_writes_all_cbufs)
+{
+ ureg->property_fs_color0_writes_all_cbufs = fs_color0_writes_all_cbufs;
+}
struct ureg_src
ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg,
@@ -572,6 +589,41 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg,
return ureg->sampler[0];
}
+/*
+ * Allocate a new shader resource.
+ */
+struct ureg_src
+ureg_DECL_resource(struct ureg_program *ureg,
+ unsigned index,
+ unsigned target,
+ unsigned return_type_x,
+ unsigned return_type_y,
+ unsigned return_type_z,
+ unsigned return_type_w)
+{
+ struct ureg_src reg = ureg_src_register(TGSI_FILE_RESOURCE, index);
+ uint i;
+
+ for (i = 0; i < ureg->nr_resources; i++) {
+ if (ureg->resource[i].index == index) {
+ return reg;
+ }
+ }
+
+ if (i < PIPE_MAX_SHADER_RESOURCES) {
+ ureg->resource[i].index = index;
+ ureg->resource[i].target = target;
+ ureg->resource[i].return_type_x = return_type_x;
+ ureg->resource[i].return_type_y = return_type_y;
+ ureg->resource[i].return_type_z = return_type_z;
+ ureg->resource[i].return_type_w = return_type_w;
+ ureg->nr_resources++;
+ return reg;
+ }
+
+ assert(0);
+ return reg;
+}
static int
match_or_expand_immediate( const unsigned *v,
@@ -815,9 +867,10 @@ ureg_emit_dst( struct ureg_program *ureg,
assert(dst.File != TGSI_FILE_CONSTANT);
assert(dst.File != TGSI_FILE_INPUT);
assert(dst.File != TGSI_FILE_SAMPLER);
+ assert(dst.File != TGSI_FILE_RESOURCE);
assert(dst.File != TGSI_FILE_IMMEDIATE);
assert(dst.File < TGSI_FILE_COUNT);
-
+
out[n].value = 0;
out[n].dst.File = dst.File;
out[n].dst.WriteMask = dst.WriteMask;
@@ -1200,6 +1253,36 @@ emit_decl_range2D(struct ureg_program *ureg,
}
static void
+emit_decl_resource(struct ureg_program *ureg,
+ unsigned index,
+ unsigned target,
+ unsigned return_type_x,
+ unsigned return_type_y,
+ unsigned return_type_z,
+ unsigned return_type_w )
+{
+ union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
+
+ out[0].value = 0;
+ out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
+ out[0].decl.NrTokens = 3;
+ out[0].decl.File = TGSI_FILE_RESOURCE;
+ out[0].decl.UsageMask = 0xf;
+ out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT;
+
+ out[1].value = 0;
+ out[1].decl_range.First = index;
+ out[1].decl_range.Last = index;
+
+ out[2].value = 0;
+ out[2].decl_resource.Resource = target;
+ out[2].decl_resource.ReturnTypeX = return_type_x;
+ out[2].decl_resource.ReturnTypeY = return_type_y;
+ out[2].decl_resource.ReturnTypeZ = return_type_z;
+ out[2].decl_resource.ReturnTypeW = return_type_w;
+}
+
+static void
emit_immediate( struct ureg_program *ureg,
const unsigned *v,
unsigned type )
@@ -1278,6 +1361,14 @@ static void emit_decls( struct ureg_program *ureg )
ureg->property_fs_coord_pixel_center);
}
+ if (ureg->property_fs_color0_writes_all_cbufs) {
+ assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT);
+
+ emit_property(ureg,
+ TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS,
+ ureg->property_fs_color0_writes_all_cbufs);
+ }
+
if (ureg->processor == TGSI_PROCESSOR_VERTEX) {
for (i = 0; i < UREG_MAX_INPUT; i++) {
if (ureg->vs_inputs[i/32] & (1 << (i%32))) {
@@ -1327,6 +1418,16 @@ static void emit_decls( struct ureg_program *ureg )
ureg->sampler[i].Index, 1 );
}
+ for (i = 0; i < ureg->nr_resources; i++) {
+ emit_decl_resource(ureg,
+ ureg->resource[i].index,
+ ureg->resource[i].target,
+ ureg->resource[i].return_type_x,
+ ureg->resource[i].return_type_y,
+ ureg->resource[i].return_type_z,
+ ureg->resource[i].return_type_w);
+ }
+
if (ureg->const_decls.nr_constant_ranges) {
for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) {
emit_decl_range(ureg,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index acc463200a..9d5553f0ea 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -43,24 +43,24 @@ struct ureg_program;
*/
struct ureg_src
{
- unsigned File : 4; /* TGSI_FILE_ */
- unsigned SwizzleX : 2; /* TGSI_SWIZZLE_ */
- unsigned SwizzleY : 2; /* TGSI_SWIZZLE_ */
- unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */
- unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */
- unsigned Indirect : 1; /* BOOL */
- unsigned DimIndirect : 1; /* BOOL */
- unsigned Dimension : 1; /* BOOL */
- unsigned Absolute : 1; /* BOOL */
- unsigned Negate : 1; /* BOOL */
- int Index : 16; /* SINT */
+ unsigned File : 4; /* TGSI_FILE_ */
+ unsigned SwizzleX : 2; /* TGSI_SWIZZLE_ */
+ unsigned SwizzleY : 2; /* TGSI_SWIZZLE_ */
+ unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */
+ unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */
+ unsigned Indirect : 1; /* BOOL */
+ unsigned DimIndirect : 1; /* BOOL */
+ unsigned Dimension : 1; /* BOOL */
+ unsigned Absolute : 1; /* BOOL */
+ unsigned Negate : 1; /* BOOL */
unsigned IndirectFile : 4; /* TGSI_FILE_ */
- int IndirectIndex : 16; /* SINT */
unsigned IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */
- int DimensionIndex : 16; /* SINT */
unsigned DimIndFile : 4; /* TGSI_FILE_ */
- int DimIndIndex : 16; /* SINT */
unsigned DimIndSwizzle : 2; /* TGSI_SWIZZLE_ */
+ int Index : 16; /* SINT */
+ int IndirectIndex : 16; /* SINT */
+ int DimensionIndex : 16; /* SINT */
+ int DimIndIndex : 16; /* SINT */
};
/* Very similar to a tgsi_dst_register, removing unsupported fields
@@ -153,6 +153,10 @@ void
ureg_property_fs_coord_pixel_center(struct ureg_program *ureg,
unsigned fs_coord_pixel_center);
+void
+ureg_property_fs_color0_writes_all_cbufs(struct ureg_program *ureg,
+ unsigned fs_color0_writes_all_cbufs);
+
/***********************************************************************
* Build shader declarations:
*/
@@ -266,6 +270,15 @@ struct ureg_src
ureg_DECL_sampler( struct ureg_program *,
unsigned index );
+struct ureg_src
+ureg_DECL_resource(struct ureg_program *,
+ unsigned index,
+ unsigned target,
+ unsigned return_type_x,
+ unsigned return_type_y,
+ unsigned return_type_z,
+ unsigned return_type_w );
+
static INLINE struct ureg_src
ureg_imm4f( struct ureg_program *ureg,
@@ -729,6 +742,66 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
}
+#define OP14( op ) \
+static INLINE void ureg_##op( struct ureg_program *ureg, \
+ struct ureg_dst dst, \
+ struct ureg_src src0, \
+ struct ureg_src src1, \
+ struct ureg_src src2, \
+ struct ureg_src src3 ) \
+{ \
+ unsigned opcode = TGSI_OPCODE_##op; \
+ unsigned insn = ureg_emit_insn(ureg, \
+ opcode, \
+ dst.Saturate, \
+ dst.Predicate, \
+ dst.PredNegate, \
+ dst.PredSwizzleX, \
+ dst.PredSwizzleY, \
+ dst.PredSwizzleZ, \
+ dst.PredSwizzleW, \
+ 1, \
+ 4).insn_token; \
+ ureg_emit_dst( ureg, dst ); \
+ ureg_emit_src( ureg, src0 ); \
+ ureg_emit_src( ureg, src1 ); \
+ ureg_emit_src( ureg, src2 ); \
+ ureg_emit_src( ureg, src3 ); \
+ ureg_fixup_insn_size( ureg, insn ); \
+}
+
+
+#define OP15( op ) \
+static INLINE void ureg_##op( struct ureg_program *ureg, \
+ struct ureg_dst dst, \
+ struct ureg_src src0, \
+ struct ureg_src src1, \
+ struct ureg_src src2, \
+ struct ureg_src src3, \
+ struct ureg_src src4 ) \
+{ \
+ unsigned opcode = TGSI_OPCODE_##op; \
+ unsigned insn = ureg_emit_insn(ureg, \
+ opcode, \
+ dst.Saturate, \
+ dst.Predicate, \
+ dst.PredNegate, \
+ dst.PredSwizzleX, \
+ dst.PredSwizzleY, \
+ dst.PredSwizzleZ, \
+ dst.PredSwizzleW, \
+ 1, \
+ 5).insn_token; \
+ ureg_emit_dst( ureg, dst ); \
+ ureg_emit_src( ureg, src0 ); \
+ ureg_emit_src( ureg, src1 ); \
+ ureg_emit_src( ureg, src2 ); \
+ ureg_emit_src( ureg, src3 ); \
+ ureg_emit_src( ureg, src4 ); \
+ ureg_fixup_insn_size( ureg, insn ); \
+}
+
+
/* Use a template include to generate a correctly-typed ureg_OP()
* function for each TGSI opcode:
*/
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 08e7e89bd6..aa9a886d4a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -280,7 +280,12 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
case TGSI_TEXTURE_CUBE:
read_mask = TGSI_WRITEMASK_XYZ;
break;
-
+ case TGSI_TEXTURE_1D_ARRAY:
+ read_mask = TGSI_WRITEMASK_XY;
+ break;
+ case TGSI_TEXTURE_2D_ARRAY:
+ read_mask = TGSI_WRITEMASK_XYZ;
+ break;
default:
assert(0);
read_mask = 0;
diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c
index 73287b667d..05e455388b 100644
--- a/src/gallium/auxiliary/translate/translate.c
+++ b/src/gallium/auxiliary/translate/translate.c
@@ -39,7 +39,7 @@ struct translate *translate_create( const struct translate_key *key )
struct translate *translate = NULL;
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- translate = translate_sse2_create( key );
+ /*translate = translate_sse2_create( key );*/
if (translate)
return translate;
#else
diff --git a/src/gallium/auxiliary/translate/translate_cache.c b/src/gallium/auxiliary/translate/translate_cache.c
index d8069a149c..3f1ecb630f 100644
--- a/src/gallium/auxiliary/translate/translate_cache.c
+++ b/src/gallium/auxiliary/translate/translate_cache.c
@@ -40,6 +40,10 @@ struct translate_cache {
struct translate_cache * translate_cache_create( void )
{
struct translate_cache *cache = MALLOC_STRUCT(translate_cache);
+ if (cache == NULL) {
+ return NULL;
+ }
+
cache->hash = cso_hash_create();
return cache;
}
diff --git a/src/gallium/auxiliary/util/dbghelp.h b/src/gallium/auxiliary/util/dbghelp.h
new file mode 100644
index 0000000000..bc7c53cf9f
--- /dev/null
+++ b/src/gallium/auxiliary/util/dbghelp.h
@@ -0,0 +1,1265 @@
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the w64 mingw-runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
+#ifndef _DBGHELP_
+#define _DBGHELP_
+
+#ifdef _WIN64
+#ifndef _IMAGEHLP64
+#define _IMAGEHLP64
+#endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define IMAGEAPI DECLSPEC_IMPORT WINAPI
+#define DBHLP_DEPRECIATED __declspec(deprecated)
+
+#define DBHLPAPI IMAGEAPI
+
+#define IMAGE_SEPARATION (64*1024)
+
+ typedef struct _LOADED_IMAGE {
+ PSTR ModuleName;
+ HANDLE hFile;
+ PUCHAR MappedAddress;
+#ifdef _IMAGEHLP64
+ PIMAGE_NT_HEADERS64 FileHeader;
+#else
+ PIMAGE_NT_HEADERS32 FileHeader;
+#endif
+ PIMAGE_SECTION_HEADER LastRvaSection;
+ ULONG NumberOfSections;
+ PIMAGE_SECTION_HEADER Sections;
+ ULONG Characteristics;
+ BOOLEAN fSystemImage;
+ BOOLEAN fDOSImage;
+ LIST_ENTRY Links;
+ ULONG SizeOfImage;
+ } LOADED_IMAGE,*PLOADED_IMAGE;
+
+#define MAX_SYM_NAME 2000
+
+ typedef BOOL (CALLBACK *PFIND_DEBUG_FILE_CALLBACK)(HANDLE FileHandle,PSTR FileName,PVOID CallerData);
+ typedef BOOL (CALLBACK *PFINDFILEINPATHCALLBACK)(PSTR filename,PVOID context);
+ typedef BOOL (CALLBACK *PFIND_EXE_FILE_CALLBACK)(HANDLE FileHandle,PSTR FileName,PVOID CallerData);
+
+ typedef BOOL (WINAPI *PSYMBOLSERVERPROC)(LPCSTR,LPCSTR,PVOID,DWORD,DWORD,LPSTR);
+ typedef BOOL (WINAPI *PSYMBOLSERVEROPENPROC)(VOID);
+ typedef BOOL (WINAPI *PSYMBOLSERVERCLOSEPROC)(VOID);
+ typedef BOOL (WINAPI *PSYMBOLSERVERSETOPTIONSPROC)(UINT_PTR,ULONG64);
+ typedef BOOL (CALLBACK WINAPI *PSYMBOLSERVERCALLBACKPROC)(UINT_PTR action,ULONG64 data,ULONG64 context);
+ typedef UINT_PTR (WINAPI *PSYMBOLSERVERGETOPTIONSPROC)();
+ typedef BOOL (WINAPI *PSYMBOLSERVERPINGPROC)(LPCSTR);
+
+ HANDLE IMAGEAPI FindDebugInfoFile(PSTR FileName,PSTR SymbolPath,PSTR DebugFilePath);
+ HANDLE IMAGEAPI FindDebugInfoFileEx(PSTR FileName,PSTR SymbolPath,PSTR DebugFilePath,PFIND_DEBUG_FILE_CALLBACK Callback,PVOID CallerData);
+ BOOL IMAGEAPI SymFindFileInPath(HANDLE hprocess,LPSTR SearchPath,LPSTR FileName,PVOID id,DWORD two,DWORD three,DWORD flags,LPSTR FoundFile,PFINDFILEINPATHCALLBACK callback,PVOID context);
+ HANDLE IMAGEAPI FindExecutableImage(PSTR FileName,PSTR SymbolPath,PSTR ImageFilePath);
+ HANDLE IMAGEAPI FindExecutableImageEx(PSTR FileName,PSTR SymbolPath,PSTR ImageFilePath,PFIND_EXE_FILE_CALLBACK Callback,PVOID CallerData);
+ PIMAGE_NT_HEADERS IMAGEAPI ImageNtHeader(PVOID Base);
+ PVOID IMAGEAPI ImageDirectoryEntryToDataEx(PVOID Base,BOOLEAN MappedAsImage,USHORT DirectoryEntry,PULONG Size,PIMAGE_SECTION_HEADER *FoundHeader);
+ PVOID IMAGEAPI ImageDirectoryEntryToData(PVOID Base,BOOLEAN MappedAsImage,USHORT DirectoryEntry,PULONG Size);
+ PIMAGE_SECTION_HEADER IMAGEAPI ImageRvaToSection(PIMAGE_NT_HEADERS NtHeaders,PVOID Base,ULONG Rva);
+ PVOID IMAGEAPI ImageRvaToVa(PIMAGE_NT_HEADERS NtHeaders,PVOID Base,ULONG Rva,PIMAGE_SECTION_HEADER *LastRvaSection);
+
+#define SSRVOPT_CALLBACK 0x0001
+#define SSRVOPT_DWORD 0x0002
+#define SSRVOPT_DWORDPTR 0x0004
+#define SSRVOPT_GUIDPTR 0x0008
+#define SSRVOPT_OLDGUIDPTR 0x0010
+#define SSRVOPT_UNATTENDED 0x0020
+#define SSRVOPT_NOCOPY 0x0040
+#define SSRVOPT_PARENTWIN 0x0080
+#define SSRVOPT_PARAMTYPE 0x0100
+#define SSRVOPT_SECURE 0x0200
+#define SSRVOPT_TRACE 0x0400
+#define SSRVOPT_SETCONTEXT 0x0800
+#define SSRVOPT_PROXY 0x1000
+#define SSRVOPT_DOWNSTREAM_STORE 0x2000
+#define SSRVOPT_RESET ((ULONG_PTR)-1)
+
+#define SSRVACTION_TRACE 1
+#define SSRVACTION_QUERYCANCEL 2
+#define SSRVACTION_EVENT 3
+
+#ifndef _WIN64
+
+ typedef struct _IMAGE_DEBUG_INFORMATION {
+ LIST_ENTRY List;
+ DWORD ReservedSize;
+ PVOID ReservedMappedBase;
+ USHORT ReservedMachine;
+ USHORT ReservedCharacteristics;
+ DWORD ReservedCheckSum;
+ DWORD ImageBase;
+ DWORD SizeOfImage;
+ DWORD ReservedNumberOfSections;
+ PIMAGE_SECTION_HEADER ReservedSections;
+ DWORD ReservedExportedNamesSize;
+ PSTR ReservedExportedNames;
+ DWORD ReservedNumberOfFunctionTableEntries;
+ PIMAGE_FUNCTION_ENTRY ReservedFunctionTableEntries;
+ DWORD ReservedLowestFunctionStartingAddress;
+ DWORD ReservedHighestFunctionEndingAddress;
+ DWORD ReservedNumberOfFpoTableEntries;
+ PFPO_DATA ReservedFpoTableEntries;
+ DWORD SizeOfCoffSymbols;
+ PIMAGE_COFF_SYMBOLS_HEADER CoffSymbols;
+ DWORD ReservedSizeOfCodeViewSymbols;
+ PVOID ReservedCodeViewSymbols;
+ PSTR ImageFilePath;
+ PSTR ImageFileName;
+ PSTR ReservedDebugFilePath;
+ DWORD ReservedTimeDateStamp;
+ BOOL ReservedRomImage;
+ PIMAGE_DEBUG_DIRECTORY ReservedDebugDirectory;
+ DWORD ReservedNumberOfDebugDirectories;
+ DWORD ReservedOriginalFunctionTableBaseAddress;
+ DWORD Reserved[2 ];
+ } IMAGE_DEBUG_INFORMATION,*PIMAGE_DEBUG_INFORMATION;
+
+ PIMAGE_DEBUG_INFORMATION IMAGEAPI MapDebugInformation(HANDLE FileHandle,PSTR FileName,PSTR SymbolPath,DWORD ImageBase);
+ BOOL IMAGEAPI UnmapDebugInformation(PIMAGE_DEBUG_INFORMATION DebugInfo);
+#endif
+
+ typedef BOOL (CALLBACK *PENUMDIRTREE_CALLBACK)(LPCSTR FilePath,PVOID CallerData);
+
+ BOOL IMAGEAPI SearchTreeForFile(PSTR RootPath,PSTR InputPathName,PSTR OutputPathBuffer);
+ BOOL IMAGEAPI EnumDirTree(HANDLE hProcess,PSTR RootPath,PSTR InputPathName,PSTR OutputPathBuffer,PENUMDIRTREE_CALLBACK Callback,PVOID CallbackData);
+ BOOL IMAGEAPI MakeSureDirectoryPathExists(PCSTR DirPath);
+
+#define UNDNAME_COMPLETE (0x0000)
+#define UNDNAME_NO_LEADING_UNDERSCORES (0x0001)
+#define UNDNAME_NO_MS_KEYWORDS (0x0002)
+#define UNDNAME_NO_FUNCTION_RETURNS (0x0004)
+#define UNDNAME_NO_ALLOCATION_MODEL (0x0008)
+#define UNDNAME_NO_ALLOCATION_LANGUAGE (0x0010)
+#define UNDNAME_NO_MS_THISTYPE (0x0020)
+#define UNDNAME_NO_CV_THISTYPE (0x0040)
+#define UNDNAME_NO_THISTYPE (0x0060)
+#define UNDNAME_NO_ACCESS_SPECIFIERS (0x0080)
+#define UNDNAME_NO_THROW_SIGNATURES (0x0100)
+#define UNDNAME_NO_MEMBER_TYPE (0x0200)
+#define UNDNAME_NO_RETURN_UDT_MODEL (0x0400)
+#define UNDNAME_32_BIT_DECODE (0x0800)
+#define UNDNAME_NAME_ONLY (0x1000)
+#define UNDNAME_NO_ARGUMENTS (0x2000)
+#define UNDNAME_NO_SPECIAL_SYMS (0x4000)
+
+ DWORD IMAGEAPI WINAPI UnDecorateSymbolName(PCSTR DecoratedName,PSTR UnDecoratedName,DWORD UndecoratedLength,DWORD Flags);
+
+#define DBHHEADER_DEBUGDIRS 0x1
+
+ typedef struct _MODLOAD_DATA {
+ DWORD ssize;
+ DWORD ssig;
+ PVOID data;
+ DWORD size;
+ DWORD flags;
+ } MODLOAD_DATA,*PMODLOAD_DATA;
+
+ typedef enum {
+ AddrMode1616,AddrMode1632,AddrModeReal,AddrModeFlat
+ } ADDRESS_MODE;
+
+ typedef struct _tagADDRESS64 {
+ DWORD64 Offset;
+ WORD Segment;
+ ADDRESS_MODE Mode;
+ } ADDRESS64,*LPADDRESS64;
+
+#ifdef _IMAGEHLP64
+#define ADDRESS ADDRESS64
+#define LPADDRESS LPADDRESS64
+#else
+ typedef struct _tagADDRESS {
+ DWORD Offset;
+ WORD Segment;
+ ADDRESS_MODE Mode;
+ } ADDRESS,*LPADDRESS;
+
+ static __inline void Address32To64(LPADDRESS a32,LPADDRESS64 a64) {
+ a64->Offset = (ULONG64)(LONG64)(LONG)a32->Offset;
+ a64->Segment = a32->Segment;
+ a64->Mode = a32->Mode;
+ }
+
+ static __inline void Address64To32(LPADDRESS64 a64,LPADDRESS a32) {
+ a32->Offset = (ULONG)a64->Offset;
+ a32->Segment = a64->Segment;
+ a32->Mode = a64->Mode;
+ }
+#endif
+
+ typedef struct _KDHELP64 {
+ DWORD64 Thread;
+ DWORD ThCallbackStack;
+ DWORD ThCallbackBStore;
+ DWORD NextCallback;
+ DWORD FramePointer;
+ DWORD64 KiCallUserMode;
+ DWORD64 KeUserCallbackDispatcher;
+ DWORD64 SystemRangeStart;
+ DWORD64 Reserved[8];
+ } KDHELP64,*PKDHELP64;
+
+#ifdef _IMAGEHLP64
+#define KDHELP KDHELP64
+#define PKDHELP PKDHELP64
+#else
+ typedef struct _KDHELP {
+ DWORD Thread;
+ DWORD ThCallbackStack;
+ DWORD NextCallback;
+ DWORD FramePointer;
+ DWORD KiCallUserMode;
+ DWORD KeUserCallbackDispatcher;
+ DWORD SystemRangeStart;
+ DWORD ThCallbackBStore;
+ DWORD Reserved[8];
+ } KDHELP,*PKDHELP;
+
+ static __inline void KdHelp32To64(PKDHELP p32,PKDHELP64 p64) {
+ p64->Thread = p32->Thread;
+ p64->ThCallbackStack = p32->ThCallbackStack;
+ p64->NextCallback = p32->NextCallback;
+ p64->FramePointer = p32->FramePointer;
+ p64->KiCallUserMode = p32->KiCallUserMode;
+ p64->KeUserCallbackDispatcher = p32->KeUserCallbackDispatcher;
+ p64->SystemRangeStart = p32->SystemRangeStart;
+ }
+#endif
+
+ typedef struct _tagSTACKFRAME64 {
+ ADDRESS64 AddrPC;
+ ADDRESS64 AddrReturn;
+ ADDRESS64 AddrFrame;
+ ADDRESS64 AddrStack;
+ ADDRESS64 AddrBStore;
+ PVOID FuncTableEntry;
+ DWORD64 Params[4];
+ BOOL Far;
+ BOOL Virtual;
+ DWORD64 Reserved[3];
+ KDHELP64 KdHelp;
+ } STACKFRAME64,*LPSTACKFRAME64;
+
+#ifdef _IMAGEHLP64
+#define STACKFRAME STACKFRAME64
+#define LPSTACKFRAME LPSTACKFRAME64
+#else
+ typedef struct _tagSTACKFRAME {
+ ADDRESS AddrPC;
+ ADDRESS AddrReturn;
+ ADDRESS AddrFrame;
+ ADDRESS AddrStack;
+ PVOID FuncTableEntry;
+ DWORD Params[4];
+ BOOL Far;
+ BOOL Virtual;
+ DWORD Reserved[3];
+ KDHELP KdHelp;
+ ADDRESS AddrBStore;
+ } STACKFRAME,*LPSTACKFRAME;
+#endif
+
+ typedef BOOL (WINAPI *PREAD_PROCESS_MEMORY_ROUTINE64)(HANDLE hProcess,DWORD64 qwBaseAddress,PVOID lpBuffer,DWORD nSize,LPDWORD lpNumberOfBytesRead);
+ typedef PVOID (WINAPI *PFUNCTION_TABLE_ACCESS_ROUTINE64)(HANDLE hProcess,DWORD64 AddrBase);
+ typedef DWORD64 (WINAPI *PGET_MODULE_BASE_ROUTINE64)(HANDLE hProcess,DWORD64 Address);
+ typedef DWORD64 (WINAPI *PTRANSLATE_ADDRESS_ROUTINE64)(HANDLE hProcess,HANDLE hThread,LPADDRESS64 lpaddr);
+
+ BOOL IMAGEAPI StackWalk64(DWORD MachineType,HANDLE hProcess,HANDLE hThread,LPSTACKFRAME64 StackFrame,PVOID ContextRecord,PREAD_PROCESS_MEMORY_ROUTINE64 ReadMemoryRoutine,PFUNCTION_TABLE_ACCESS_ROUTINE64 FunctionTableAccessRoutine,PGET_MODULE_BASE_ROUTINE64 GetModuleBaseRoutine,PTRANSLATE_ADDRESS_ROUTINE64 TranslateAddress);
+
+#ifdef _IMAGEHLP64
+#define PREAD_PROCESS_MEMORY_ROUTINE PREAD_PROCESS_MEMORY_ROUTINE64
+#define PFUNCTION_TABLE_ACCESS_ROUTINE PFUNCTION_TABLE_ACCESS_ROUTINE64
+#define PGET_MODULE_BASE_ROUTINE PGET_MODULE_BASE_ROUTINE64
+#define PTRANSLATE_ADDRESS_ROUTINE PTRANSLATE_ADDRESS_ROUTINE64
+#define StackWalk StackWalk64
+#else
+ typedef BOOL (WINAPI *PREAD_PROCESS_MEMORY_ROUTINE)(HANDLE hProcess,DWORD lpBaseAddress,PVOID lpBuffer,DWORD nSize,PDWORD lpNumberOfBytesRead);
+ typedef PVOID (WINAPI *PFUNCTION_TABLE_ACCESS_ROUTINE)(HANDLE hProcess,DWORD AddrBase);
+ typedef DWORD (WINAPI *PGET_MODULE_BASE_ROUTINE)(HANDLE hProcess,DWORD Address);
+ typedef DWORD (WINAPI *PTRANSLATE_ADDRESS_ROUTINE)(HANDLE hProcess,HANDLE hThread,LPADDRESS lpaddr);
+
+ BOOL IMAGEAPI StackWalk(DWORD MachineType,HANDLE hProcess,HANDLE hThread,LPSTACKFRAME StackFrame,PVOID ContextRecord,PREAD_PROCESS_MEMORY_ROUTINE ReadMemoryRoutine,PFUNCTION_TABLE_ACCESS_ROUTINE FunctionTableAccessRoutine,PGET_MODULE_BASE_ROUTINE GetModuleBaseRoutine,PTRANSLATE_ADDRESS_ROUTINE TranslateAddress);
+#endif
+
+#define API_VERSION_NUMBER 9
+
+ typedef struct API_VERSION {
+ USHORT MajorVersion;
+ USHORT MinorVersion;
+ USHORT Revision;
+ USHORT Reserved;
+ } API_VERSION,*LPAPI_VERSION;
+
+ LPAPI_VERSION IMAGEAPI ImagehlpApiVersion(VOID);
+ LPAPI_VERSION IMAGEAPI ImagehlpApiVersionEx(LPAPI_VERSION AppVersion);
+ DWORD IMAGEAPI GetTimestampForLoadedLibrary(HMODULE Module);
+
+ typedef BOOL (CALLBACK *PSYM_ENUMMODULES_CALLBACK64)(PSTR ModuleName,DWORD64 BaseOfDll,PVOID UserContext);
+ typedef BOOL (CALLBACK *PSYM_ENUMSYMBOLS_CALLBACK64)(PSTR SymbolName,DWORD64 SymbolAddress,ULONG SymbolSize,PVOID UserContext);
+ typedef BOOL (CALLBACK *PSYM_ENUMSYMBOLS_CALLBACK64W)(PWSTR SymbolName,DWORD64 SymbolAddress,ULONG SymbolSize,PVOID UserContext);
+ typedef BOOL (CALLBACK *PENUMLOADED_MODULES_CALLBACK64)(PSTR ModuleName,DWORD64 ModuleBase,ULONG ModuleSize,PVOID UserContext);
+ typedef BOOL (CALLBACK *PSYMBOL_REGISTERED_CALLBACK64)(HANDLE hProcess,ULONG ActionCode,ULONG64 CallbackData,ULONG64 UserContext);
+ typedef PVOID (CALLBACK *PSYMBOL_FUNCENTRY_CALLBACK)(HANDLE hProcess,DWORD AddrBase,PVOID UserContext);
+ typedef PVOID (CALLBACK *PSYMBOL_FUNCENTRY_CALLBACK64)(HANDLE hProcess,ULONG64 AddrBase,ULONG64 UserContext);
+
+#ifdef _IMAGEHLP64
+#define PSYM_ENUMMODULES_CALLBACK PSYM_ENUMMODULES_CALLBACK64
+#define PSYM_ENUMSYMBOLS_CALLBACK PSYM_ENUMSYMBOLS_CALLBACK64
+#define PSYM_ENUMSYMBOLS_CALLBACKW PSYM_ENUMSYMBOLS_CALLBACK64W
+#define PENUMLOADED_MODULES_CALLBACK PENUMLOADED_MODULES_CALLBACK64
+#define PSYMBOL_REGISTERED_CALLBACK PSYMBOL_REGISTERED_CALLBACK64
+#define PSYMBOL_FUNCENTRY_CALLBACK PSYMBOL_FUNCENTRY_CALLBACK64
+#else
+ typedef BOOL (CALLBACK *PSYM_ENUMMODULES_CALLBACK)(PSTR ModuleName,ULONG BaseOfDll,PVOID UserContext);
+ typedef BOOL (CALLBACK *PSYM_ENUMSYMBOLS_CALLBACK)(PSTR SymbolName,ULONG SymbolAddress,ULONG SymbolSize,PVOID UserContext);
+ typedef BOOL (CALLBACK *PSYM_ENUMSYMBOLS_CALLBACKW)(PWSTR SymbolName,ULONG SymbolAddress,ULONG SymbolSize,PVOID UserContext);
+ typedef BOOL (CALLBACK *PENUMLOADED_MODULES_CALLBACK)(PSTR ModuleName,ULONG ModuleBase,ULONG ModuleSize,PVOID UserContext);
+ typedef BOOL (CALLBACK *PSYMBOL_REGISTERED_CALLBACK)(HANDLE hProcess,ULONG ActionCode,PVOID CallbackData,PVOID UserContext);
+#endif
+
+#define SYMFLAG_VALUEPRESENT 0x00000001
+#define SYMFLAG_REGISTER 0x00000008
+#define SYMFLAG_REGREL 0x00000010
+#define SYMFLAG_FRAMEREL 0x00000020
+#define SYMFLAG_PARAMETER 0x00000040
+#define SYMFLAG_LOCAL 0x00000080
+#define SYMFLAG_CONSTANT 0x00000100
+#define SYMFLAG_EXPORT 0x00000200
+#define SYMFLAG_FORWARDER 0x00000400
+#define SYMFLAG_FUNCTION 0x00000800
+#define SYMFLAG_VIRTUAL 0x00001000
+#define SYMFLAG_THUNK 0x00002000
+#define SYMFLAG_TLSREL 0x00004000
+
+ typedef enum {
+ SymNone = 0,SymCoff,SymCv,SymPdb,SymExport,SymDeferred,SymSym,SymDia,SymVirtual,NumSymTypes
+ } SYM_TYPE;
+
+ typedef struct _IMAGEHLP_SYMBOL64 {
+ DWORD SizeOfStruct;
+ DWORD64 Address;
+ DWORD Size;
+ DWORD Flags;
+ DWORD MaxNameLength;
+ CHAR Name[1];
+ } IMAGEHLP_SYMBOL64,*PIMAGEHLP_SYMBOL64;
+
+ typedef struct _IMAGEHLP_SYMBOL64_PACKAGE {
+ IMAGEHLP_SYMBOL64 sym;
+ CHAR name[MAX_SYM_NAME + 1];
+ } IMAGEHLP_SYMBOL64_PACKAGE,*PIMAGEHLP_SYMBOL64_PACKAGE;
+
+#ifdef _IMAGEHLP64
+
+#define IMAGEHLP_SYMBOL IMAGEHLP_SYMBOL64
+#define PIMAGEHLP_SYMBOL PIMAGEHLP_SYMBOL64
+#define IMAGEHLP_SYMBOL_PACKAGE IMAGEHLP_SYMBOL64_PACKAGE
+#define PIMAGEHLP_SYMBOL_PACKAGE PIMAGEHLP_SYMBOL64_PACKAGE
+#else
+
+ typedef struct _IMAGEHLP_SYMBOL {
+ DWORD SizeOfStruct;
+ DWORD Address;
+ DWORD Size;
+ DWORD Flags;
+ DWORD MaxNameLength;
+ CHAR Name[1];
+ } IMAGEHLP_SYMBOL,*PIMAGEHLP_SYMBOL;
+
+ typedef struct _IMAGEHLP_SYMBOL_PACKAGE {
+ IMAGEHLP_SYMBOL sym;
+ CHAR name[MAX_SYM_NAME + 1];
+ } IMAGEHLP_SYMBOL_PACKAGE,*PIMAGEHLP_SYMBOL_PACKAGE;
+#endif
+
+ typedef struct _IMAGEHLP_MODULE64 {
+ DWORD SizeOfStruct;
+ DWORD64 BaseOfImage;
+ DWORD ImageSize;
+ DWORD TimeDateStamp;
+ DWORD CheckSum;
+ DWORD NumSyms;
+ SYM_TYPE SymType;
+ CHAR ModuleName[32];
+ CHAR ImageName[256];
+ CHAR LoadedImageName[256];
+ CHAR LoadedPdbName[256];
+ DWORD CVSig;
+ CHAR CVData[MAX_PATH*3];
+ DWORD PdbSig;
+ GUID PdbSig70;
+ DWORD PdbAge;
+ BOOL PdbUnmatched;
+ BOOL DbgUnmatched;
+ BOOL LineNumbers;
+ BOOL GlobalSymbols;
+ BOOL TypeInfo;
+ } IMAGEHLP_MODULE64,*PIMAGEHLP_MODULE64;
+
+ typedef struct _IMAGEHLP_MODULE64W {
+ DWORD SizeOfStruct;
+ DWORD64 BaseOfImage;
+ DWORD ImageSize;
+ DWORD TimeDateStamp;
+ DWORD CheckSum;
+ DWORD NumSyms;
+ SYM_TYPE SymType;
+ WCHAR ModuleName[32];
+ WCHAR ImageName[256];
+ WCHAR LoadedImageName[256];
+ WCHAR LoadedPdbName[256];
+ DWORD CVSig;
+ WCHAR CVData[MAX_PATH*3];
+ DWORD PdbSig;
+ GUID PdbSig70;
+ DWORD PdbAge;
+ BOOL PdbUnmatched;
+ BOOL DbgUnmatched;
+ BOOL LineNumbers;
+ BOOL GlobalSymbols;
+ BOOL TypeInfo;
+ } IMAGEHLP_MODULEW64,*PIMAGEHLP_MODULEW64;
+
+#ifdef _IMAGEHLP64
+#define IMAGEHLP_MODULE IMAGEHLP_MODULE64
+#define PIMAGEHLP_MODULE PIMAGEHLP_MODULE64
+#define IMAGEHLP_MODULEW IMAGEHLP_MODULEW64
+#define PIMAGEHLP_MODULEW PIMAGEHLP_MODULEW64
+#else
+ typedef struct _IMAGEHLP_MODULE {
+ DWORD SizeOfStruct;
+ DWORD BaseOfImage;
+ DWORD ImageSize;
+ DWORD TimeDateStamp;
+ DWORD CheckSum;
+ DWORD NumSyms;
+ SYM_TYPE SymType;
+ CHAR ModuleName[32];
+ CHAR ImageName[256];
+ CHAR LoadedImageName[256];
+ } IMAGEHLP_MODULE,*PIMAGEHLP_MODULE;
+
+ typedef struct _IMAGEHLP_MODULEW {
+ DWORD SizeOfStruct;
+ DWORD BaseOfImage;
+ DWORD ImageSize;
+ DWORD TimeDateStamp;
+ DWORD CheckSum;
+ DWORD NumSyms;
+ SYM_TYPE SymType;
+ WCHAR ModuleName[32];
+ WCHAR ImageName[256];
+ WCHAR LoadedImageName[256];
+ } IMAGEHLP_MODULEW,*PIMAGEHLP_MODULEW;
+#endif
+
+ typedef struct _IMAGEHLP_LINE64 {
+ DWORD SizeOfStruct;
+ PVOID Key;
+ DWORD LineNumber;
+ PCHAR FileName;
+ DWORD64 Address;
+ } IMAGEHLP_LINE64,*PIMAGEHLP_LINE64;
+
+#ifdef _IMAGEHLP64
+#define IMAGEHLP_LINE IMAGEHLP_LINE64
+#define PIMAGEHLP_LINE PIMAGEHLP_LINE64
+#else
+ typedef struct _IMAGEHLP_LINE {
+ DWORD SizeOfStruct;
+ PVOID Key;
+ DWORD LineNumber;
+ PCHAR FileName;
+ DWORD Address;
+ } IMAGEHLP_LINE,*PIMAGEHLP_LINE;
+#endif
+
+ typedef struct _SOURCEFILE {
+ DWORD64 ModBase;
+ PCHAR FileName;
+ } SOURCEFILE,*PSOURCEFILE;
+
+#define CBA_DEFERRED_SYMBOL_LOAD_START 0x00000001
+#define CBA_DEFERRED_SYMBOL_LOAD_COMPLETE 0x00000002
+#define CBA_DEFERRED_SYMBOL_LOAD_FAILURE 0x00000003
+#define CBA_SYMBOLS_UNLOADED 0x00000004
+#define CBA_DUPLICATE_SYMBOL 0x00000005
+#define CBA_READ_MEMORY 0x00000006
+#define CBA_DEFERRED_SYMBOL_LOAD_CANCEL 0x00000007
+#define CBA_SET_OPTIONS 0x00000008
+#define CBA_EVENT 0x00000010
+#define CBA_DEFERRED_SYMBOL_LOAD_PARTIAL 0x00000020
+#define CBA_DEBUG_INFO 0x10000000
+
+ typedef struct _IMAGEHLP_CBA_READ_MEMORY {
+ DWORD64 addr;
+ PVOID buf;
+ DWORD bytes;
+ DWORD *bytesread;
+ } IMAGEHLP_CBA_READ_MEMORY,*PIMAGEHLP_CBA_READ_MEMORY;
+
+ enum {
+ sevInfo = 0,sevProblem,sevAttn,sevFatal,sevMax
+ };
+
+ typedef struct _IMAGEHLP_CBA_EVENT {
+ DWORD severity;
+ DWORD code;
+ PCHAR desc;
+ PVOID object;
+ } IMAGEHLP_CBA_EVENT,*PIMAGEHLP_CBA_EVENT;
+
+ typedef struct _IMAGEHLP_DEFERRED_SYMBOL_LOAD64 {
+ DWORD SizeOfStruct;
+ DWORD64 BaseOfImage;
+ DWORD CheckSum;
+ DWORD TimeDateStamp;
+ CHAR FileName[MAX_PATH];
+ BOOLEAN Reparse;
+ HANDLE hFile;
+ DWORD Flags;
+ } IMAGEHLP_DEFERRED_SYMBOL_LOAD64,*PIMAGEHLP_DEFERRED_SYMBOL_LOAD64;
+
+#define DSLFLAG_MISMATCHED_PDB 0x1
+#define DSLFLAG_MISMATCHED_DBG 0x2
+
+#ifdef _IMAGEHLP64
+#define IMAGEHLP_DEFERRED_SYMBOL_LOAD IMAGEHLP_DEFERRED_SYMBOL_LOAD64
+#define PIMAGEHLP_DEFERRED_SYMBOL_LOAD PIMAGEHLP_DEFERRED_SYMBOL_LOAD64
+#else
+ typedef struct _IMAGEHLP_DEFERRED_SYMBOL_LOAD {
+ DWORD SizeOfStruct;
+ DWORD BaseOfImage;
+ DWORD CheckSum;
+ DWORD TimeDateStamp;
+ CHAR FileName[MAX_PATH];
+ BOOLEAN Reparse;
+ HANDLE hFile;
+ } IMAGEHLP_DEFERRED_SYMBOL_LOAD,*PIMAGEHLP_DEFERRED_SYMBOL_LOAD;
+#endif
+
+ typedef struct _IMAGEHLP_DUPLICATE_SYMBOL64 {
+ DWORD SizeOfStruct;
+ DWORD NumberOfDups;
+ PIMAGEHLP_SYMBOL64 Symbol;
+ DWORD SelectedSymbol;
+ } IMAGEHLP_DUPLICATE_SYMBOL64,*PIMAGEHLP_DUPLICATE_SYMBOL64;
+
+#ifdef _IMAGEHLP64
+#define IMAGEHLP_DUPLICATE_SYMBOL IMAGEHLP_DUPLICATE_SYMBOL64
+#define PIMAGEHLP_DUPLICATE_SYMBOL PIMAGEHLP_DUPLICATE_SYMBOL64
+#else
+ typedef struct _IMAGEHLP_DUPLICATE_SYMBOL {
+ DWORD SizeOfStruct;
+ DWORD NumberOfDups;
+ PIMAGEHLP_SYMBOL Symbol;
+ DWORD SelectedSymbol;
+ } IMAGEHLP_DUPLICATE_SYMBOL,*PIMAGEHLP_DUPLICATE_SYMBOL;
+#endif
+
+ BOOL IMAGEAPI SymSetParentWindow(HWND hwnd);
+ PCHAR IMAGEAPI SymSetHomeDirectory(PCSTR dir);
+ PCHAR IMAGEAPI SymGetHomeDirectory(DWORD type,PSTR dir,size_t size);
+
+ enum {
+ hdBase = 0,hdSym,hdSrc,hdMax
+ };
+
+#define SYMOPT_CASE_INSENSITIVE 0x00000001
+#define SYMOPT_UNDNAME 0x00000002
+#define SYMOPT_DEFERRED_LOADS 0x00000004
+#define SYMOPT_NO_CPP 0x00000008
+#define SYMOPT_LOAD_LINES 0x00000010
+#define SYMOPT_OMAP_FIND_NEAREST 0x00000020
+#define SYMOPT_LOAD_ANYTHING 0x00000040
+#define SYMOPT_IGNORE_CVREC 0x00000080
+#define SYMOPT_NO_UNQUALIFIED_LOADS 0x00000100
+#define SYMOPT_FAIL_CRITICAL_ERRORS 0x00000200
+#define SYMOPT_EXACT_SYMBOLS 0x00000400
+#define SYMOPT_ALLOW_ABSOLUTE_SYMBOLS 0x00000800
+#define SYMOPT_IGNORE_NT_SYMPATH 0x00001000
+#define SYMOPT_INCLUDE_32BIT_MODULES 0x00002000
+#define SYMOPT_PUBLICS_ONLY 0x00004000
+#define SYMOPT_NO_PUBLICS 0x00008000
+#define SYMOPT_AUTO_PUBLICS 0x00010000
+#define SYMOPT_NO_IMAGE_SEARCH 0x00020000
+#define SYMOPT_SECURE 0x00040000
+#define SYMOPT_NO_PROMPTS 0x00080000
+
+#define SYMOPT_DEBUG 0x80000000
+
+ DWORD IMAGEAPI SymSetOptions(DWORD SymOptions);
+ DWORD IMAGEAPI SymGetOptions(VOID);
+ BOOL IMAGEAPI SymCleanup(HANDLE hProcess);
+ BOOL IMAGEAPI SymMatchString(LPSTR string,LPSTR expression,BOOL fCase);
+
+ typedef BOOL (CALLBACK *PSYM_ENUMSOURCFILES_CALLBACK)(PSOURCEFILE pSourceFile,PVOID UserContext);
+
+ BOOL IMAGEAPI SymEnumSourceFiles(HANDLE hProcess,ULONG64 ModBase,LPSTR Mask,PSYM_ENUMSOURCFILES_CALLBACK cbSrcFiles,PVOID UserContext);
+ BOOL IMAGEAPI SymEnumerateModules64(HANDLE hProcess,PSYM_ENUMMODULES_CALLBACK64 EnumModulesCallback,PVOID UserContext);
+
+#ifdef _IMAGEHLP64
+#define SymEnumerateModules SymEnumerateModules64
+#else
+ BOOL IMAGEAPI SymEnumerateModules(HANDLE hProcess,PSYM_ENUMMODULES_CALLBACK EnumModulesCallback,PVOID UserContext);
+#endif
+
+ BOOL IMAGEAPI SymEnumerateSymbols64(HANDLE hProcess,DWORD64 BaseOfDll,PSYM_ENUMSYMBOLS_CALLBACK64 EnumSymbolsCallback,PVOID UserContext);
+ BOOL IMAGEAPI SymEnumerateSymbolsW64(HANDLE hProcess,DWORD64 BaseOfDll,PSYM_ENUMSYMBOLS_CALLBACK64W EnumSymbolsCallback,PVOID UserContext);
+
+#ifdef _IMAGEHLP64
+#define SymEnumerateSymbols SymEnumerateSymbols64
+#define SymEnumerateSymbolsW SymEnumerateSymbolsW64
+#else
+ BOOL IMAGEAPI SymEnumerateSymbols(HANDLE hProcess,DWORD BaseOfDll,PSYM_ENUMSYMBOLS_CALLBACK EnumSymbolsCallback,PVOID UserContext);
+ BOOL IMAGEAPI SymEnumerateSymbolsW(HANDLE hProcess,DWORD BaseOfDll,PSYM_ENUMSYMBOLS_CALLBACKW EnumSymbolsCallback,PVOID UserContext);
+#endif
+
+ BOOL IMAGEAPI EnumerateLoadedModules64(HANDLE hProcess,PENUMLOADED_MODULES_CALLBACK64 EnumLoadedModulesCallback,PVOID UserContext);
+#ifdef _IMAGEHLP64
+#define EnumerateLoadedModules EnumerateLoadedModules64
+#else
+ BOOL IMAGEAPI EnumerateLoadedModules(HANDLE hProcess,PENUMLOADED_MODULES_CALLBACK EnumLoadedModulesCallback,PVOID UserContext);
+#endif
+
+ PVOID IMAGEAPI SymFunctionTableAccess64(HANDLE hProcess,DWORD64 AddrBase);
+
+#ifdef _IMAGEHLP64
+#define SymFunctionTableAccess SymFunctionTableAccess64
+#else
+ PVOID IMAGEAPI SymFunctionTableAccess(HANDLE hProcess,DWORD AddrBase);
+#endif
+
+ BOOL IMAGEAPI SymGetModuleInfo64(HANDLE hProcess,DWORD64 qwAddr,PIMAGEHLP_MODULE64 ModuleInfo);
+ BOOL IMAGEAPI SymGetModuleInfoW64(HANDLE hProcess,DWORD64 qwAddr,PIMAGEHLP_MODULEW64 ModuleInfo);
+
+#ifdef _IMAGEHLP64
+#define SymGetModuleInfo SymGetModuleInfo64
+#define SymGetModuleInfoW SymGetModuleInfoW64
+#else
+ BOOL IMAGEAPI SymGetModuleInfo(HANDLE hProcess,DWORD dwAddr,PIMAGEHLP_MODULE ModuleInfo);
+ BOOL IMAGEAPI SymGetModuleInfoW(HANDLE hProcess,DWORD dwAddr,PIMAGEHLP_MODULEW ModuleInfo);
+#endif
+
+ DWORD64 IMAGEAPI SymGetModuleBase64(HANDLE hProcess,DWORD64 qwAddr);
+
+#ifdef _IMAGEHLP64
+#define SymGetModuleBase SymGetModuleBase64
+#else
+ DWORD IMAGEAPI SymGetModuleBase(HANDLE hProcess,DWORD dwAddr);
+#endif
+
+ BOOL IMAGEAPI SymGetSymNext64(HANDLE hProcess,PIMAGEHLP_SYMBOL64 Symbol);
+
+#ifdef _IMAGEHLP64
+#define SymGetSymNext SymGetSymNext64
+#else
+ BOOL IMAGEAPI SymGetSymNext(HANDLE hProcess,PIMAGEHLP_SYMBOL Symbol);
+#endif
+
+ BOOL IMAGEAPI SymGetSymPrev64(HANDLE hProcess,PIMAGEHLP_SYMBOL64 Symbol);
+
+#ifdef _IMAGEHLP64
+#define SymGetSymPrev SymGetSymPrev64
+#else
+ BOOL IMAGEAPI SymGetSymPrev(HANDLE hProcess,PIMAGEHLP_SYMBOL Symbol);
+#endif
+
+ typedef struct _SRCCODEINFO {
+ DWORD SizeOfStruct;
+ PVOID Key;
+ DWORD64 ModBase;
+ CHAR Obj[MAX_PATH + 1];
+ CHAR FileName[MAX_PATH + 1];
+ DWORD LineNumber;
+ DWORD64 Address;
+ } SRCCODEINFO,*PSRCCODEINFO;
+
+ typedef BOOL (CALLBACK *PSYM_ENUMLINES_CALLBACK)(PSRCCODEINFO LineInfo,PVOID UserContext);
+
+ BOOL IMAGEAPI SymEnumLines(HANDLE hProcess,ULONG64 Base,PCSTR Obj,PCSTR File,PSYM_ENUMLINES_CALLBACK EnumLinesCallback,PVOID UserContext);
+ BOOL IMAGEAPI SymGetLineFromAddr64(HANDLE hProcess,DWORD64 qwAddr,PDWORD pdwDisplacement,PIMAGEHLP_LINE64 Line64);
+
+#ifdef _IMAGEHLP64
+#define SymGetLineFromAddr SymGetLineFromAddr64
+#else
+ BOOL IMAGEAPI SymGetLineFromAddr(HANDLE hProcess,DWORD dwAddr,PDWORD pdwDisplacement,PIMAGEHLP_LINE Line);
+#endif
+
+ BOOL IMAGEAPI SymGetLineFromName64(HANDLE hProcess,PSTR ModuleName,PSTR FileName,DWORD dwLineNumber,PLONG plDisplacement,PIMAGEHLP_LINE64 Line);
+
+#ifdef _IMAGEHLP64
+#define SymGetLineFromName SymGetLineFromName64
+#else
+ BOOL IMAGEAPI SymGetLineFromName(HANDLE hProcess,PSTR ModuleName,PSTR FileName,DWORD dwLineNumber,PLONG plDisplacement,PIMAGEHLP_LINE Line);
+#endif
+
+ BOOL IMAGEAPI SymGetLineNext64(HANDLE hProcess,PIMAGEHLP_LINE64 Line);
+
+#ifdef _IMAGEHLP64
+#define SymGetLineNext SymGetLineNext64
+#else
+ BOOL IMAGEAPI SymGetLineNext(HANDLE hProcess,PIMAGEHLP_LINE Line);
+#endif
+
+ BOOL IMAGEAPI SymGetLinePrev64(HANDLE hProcess,PIMAGEHLP_LINE64 Line);
+
+#ifdef _IMAGEHLP64
+#define SymGetLinePrev SymGetLinePrev64
+#else
+ BOOL IMAGEAPI SymGetLinePrev(HANDLE hProcess,PIMAGEHLP_LINE Line);
+#endif
+
+ BOOL IMAGEAPI SymMatchFileName(PSTR FileName,PSTR Match,PSTR *FileNameStop,PSTR *MatchStop);
+ BOOL IMAGEAPI SymInitialize(HANDLE hProcess,PSTR UserSearchPath,BOOL fInvadeProcess);
+ BOOL IMAGEAPI SymGetSearchPath(HANDLE hProcess,PSTR SearchPath,DWORD SearchPathLength);
+ BOOL IMAGEAPI SymSetSearchPath(HANDLE hProcess,PSTR SearchPath);
+ DWORD64 IMAGEAPI SymLoadModule64(HANDLE hProcess,HANDLE hFile,PSTR ImageName,PSTR ModuleName,DWORD64 BaseOfDll,DWORD SizeOfDll);
+
+#define SLMFLAG_VIRTUAL 0x1
+
+ DWORD64 IMAGEAPI SymLoadModuleEx(HANDLE hProcess,HANDLE hFile,PSTR ImageName,PSTR ModuleName,DWORD64 BaseOfDll,DWORD DllSize,PMODLOAD_DATA Data,DWORD Flags);
+
+#ifdef _IMAGEHLP64
+#define SymLoadModule SymLoadModule64
+#else
+ DWORD IMAGEAPI SymLoadModule(HANDLE hProcess,HANDLE hFile,PSTR ImageName,PSTR ModuleName,DWORD BaseOfDll,DWORD SizeOfDll);
+#endif
+
+ BOOL IMAGEAPI SymUnloadModule64(HANDLE hProcess,DWORD64 BaseOfDll);
+
+#ifdef _IMAGEHLP64
+#define SymUnloadModule SymUnloadModule64
+#else
+ BOOL IMAGEAPI SymUnloadModule(HANDLE hProcess,DWORD BaseOfDll);
+#endif
+
+ BOOL IMAGEAPI SymUnDName64(PIMAGEHLP_SYMBOL64 sym,PSTR UnDecName,DWORD UnDecNameLength);
+
+#ifdef _IMAGEHLP64
+#define SymUnDName SymUnDName64
+#else
+ BOOL IMAGEAPI SymUnDName(PIMAGEHLP_SYMBOL sym,PSTR UnDecName,DWORD UnDecNameLength);
+#endif
+
+ BOOL IMAGEAPI SymRegisterCallback64(HANDLE hProcess,PSYMBOL_REGISTERED_CALLBACK64 CallbackFunction,ULONG64 UserContext);
+
+ BOOL IMAGEAPI SymRegisterFunctionEntryCallback64(HANDLE hProcess,PSYMBOL_FUNCENTRY_CALLBACK64 CallbackFunction,ULONG64 UserContext);
+
+#ifdef _IMAGEHLP64
+#define SymRegisterCallback SymRegisterCallback64
+#define SymRegisterFunctionEntryCallback SymRegisterFunctionEntryCallback64
+#else
+ BOOL IMAGEAPI SymRegisterCallback(HANDLE hProcess,PSYMBOL_REGISTERED_CALLBACK CallbackFunction,PVOID UserContext);
+ BOOL IMAGEAPI SymRegisterFunctionEntryCallback(HANDLE hProcess,PSYMBOL_FUNCENTRY_CALLBACK CallbackFunction,PVOID UserContext);
+#endif
+
+ typedef struct _IMAGEHLP_SYMBOL_SRC {
+ DWORD sizeofstruct;
+ DWORD type;
+ char file[MAX_PATH];
+ } IMAGEHLP_SYMBOL_SRC,*PIMAGEHLP_SYMBOL_SRC;
+
+ typedef struct _MODULE_TYPE_INFO {
+ USHORT dataLength;
+ USHORT leaf;
+ BYTE data[1];
+ } MODULE_TYPE_INFO,*PMODULE_TYPE_INFO;
+
+ typedef struct _SYMBOL_INFO {
+ ULONG SizeOfStruct;
+ ULONG TypeIndex;
+ ULONG64 Reserved[2];
+ ULONG info;
+ ULONG Size;
+ ULONG64 ModBase;
+ ULONG Flags;
+ ULONG64 Value;
+ ULONG64 Address;
+ ULONG Register;
+ ULONG Scope;
+ ULONG Tag;
+ ULONG NameLen;
+ ULONG MaxNameLen;
+ CHAR Name[1];
+ } SYMBOL_INFO,*PSYMBOL_INFO;
+
+ typedef struct _SYMBOL_INFO_PACKAGE {
+ SYMBOL_INFO si;
+ CHAR name[MAX_SYM_NAME + 1];
+ } SYMBOL_INFO_PACKAGE,*PSYMBOL_INFO_PACKAGE;
+
+ typedef struct _IMAGEHLP_STACK_FRAME
+ {
+ ULONG64 InstructionOffset;
+ ULONG64 ReturnOffset;
+ ULONG64 FrameOffset;
+ ULONG64 StackOffset;
+ ULONG64 BackingStoreOffset;
+ ULONG64 FuncTableEntry;
+ ULONG64 Params[4];
+ ULONG64 Reserved[5];
+ BOOL Virtual;
+ ULONG Reserved2;
+ } IMAGEHLP_STACK_FRAME,*PIMAGEHLP_STACK_FRAME;
+
+ typedef VOID IMAGEHLP_CONTEXT,*PIMAGEHLP_CONTEXT;
+
+ BOOL IMAGEAPI SymSetContext(HANDLE hProcess,PIMAGEHLP_STACK_FRAME StackFrame,PIMAGEHLP_CONTEXT Context);
+ BOOL IMAGEAPI SymFromAddr(HANDLE hProcess,DWORD64 Address,PDWORD64 Displacement,PSYMBOL_INFO Symbol);
+ BOOL IMAGEAPI SymFromToken(HANDLE hProcess,DWORD64 Base,DWORD Token,PSYMBOL_INFO Symbol);
+ BOOL IMAGEAPI SymFromName(HANDLE hProcess,LPSTR Name,PSYMBOL_INFO Symbol);
+
+ typedef BOOL (CALLBACK *PSYM_ENUMERATESYMBOLS_CALLBACK)(PSYMBOL_INFO pSymInfo,ULONG SymbolSize,PVOID UserContext);
+
+ BOOL IMAGEAPI SymEnumSymbols(HANDLE hProcess,ULONG64 BaseOfDll,PCSTR Mask,PSYM_ENUMERATESYMBOLS_CALLBACK EnumSymbolsCallback,PVOID UserContext);
+ BOOL IMAGEAPI SymEnumSymbolsForAddr(HANDLE hProcess,DWORD64 Address,PSYM_ENUMERATESYMBOLS_CALLBACK EnumSymbolsCallback,PVOID UserContext);
+
+#define SYMENUMFLAG_FULLSRCH 1
+#define SYMENUMFLAG_SPEEDSRCH 2
+
+ typedef enum _IMAGEHLP_SYMBOL_TYPE_INFO {
+ TI_GET_SYMTAG,TI_GET_SYMNAME,TI_GET_LENGTH,TI_GET_TYPE,TI_GET_TYPEID,TI_GET_BASETYPE,TI_GET_ARRAYINDEXTYPEID,TI_FINDCHILDREN,
+ TI_GET_DATAKIND,TI_GET_ADDRESSOFFSET,TI_GET_OFFSET,TI_GET_VALUE,TI_GET_COUNT,TI_GET_CHILDRENCOUNT,TI_GET_BITPOSITION,TI_GET_VIRTUALBASECLASS,
+ TI_GET_VIRTUALTABLESHAPEID,TI_GET_VIRTUALBASEPOINTEROFFSET,TI_GET_CLASSPARENTID,TI_GET_NESTED,TI_GET_SYMINDEX,TI_GET_LEXICALPARENT,
+ TI_GET_ADDRESS,TI_GET_THISADJUST,TI_GET_UDTKIND,TI_IS_EQUIV_TO,TI_GET_CALLING_CONVENTION
+ } IMAGEHLP_SYMBOL_TYPE_INFO;
+
+ typedef struct _TI_FINDCHILDREN_PARAMS {
+ ULONG Count;
+ ULONG Start;
+ ULONG ChildId[1];
+ } TI_FINDCHILDREN_PARAMS;
+
+ BOOL IMAGEAPI SymGetTypeInfo(HANDLE hProcess,DWORD64 ModBase,ULONG TypeId,IMAGEHLP_SYMBOL_TYPE_INFO GetType,PVOID pInfo);
+ BOOL IMAGEAPI SymEnumTypes(HANDLE hProcess,ULONG64 BaseOfDll,PSYM_ENUMERATESYMBOLS_CALLBACK EnumSymbolsCallback,PVOID UserContext);
+ BOOL IMAGEAPI SymGetTypeFromName(HANDLE hProcess,ULONG64 BaseOfDll,LPSTR Name,PSYMBOL_INFO Symbol);
+ BOOL IMAGEAPI SymAddSymbol(HANDLE hProcess,ULONG64 BaseOfDll,PCSTR Name,DWORD64 Address,DWORD Size,DWORD Flags);
+ BOOL IMAGEAPI SymDeleteSymbol(HANDLE hProcess,ULONG64 BaseOfDll,PCSTR Name,DWORD64 Address,DWORD Flags);
+
+ typedef BOOL (WINAPI *PDBGHELP_CREATE_USER_DUMP_CALLBACK)(DWORD DataType,PVOID *Data,LPDWORD DataLength,PVOID UserData);
+
+ BOOL WINAPI DbgHelpCreateUserDump(LPSTR FileName,PDBGHELP_CREATE_USER_DUMP_CALLBACK Callback,PVOID UserData);
+ BOOL WINAPI DbgHelpCreateUserDumpW(LPWSTR FileName,PDBGHELP_CREATE_USER_DUMP_CALLBACK Callback,PVOID UserData);
+ BOOL IMAGEAPI SymGetSymFromAddr64(HANDLE hProcess,DWORD64 qwAddr,PDWORD64 pdwDisplacement,PIMAGEHLP_SYMBOL64 Symbol);
+
+#ifdef _IMAGEHLP64
+#define SymGetSymFromAddr SymGetSymFromAddr64
+#else
+ BOOL IMAGEAPI SymGetSymFromAddr(HANDLE hProcess,DWORD dwAddr,PDWORD pdwDisplacement,PIMAGEHLP_SYMBOL Symbol);
+#endif
+
+ BOOL IMAGEAPI SymGetSymFromName64(HANDLE hProcess,PSTR Name,PIMAGEHLP_SYMBOL64 Symbol);
+
+#ifdef _IMAGEHLP64
+#define SymGetSymFromName SymGetSymFromName64
+#else
+ BOOL IMAGEAPI SymGetSymFromName(HANDLE hProcess,PSTR Name,PIMAGEHLP_SYMBOL Symbol);
+#endif
+
+ DBHLP_DEPRECIATED BOOL IMAGEAPI FindFileInPath(HANDLE hprocess,LPSTR SearchPath,LPSTR FileName,PVOID id,DWORD two,DWORD three,DWORD flags,LPSTR FilePath);
+ DBHLP_DEPRECIATED BOOL IMAGEAPI FindFileInSearchPath(HANDLE hprocess,LPSTR SearchPath,LPSTR FileName,DWORD one,DWORD two,DWORD three,LPSTR FilePath);
+ DBHLP_DEPRECIATED BOOL IMAGEAPI SymEnumSym(HANDLE hProcess,ULONG64 BaseOfDll,PSYM_ENUMERATESYMBOLS_CALLBACK EnumSymbolsCallback,PVOID UserContext);
+
+#define SYMF_OMAP_GENERATED 0x00000001
+#define SYMF_OMAP_MODIFIED 0x00000002
+#define SYMF_REGISTER 0x00000008
+#define SYMF_REGREL 0x00000010
+#define SYMF_FRAMEREL 0x00000020
+#define SYMF_PARAMETER 0x00000040
+#define SYMF_LOCAL 0x00000080
+#define SYMF_CONSTANT 0x00000100
+#define SYMF_EXPORT 0x00000200
+#define SYMF_FORWARDER 0x00000400
+#define SYMF_FUNCTION 0x00000800
+#define SYMF_VIRTUAL 0x00001000
+#define SYMF_THUNK 0x00002000
+#define SYMF_TLSREL 0x00004000
+
+#define IMAGEHLP_SYMBOL_INFO_VALUEPRESENT 1
+#define IMAGEHLP_SYMBOL_INFO_REGISTER SYMF_REGISTER
+#define IMAGEHLP_SYMBOL_INFO_REGRELATIVE SYMF_REGREL
+#define IMAGEHLP_SYMBOL_INFO_FRAMERELATIVE SYMF_FRAMEREL
+#define IMAGEHLP_SYMBOL_INFO_PARAMETER SYMF_PARAMETER
+#define IMAGEHLP_SYMBOL_INFO_LOCAL SYMF_LOCAL
+#define IMAGEHLP_SYMBOL_INFO_CONSTANT SYMF_CONSTANT
+#define IMAGEHLP_SYMBOL_FUNCTION SYMF_FUNCTION
+#define IMAGEHLP_SYMBOL_VIRTUAL SYMF_VIRTUAL
+#define IMAGEHLP_SYMBOL_THUNK SYMF_THUNK
+#define IMAGEHLP_SYMBOL_INFO_TLSRELATIVE SYMF_TLSREL
+
+#include <pshpack4.h>
+
+#define MINIDUMP_SIGNATURE ('PMDM')
+#define MINIDUMP_VERSION (42899)
+ typedef DWORD RVA;
+ typedef ULONG64 RVA64;
+
+ typedef struct _MINIDUMP_LOCATION_DESCRIPTOR {
+ ULONG32 DataSize;
+ RVA Rva;
+ } MINIDUMP_LOCATION_DESCRIPTOR;
+
+ typedef struct _MINIDUMP_LOCATION_DESCRIPTOR64 {
+ ULONG64 DataSize;
+ RVA64 Rva;
+ } MINIDUMP_LOCATION_DESCRIPTOR64;
+
+ typedef struct _MINIDUMP_MEMORY_DESCRIPTOR {
+ ULONG64 StartOfMemoryRange;
+ MINIDUMP_LOCATION_DESCRIPTOR Memory;
+ } MINIDUMP_MEMORY_DESCRIPTOR,*PMINIDUMP_MEMORY_DESCRIPTOR;
+
+ typedef struct _MINIDUMP_MEMORY_DESCRIPTOR64 {
+ ULONG64 StartOfMemoryRange;
+ ULONG64 DataSize;
+ } MINIDUMP_MEMORY_DESCRIPTOR64,*PMINIDUMP_MEMORY_DESCRIPTOR64;
+
+ typedef struct _MINIDUMP_HEADER {
+ ULONG32 Signature;
+ ULONG32 Version;
+ ULONG32 NumberOfStreams;
+ RVA StreamDirectoryRva;
+ ULONG32 CheckSum;
+ union {
+ ULONG32 Reserved;
+ ULONG32 TimeDateStamp;
+ } DUMMYUNIONNAME;
+ ULONG64 Flags;
+ } MINIDUMP_HEADER,*PMINIDUMP_HEADER;
+
+ typedef struct _MINIDUMP_DIRECTORY {
+ ULONG32 StreamType;
+ MINIDUMP_LOCATION_DESCRIPTOR Location;
+ } MINIDUMP_DIRECTORY,*PMINIDUMP_DIRECTORY;
+
+ typedef struct _MINIDUMP_STRING {
+ ULONG32 Length;
+ WCHAR Buffer [0];
+ } MINIDUMP_STRING,*PMINIDUMP_STRING;
+
+ typedef enum _MINIDUMP_STREAM_TYPE {
+ UnusedStream = 0,ReservedStream0 = 1,ReservedStream1 = 2,ThreadListStream = 3,ModuleListStream = 4,MemoryListStream = 5,
+ ExceptionStream = 6,SystemInfoStream = 7,ThreadExListStream = 8,Memory64ListStream = 9,CommentStreamA = 10,CommentStreamW = 11,
+ HandleDataStream = 12,FunctionTableStream = 13,UnloadedModuleListStream = 14,MiscInfoStream = 15,LastReservedStream = 0xffff
+ } MINIDUMP_STREAM_TYPE;
+
+ typedef union _CPU_INFORMATION {
+ struct {
+ ULONG32 VendorId [3 ];
+ ULONG32 VersionInformation;
+ ULONG32 FeatureInformation;
+ ULONG32 AMDExtendedCpuFeatures;
+ } X86CpuInfo;
+ struct {
+ ULONG64 ProcessorFeatures [2 ];
+ } OtherCpuInfo;
+ } CPU_INFORMATION,*PCPU_INFORMATION;
+
+ typedef struct _MINIDUMP_SYSTEM_INFO {
+ USHORT ProcessorArchitecture;
+ USHORT ProcessorLevel;
+ USHORT ProcessorRevision;
+ union {
+ USHORT Reserved0;
+ struct {
+ UCHAR NumberOfProcessors;
+ UCHAR ProductType;
+ } DUMMYSTRUCTNAME;
+ } DUMMYUNIONNAME;
+ ULONG32 MajorVersion;
+ ULONG32 MinorVersion;
+ ULONG32 BuildNumber;
+ ULONG32 PlatformId;
+ RVA CSDVersionRva;
+ union {
+ ULONG32 Reserved1;
+ struct {
+ USHORT SuiteMask;
+ USHORT Reserved2;
+ } DUMMYSTRUCTNAME;
+ } DUMMYUNIONNAME1;
+ CPU_INFORMATION Cpu;
+ } MINIDUMP_SYSTEM_INFO,*PMINIDUMP_SYSTEM_INFO;
+
+ C_ASSERT (sizeof (((PPROCESS_INFORMATION)0)->dwThreadId)==4);
+
+ typedef struct _MINIDUMP_THREAD {
+ ULONG32 ThreadId;
+ ULONG32 SuspendCount;
+ ULONG32 PriorityClass;
+ ULONG32 Priority;
+ ULONG64 Teb;
+ MINIDUMP_MEMORY_DESCRIPTOR Stack;
+ MINIDUMP_LOCATION_DESCRIPTOR ThreadContext;
+ } MINIDUMP_THREAD,*PMINIDUMP_THREAD;
+
+ typedef struct _MINIDUMP_THREAD_LIST {
+ ULONG32 NumberOfThreads;
+ MINIDUMP_THREAD Threads [0];
+ } MINIDUMP_THREAD_LIST,*PMINIDUMP_THREAD_LIST;
+
+ typedef struct _MINIDUMP_THREAD_EX {
+ ULONG32 ThreadId;
+ ULONG32 SuspendCount;
+ ULONG32 PriorityClass;
+ ULONG32 Priority;
+ ULONG64 Teb;
+ MINIDUMP_MEMORY_DESCRIPTOR Stack;
+ MINIDUMP_LOCATION_DESCRIPTOR ThreadContext;
+ MINIDUMP_MEMORY_DESCRIPTOR BackingStore;
+ } MINIDUMP_THREAD_EX,*PMINIDUMP_THREAD_EX;
+
+ typedef struct _MINIDUMP_THREAD_EX_LIST {
+ ULONG32 NumberOfThreads;
+ MINIDUMP_THREAD_EX Threads [0];
+ } MINIDUMP_THREAD_EX_LIST,*PMINIDUMP_THREAD_EX_LIST;
+
+ typedef struct _MINIDUMP_EXCEPTION {
+ ULONG32 ExceptionCode;
+ ULONG32 ExceptionFlags;
+ ULONG64 ExceptionRecord;
+ ULONG64 ExceptionAddress;
+ ULONG32 NumberParameters;
+ ULONG32 __unusedAlignment;
+ ULONG64 ExceptionInformation [EXCEPTION_MAXIMUM_PARAMETERS ];
+ } MINIDUMP_EXCEPTION,*PMINIDUMP_EXCEPTION;
+
+ typedef struct MINIDUMP_EXCEPTION_STREAM {
+ ULONG32 ThreadId;
+ ULONG32 __alignment;
+ MINIDUMP_EXCEPTION ExceptionRecord;
+ MINIDUMP_LOCATION_DESCRIPTOR ThreadContext;
+ } MINIDUMP_EXCEPTION_STREAM,*PMINIDUMP_EXCEPTION_STREAM;
+
+ typedef struct _MINIDUMP_MODULE {
+ ULONG64 BaseOfImage;
+ ULONG32 SizeOfImage;
+ ULONG32 CheckSum;
+ ULONG32 TimeDateStamp;
+ RVA ModuleNameRva;
+ VS_FIXEDFILEINFO VersionInfo;
+ MINIDUMP_LOCATION_DESCRIPTOR CvRecord;
+ MINIDUMP_LOCATION_DESCRIPTOR MiscRecord;
+ ULONG64 Reserved0;
+ ULONG64 Reserved1;
+ } MINIDUMP_MODULE,*PMINIDUMP_MODULE;
+
+ typedef struct _MINIDUMP_MODULE_LIST {
+ ULONG32 NumberOfModules;
+ MINIDUMP_MODULE Modules [0 ];
+ } MINIDUMP_MODULE_LIST,*PMINIDUMP_MODULE_LIST;
+
+ typedef struct _MINIDUMP_MEMORY_LIST {
+ ULONG32 NumberOfMemoryRanges;
+ MINIDUMP_MEMORY_DESCRIPTOR MemoryRanges [0];
+ } MINIDUMP_MEMORY_LIST,*PMINIDUMP_MEMORY_LIST;
+
+ typedef struct _MINIDUMP_MEMORY64_LIST {
+ ULONG64 NumberOfMemoryRanges;
+ RVA64 BaseRva;
+ MINIDUMP_MEMORY_DESCRIPTOR64 MemoryRanges [0];
+ } MINIDUMP_MEMORY64_LIST,*PMINIDUMP_MEMORY64_LIST;
+
+ typedef struct _MINIDUMP_EXCEPTION_INFORMATION {
+ DWORD ThreadId;
+ PEXCEPTION_POINTERS ExceptionPointers;
+ BOOL ClientPointers;
+ } MINIDUMP_EXCEPTION_INFORMATION,*PMINIDUMP_EXCEPTION_INFORMATION;
+
+ typedef struct _MINIDUMP_EXCEPTION_INFORMATION64 {
+ DWORD ThreadId;
+ ULONG64 ExceptionRecord;
+ ULONG64 ContextRecord;
+ BOOL ClientPointers;
+ } MINIDUMP_EXCEPTION_INFORMATION64,*PMINIDUMP_EXCEPTION_INFORMATION64;
+
+ typedef struct _MINIDUMP_HANDLE_DESCRIPTOR {
+ ULONG64 Handle;
+ RVA TypeNameRva;
+ RVA ObjectNameRva;
+ ULONG32 Attributes;
+ ULONG32 GrantedAccess;
+ ULONG32 HandleCount;
+ ULONG32 PointerCount;
+ } MINIDUMP_HANDLE_DESCRIPTOR,*PMINIDUMP_HANDLE_DESCRIPTOR;
+
+ typedef struct _MINIDUMP_HANDLE_DATA_STREAM {
+ ULONG32 SizeOfHeader;
+ ULONG32 SizeOfDescriptor;
+ ULONG32 NumberOfDescriptors;
+ ULONG32 Reserved;
+ } MINIDUMP_HANDLE_DATA_STREAM,*PMINIDUMP_HANDLE_DATA_STREAM;
+
+ typedef struct _MINIDUMP_FUNCTION_TABLE_DESCRIPTOR {
+ ULONG64 MinimumAddress;
+ ULONG64 MaximumAddress;
+ ULONG64 BaseAddress;
+ ULONG32 EntryCount;
+ ULONG32 SizeOfAlignPad;
+ } MINIDUMP_FUNCTION_TABLE_DESCRIPTOR,*PMINIDUMP_FUNCTION_TABLE_DESCRIPTOR;
+
+ typedef struct _MINIDUMP_FUNCTION_TABLE_STREAM {
+ ULONG32 SizeOfHeader;
+ ULONG32 SizeOfDescriptor;
+ ULONG32 SizeOfNativeDescriptor;
+ ULONG32 SizeOfFunctionEntry;
+ ULONG32 NumberOfDescriptors;
+ ULONG32 SizeOfAlignPad;
+ } MINIDUMP_FUNCTION_TABLE_STREAM,*PMINIDUMP_FUNCTION_TABLE_STREAM;
+
+ typedef struct _MINIDUMP_UNLOADED_MODULE {
+ ULONG64 BaseOfImage;
+ ULONG32 SizeOfImage;
+ ULONG32 CheckSum;
+ ULONG32 TimeDateStamp;
+ RVA ModuleNameRva;
+ } MINIDUMP_UNLOADED_MODULE,*PMINIDUMP_UNLOADED_MODULE;
+
+ typedef struct _MINIDUMP_UNLOADED_MODULE_LIST {
+ ULONG32 SizeOfHeader;
+ ULONG32 SizeOfEntry;
+ ULONG32 NumberOfEntries;
+ } MINIDUMP_UNLOADED_MODULE_LIST,*PMINIDUMP_UNLOADED_MODULE_LIST;
+
+#define MINIDUMP_MISC1_PROCESS_ID 0x00000001
+#define MINIDUMP_MISC1_PROCESS_TIMES 0x00000002
+
+ typedef struct _MINIDUMP_MISC_INFO {
+ ULONG32 SizeOfInfo;
+ ULONG32 Flags1;
+ ULONG32 ProcessId;
+ ULONG32 ProcessCreateTime;
+ ULONG32 ProcessUserTime;
+ ULONG32 ProcessKernelTime;
+ } MINIDUMP_MISC_INFO,*PMINIDUMP_MISC_INFO;
+
+ typedef struct _MINIDUMP_USER_RECORD {
+ ULONG32 Type;
+ MINIDUMP_LOCATION_DESCRIPTOR Memory;
+ } MINIDUMP_USER_RECORD,*PMINIDUMP_USER_RECORD;
+
+ typedef struct _MINIDUMP_USER_STREAM {
+ ULONG32 Type;
+ ULONG BufferSize;
+ PVOID Buffer;
+ } MINIDUMP_USER_STREAM,*PMINIDUMP_USER_STREAM;
+
+ typedef struct _MINIDUMP_USER_STREAM_INFORMATION {
+ ULONG UserStreamCount;
+ PMINIDUMP_USER_STREAM UserStreamArray;
+ } MINIDUMP_USER_STREAM_INFORMATION,*PMINIDUMP_USER_STREAM_INFORMATION;
+
+ typedef enum _MINIDUMP_CALLBACK_TYPE {
+ ModuleCallback,ThreadCallback,ThreadExCallback,IncludeThreadCallback,IncludeModuleCallback,MemoryCallback
+ } MINIDUMP_CALLBACK_TYPE;
+
+ typedef struct _MINIDUMP_THREAD_CALLBACK {
+ ULONG ThreadId;
+ HANDLE ThreadHandle;
+ CONTEXT Context;
+ ULONG SizeOfContext;
+ ULONG64 StackBase;
+ ULONG64 StackEnd;
+ } MINIDUMP_THREAD_CALLBACK,*PMINIDUMP_THREAD_CALLBACK;
+
+ typedef struct _MINIDUMP_THREAD_EX_CALLBACK {
+ ULONG ThreadId;
+ HANDLE ThreadHandle;
+ CONTEXT Context;
+ ULONG SizeOfContext;
+ ULONG64 StackBase;
+ ULONG64 StackEnd;
+ ULONG64 BackingStoreBase;
+ ULONG64 BackingStoreEnd;
+ } MINIDUMP_THREAD_EX_CALLBACK,*PMINIDUMP_THREAD_EX_CALLBACK;
+
+ typedef struct _MINIDUMP_INCLUDE_THREAD_CALLBACK {
+ ULONG ThreadId;
+ } MINIDUMP_INCLUDE_THREAD_CALLBACK,*PMINIDUMP_INCLUDE_THREAD_CALLBACK;
+
+ typedef enum _THREAD_WRITE_FLAGS {
+ ThreadWriteThread = 0x0001,ThreadWriteStack = 0x0002,ThreadWriteContext = 0x0004,ThreadWriteBackingStore = 0x0008,
+ ThreadWriteInstructionWindow = 0x0010,ThreadWriteThreadData = 0x0020
+ } THREAD_WRITE_FLAGS;
+
+ typedef struct _MINIDUMP_MODULE_CALLBACK {
+ PWCHAR FullPath;
+ ULONG64 BaseOfImage;
+ ULONG SizeOfImage;
+ ULONG CheckSum;
+ ULONG TimeDateStamp;
+ VS_FIXEDFILEINFO VersionInfo;
+ PVOID CvRecord;
+ ULONG SizeOfCvRecord;
+ PVOID MiscRecord;
+ ULONG SizeOfMiscRecord;
+ } MINIDUMP_MODULE_CALLBACK,*PMINIDUMP_MODULE_CALLBACK;
+
+ typedef struct _MINIDUMP_INCLUDE_MODULE_CALLBACK {
+ ULONG64 BaseOfImage;
+ } MINIDUMP_INCLUDE_MODULE_CALLBACK,*PMINIDUMP_INCLUDE_MODULE_CALLBACK;
+
+ typedef enum _MODULE_WRITE_FLAGS {
+ ModuleWriteModule = 0x0001,ModuleWriteDataSeg = 0x0002,ModuleWriteMiscRecord = 0x0004,ModuleWriteCvRecord = 0x0008,
+ ModuleReferencedByMemory = 0x0010
+ } MODULE_WRITE_FLAGS;
+
+ typedef struct _MINIDUMP_CALLBACK_INPUT {
+ ULONG ProcessId;
+ HANDLE ProcessHandle;
+ ULONG CallbackType;
+ union {
+ MINIDUMP_THREAD_CALLBACK Thread;
+ MINIDUMP_THREAD_EX_CALLBACK ThreadEx;
+ MINIDUMP_MODULE_CALLBACK Module;
+ MINIDUMP_INCLUDE_THREAD_CALLBACK IncludeThread;
+ MINIDUMP_INCLUDE_MODULE_CALLBACK IncludeModule;
+ } DUMMYUNIONNAME;
+ } MINIDUMP_CALLBACK_INPUT,*PMINIDUMP_CALLBACK_INPUT;
+
+ typedef struct _MINIDUMP_CALLBACK_OUTPUT {
+ union {
+ ULONG ModuleWriteFlags;
+ ULONG ThreadWriteFlags;
+ struct {
+ ULONG64 MemoryBase;
+ ULONG MemorySize;
+ } DUMMYSTRUCTNAME;
+ } DUMMYUNIONNAME;
+ } MINIDUMP_CALLBACK_OUTPUT,*PMINIDUMP_CALLBACK_OUTPUT;
+
+ typedef enum _MINIDUMP_TYPE {
+ MiniDumpNormal = 0x0000,MiniDumpWithDataSegs = 0x0001,MiniDumpWithFullMemory = 0x0002,MiniDumpWithHandleData = 0x0004,
+ MiniDumpFilterMemory = 0x0008,MiniDumpScanMemory = 0x0010,MiniDumpWithUnloadedModules = 0x0020,MiniDumpWithIndirectlyReferencedMemory = 0x0040,
+ MiniDumpFilterModulePaths = 0x0080,MiniDumpWithProcessThreadData = 0x0100,MiniDumpWithPrivateReadWriteMemory = 0x0200,
+ MiniDumpWithoutOptionalData = 0x0400
+ } MINIDUMP_TYPE;
+
+ typedef BOOL (WINAPI *MINIDUMP_CALLBACK_ROUTINE)(PVOID CallbackParam,CONST PMINIDUMP_CALLBACK_INPUT CallbackInput,PMINIDUMP_CALLBACK_OUTPUT CallbackOutput);
+
+ typedef struct _MINIDUMP_CALLBACK_INFORMATION {
+ MINIDUMP_CALLBACK_ROUTINE CallbackRoutine;
+ PVOID CallbackParam;
+ } MINIDUMP_CALLBACK_INFORMATION,*PMINIDUMP_CALLBACK_INFORMATION;
+
+#define RVA_TO_ADDR(Mapping,Rva) ((PVOID)(((ULONG_PTR) (Mapping)) + (Rva)))
+
+ BOOL WINAPI MiniDumpWriteDump(HANDLE hProcess,DWORD ProcessId,HANDLE hFile,MINIDUMP_TYPE DumpType,CONST PMINIDUMP_EXCEPTION_INFORMATION ExceptionParam,CONST PMINIDUMP_USER_STREAM_INFORMATION UserStreamParam,CONST PMINIDUMP_CALLBACK_INFORMATION CallbackParam);
+ BOOL WINAPI MiniDumpReadDumpStream(PVOID BaseOfDump,ULONG StreamNumber,PMINIDUMP_DIRECTORY *Dir,PVOID *StreamPointer,ULONG *StreamSize);
+
+#include <poppack.h>
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index c11f7d383d..421726b40e 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -128,21 +128,6 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
}
- /* vertex shader - still required to provide the linkage between
- * fragment shader input semantics and vertex_element/buffers.
- */
- {
- const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
- TGSI_SEMANTIC_GENERIC };
- const uint semantic_indexes[] = { 0, 0 };
- ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
- semantic_indexes);
- }
-
- /* fragment shader */
- ctx->fs[TGSI_WRITEMASK_XYZW] =
- util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D,
- TGSI_INTERPOLATE_LINEAR);
ctx->vbuf = NULL;
/* init vertex data that doesn't change */
@@ -170,7 +155,8 @@ util_destroy_blit(struct blit_state *ctx)
struct pipe_context *pipe = ctx->pipe;
unsigned i;
- pipe->delete_vs_state(pipe, ctx->vs);
+ if (ctx->vs)
+ pipe->delete_vs_state(pipe, ctx->vs);
for (i = 0; i < Elements(ctx->fs); i++)
if (ctx->fs[i])
@@ -186,6 +172,59 @@ util_destroy_blit(struct blit_state *ctx)
/**
+ * Helper function to set the fragment shaders.
+ */
+static INLINE void
+set_fragment_shader(struct blit_state *ctx, uint writemask)
+{
+ if (!ctx->fs[writemask])
+ ctx->fs[writemask] =
+ util_make_fragment_tex_shader_writemask(ctx->pipe, TGSI_TEXTURE_2D,
+ TGSI_INTERPOLATE_LINEAR,
+ writemask);
+
+ cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]);
+}
+
+
+/**
+ * Helper function to set the depthwrite shader.
+ */
+static INLINE void
+set_depth_fragment_shader(struct blit_state *ctx)
+{
+ if (!ctx->fs_depth)
+ ctx->fs_depth =
+ util_make_fragment_tex_shader_writedepth(ctx->pipe, TGSI_TEXTURE_2D,
+ TGSI_INTERPOLATE_LINEAR);
+
+ cso_set_fragment_shader_handle(ctx->cso, ctx->fs_depth);
+}
+
+
+/**
+ * Helper function to set the vertex shader.
+ */
+static INLINE void
+set_vertex_shader(struct blit_state *ctx)
+{
+ /* vertex shader - still required to provide the linkage between
+ * fragment shader input semantics and vertex_element/buffers.
+ */
+ if (!ctx->vs) {
+ const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+ TGSI_SEMANTIC_GENERIC };
+ const uint semantic_indexes[] = { 0, 0 };
+ ctx->vs = util_make_vertex_passthrough_shader(ctx->pipe, 2,
+ semantic_names,
+ semantic_indexes);
+ }
+
+ cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
+}
+
+
+/**
* Get offset of next free slot in vertex buffer for quad vertices.
*/
static unsigned
@@ -199,6 +238,7 @@ get_next_slot( struct blit_state *ctx )
if (!ctx->vbuf) {
ctx->vbuf = pipe_buffer_create(ctx->pipe->screen,
PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STREAM,
max_slots * sizeof ctx->vertices);
}
@@ -303,8 +343,10 @@ util_blit_pixels_writemask(struct blit_state *ctx,
{
struct pipe_context *pipe = ctx->pipe;
struct pipe_screen *screen = pipe->screen;
+ enum pipe_format src_format, dst_format;
struct pipe_sampler_view *sampler_view = NULL;
struct pipe_sampler_view sv_templ;
+ struct pipe_surface *dst_surface;
struct pipe_framebuffer_state fb;
const int srcW = abs(srcX1 - srcX0);
const int srcH = abs(srcY1 - srcY0);
@@ -325,12 +367,15 @@ util_blit_pixels_writemask(struct blit_state *ctx,
regions_overlap(srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1);
+ src_format = util_format_linear(src_tex->format);
+ dst_format = util_format_linear(dst->format);
+
/*
* Check for simple case: no format conversion, no flipping, no stretching,
* no overlapping.
* Filter mode should not matter since there's no stretching.
*/
- if (dst->format == src_tex->format &&
+ if (dst_format == src_format &&
srcX0 < srcX1 &&
dstX0 < dstX1 &&
srcY0 < srcY1 &&
@@ -353,6 +398,14 @@ util_blit_pixels_writemask(struct blit_state *ctx,
return;
}
+ if (dst_format == dst->format) {
+ dst_surface = dst;
+ } else {
+ struct pipe_surface templ = *dst;
+ templ.format = dst_format;
+ dst_surface = pipe->create_surface(pipe, dst->texture, &templ);
+ }
+
/* Create a temporary texture when src and dest alias or when src
* is anything other than a 2d texture.
* XXX should just use appropriate shader to access 1d / 3d slice / cube face,
@@ -360,9 +413,9 @@ util_blit_pixels_writemask(struct blit_state *ctx,
*
* This can still be improved upon.
*/
- if ((src_tex == dst->texture &&
- dst->u.tex.level == src_level &&
- dst->u.tex.first_layer == srcZ0) ||
+ if ((src_tex == dst_surface->texture &&
+ dst_surface->u.tex.level == src_level &&
+ dst_surface->u.tex.first_layer == srcZ0) ||
(src_tex->target != PIPE_TEXTURE_2D &&
src_tex->target != PIPE_TEXTURE_2D &&
src_tex->target != PIPE_TEXTURE_RECT))
@@ -391,7 +444,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
/* create temp texture */
memset(&texTemp, 0, sizeof(texTemp));
texTemp.target = ctx->internal_target;
- texTemp.format = src_tex->format;
+ texTemp.format = src_format;
texTemp.last_level = 0;
texTemp.width0 = srcW;
texTemp.height0 = srcH;
@@ -438,7 +491,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
pipe_resource_reference(&tex, NULL);
}
else {
- u_sampler_view_default_template(&sv_templ, src_tex, src_tex->format);
+ u_sampler_view_default_template(&sv_templ, src_tex, src_format);
sampler_view = pipe->create_sampler_view(pipe, src_tex, &sv_templ);
if (!sampler_view) {
@@ -459,15 +512,15 @@ util_blit_pixels_writemask(struct blit_state *ctx,
}
}
- dst_is_depth = util_format_is_depth_or_stencil(dst->format);
+ dst_is_depth = util_format_is_depth_or_stencil(dst_format);
assert(screen->is_format_supported(screen, sampler_view->format, ctx->internal_target,
sampler_view->texture->nr_samples,
- PIPE_BIND_SAMPLER_VIEW, 0));
- assert(screen->is_format_supported(screen, dst->format, ctx->internal_target,
- dst->texture->nr_samples,
+ PIPE_BIND_SAMPLER_VIEW));
+ assert(screen->is_format_supported(screen, dst_format, ctx->internal_target,
+ dst_surface->texture->nr_samples,
dst_is_depth ? PIPE_BIND_DEPTH_STENCIL :
- PIPE_BIND_RENDER_TARGET, 0));
+ PIPE_BIND_RENDER_TARGET));
/* save state (restored below) */
cso_save_blend(ctx->cso);
cso_save_depth_stencil_alpha(ctx->cso);
@@ -480,6 +533,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
cso_save_vertex_shader(ctx->cso);
cso_save_clip(ctx->cso);
cso_save_vertex_elements(ctx->cso);
+ cso_save_vertex_buffers(ctx->cso);
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend);
@@ -500,12 +554,12 @@ util_blit_pixels_writemask(struct blit_state *ctx,
cso_single_sampler_done(ctx->cso);
/* viewport */
- ctx->viewport.scale[0] = 0.5f * dst->width;
- ctx->viewport.scale[1] = 0.5f * dst->height;
+ ctx->viewport.scale[0] = 0.5f * dst_surface->width;
+ ctx->viewport.scale[1] = 0.5f * dst_surface->height;
ctx->viewport.scale[2] = 0.5f;
ctx->viewport.scale[3] = 1.0f;
- ctx->viewport.translate[0] = 0.5f * dst->width;
- ctx->viewport.translate[1] = 0.5f * dst->height;
+ ctx->viewport.translate[0] = 0.5f * dst_surface->width;
+ ctx->viewport.translate[1] = 0.5f * dst_surface->height;
ctx->viewport.translate[2] = 0.5f;
ctx->viewport.translate[3] = 0.0f;
cso_set_viewport(ctx->cso, &ctx->viewport);
@@ -515,46 +569,35 @@ util_blit_pixels_writemask(struct blit_state *ctx,
/* shaders */
if (dst_is_depth) {
- if (ctx->fs_depth == NULL)
- ctx->fs_depth =
- util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_2D,
- TGSI_INTERPOLATE_LINEAR);
-
- cso_set_fragment_shader_handle(ctx->cso, ctx->fs_depth);
+ set_depth_fragment_shader(ctx);
} else {
- if (ctx->fs[writemask] == NULL)
- ctx->fs[writemask] =
- util_make_fragment_tex_shader_writemask(pipe, TGSI_TEXTURE_2D,
- TGSI_INTERPOLATE_LINEAR,
- writemask);
-
- cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]);
+ set_fragment_shader(ctx, writemask);
}
- cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
+ set_vertex_shader(ctx);
/* drawing dest */
memset(&fb, 0, sizeof(fb));
- fb.width = dst->width;
- fb.height = dst->height;
+ fb.width = dst_surface->width;
+ fb.height = dst_surface->height;
if (dst_is_depth) {
- fb.zsbuf = dst;
+ fb.zsbuf = dst_surface;
} else {
fb.nr_cbufs = 1;
- fb.cbufs[0] = dst;
+ fb.cbufs[0] = dst_surface;
}
cso_set_framebuffer(ctx->cso, &fb);
/* draw quad */
offset = setup_vertex_data_tex(ctx,
- (float) dstX0 / dst->width * 2.0f - 1.0f,
- (float) dstY0 / dst->height * 2.0f - 1.0f,
- (float) dstX1 / dst->width * 2.0f - 1.0f,
- (float) dstY1 / dst->height * 2.0f - 1.0f,
+ (float) dstX0 / dst_surface->width * 2.0f - 1.0f,
+ (float) dstY0 / dst_surface->height * 2.0f - 1.0f,
+ (float) dstX1 / dst_surface->width * 2.0f - 1.0f,
+ (float) dstY1 / dst_surface->height * 2.0f - 1.0f,
s0, t0,
s1, t1,
z);
- util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, offset,
+ util_draw_vertex_buffer(ctx->pipe, ctx->cso, ctx->vbuf, offset,
PIPE_PRIM_TRIANGLE_FAN,
4, /* verts */
2); /* attribs/vert */
@@ -571,8 +614,11 @@ util_blit_pixels_writemask(struct blit_state *ctx,
cso_restore_vertex_shader(ctx->cso);
cso_restore_clip(ctx->cso);
cso_restore_vertex_elements(ctx->cso);
+ cso_restore_vertex_buffers(ctx->cso);
pipe_sampler_view_reference(&sampler_view, NULL);
+ if (dst_surface != dst)
+ pipe_surface_reference(&dst_surface, NULL);
}
@@ -657,8 +703,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format,
PIPE_TEXTURE_2D,
dst->texture->nr_samples,
- PIPE_BIND_RENDER_TARGET,
- 0));
+ PIPE_BIND_RENDER_TARGET));
/* save state (restored below) */
cso_save_blend(ctx->cso);
@@ -672,6 +717,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
cso_save_vertex_shader(ctx->cso);
cso_save_clip(ctx->cso);
cso_save_vertex_elements(ctx->cso);
+ cso_save_vertex_buffers(ctx->cso);
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend);
@@ -702,8 +748,8 @@ util_blit_pixels_tex(struct blit_state *ctx,
cso_set_fragment_sampler_views(ctx->cso, 1, &src_sampler_view);
/* shaders */
- cso_set_fragment_shader_handle(ctx->cso, ctx->fs[TGSI_WRITEMASK_XYZW]);
- cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
+ set_fragment_shader(ctx, TGSI_WRITEMASK_XYZW);
+ set_vertex_shader(ctx);
/* drawing dest */
memset(&fb, 0, sizeof(fb));
@@ -722,7 +768,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
s0, t0, s1, t1,
z);
- util_draw_vertex_buffer(ctx->pipe,
+ util_draw_vertex_buffer(ctx->pipe, ctx->cso,
ctx->vbuf, offset,
PIPE_PRIM_TRIANGLE_FAN,
4, /* verts */
@@ -740,4 +786,5 @@ util_blit_pixels_tex(struct blit_state *ctx,
cso_restore_vertex_shader(ctx->cso);
cso_restore_clip(ctx->cso);
cso_restore_vertex_elements(ctx->cso);
+ cso_restore_vertex_buffers(ctx->cso);
}
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 4c986e3565..a4c399052f 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -86,7 +86,6 @@ struct blitter_context_priv
void *dsa_write_depth_keep_stencil;
void *dsa_keep_depth_stencil;
void *dsa_keep_depth_write_stencil;
- void *dsa_flush_depth_stencil;
void *velem_state;
@@ -156,10 +155,6 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
ctx->dsa_keep_depth_stencil =
pipe->create_depth_stencil_alpha_state(pipe, &dsa);
- dsa.depth.writemask = 1;
- ctx->dsa_flush_depth_stencil =
- pipe->create_depth_stencil_alpha_state(pipe, &dsa);
-
dsa.depth.enabled = 1;
dsa.depth.writemask = 1;
dsa.depth.func = PIPE_FUNC_ALWAYS;
@@ -225,9 +220,10 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
ctx->vertices[i][0][3] = 1; /*v.w*/
/* create the vertex buffer */
- ctx->vbuf = pipe_buffer_create(ctx->base.pipe->screen,
- PIPE_BIND_VERTEX_BUFFER,
- sizeof(ctx->vertices));
+ ctx->vbuf = pipe_user_buffer_create(ctx->base.pipe->screen,
+ ctx->vertices,
+ sizeof(ctx->vertices),
+ PIPE_BIND_VERTEX_BUFFER);
return &ctx->base;
}
@@ -245,7 +241,6 @@ void util_blitter_destroy(struct blitter_context *blitter)
ctx->dsa_write_depth_keep_stencil);
pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil);
pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil);
- pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil);
pipe->delete_rasterizer_state(pipe, ctx->rs_state);
pipe->delete_vs_state(pipe, ctx->vs);
@@ -272,6 +267,12 @@ void util_blitter_destroy(struct blitter_context *blitter)
static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx)
{
+ if (ctx->base.running) {
+ _debug_printf("u_blitter: Caught recursion on save. "
+ "This is a driver bug.\n");
+ }
+ ctx->base.running = TRUE;
+
/* make sure these CSOs have been saved */
assert(ctx->base.saved_blend_state != INVALID_PTR &&
ctx->base.saved_dsa_state != INVALID_PTR &&
@@ -302,7 +303,6 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
ctx->base.saved_velem_state = INVALID_PTR;
pipe->set_stencil_ref(pipe, &ctx->base.saved_stencil_ref);
-
pipe->set_viewport_state(pipe, &ctx->base.saved_viewport);
pipe->set_clip_state(pipe, &ctx->base.saved_clip);
@@ -346,6 +346,12 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
}
ctx->base.saved_num_vertex_buffers = ~0;
}
+
+ if (!ctx->base.running) {
+ _debug_printf("u_blitter: Caught recursion on restore. "
+ "This is a driver bug.\n");
+ }
+ ctx->base.running = FALSE;
}
static void blitter_set_rectangle(struct blitter_context_priv *ctx,
@@ -509,22 +515,6 @@ static void blitter_set_dst_dimensions(struct blitter_context_priv *ctx,
ctx->dst_height = height;
}
-static void blitter_draw_quad(struct blitter_context_priv *ctx)
-{
- struct pipe_context *pipe = ctx->base.pipe;
- struct pipe_box box;
-
- /* write vertices and draw them */
- u_box_1d(0, sizeof(ctx->vertices), &box);
- pipe->transfer_inline_write(pipe, ctx->vbuf, 0,
- PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
- &box, ctx->vertices, sizeof(ctx->vertices), 0);
-
- util_draw_vertex_buffer(pipe, ctx->vbuf, 0, PIPE_PRIM_TRIANGLE_FAN,
- 4, /* verts */
- 2); /* attribs/vert */
-}
-
static INLINE
void **blitter_get_sampler_state(struct blitter_context_priv *ctx,
int miplevel, boolean normalized)
@@ -649,15 +639,19 @@ static void blitter_draw_rectangle(struct blitter_context *blitter,
}
blitter_set_rectangle(ctx, x1, y1, x2, y2, depth);
- blitter_draw_quad(ctx);
+ ctx->base.pipe->redefine_user_buffer(ctx->base.pipe, ctx->vbuf,
+ 0, ctx->vbuf->width0);
+ util_draw_vertex_buffer(ctx->base.pipe, NULL, ctx->vbuf, 0,
+ PIPE_PRIM_TRIANGLE_FAN, 4, 2);
}
-void util_blitter_clear(struct blitter_context *blitter,
- unsigned width, unsigned height,
- unsigned num_cbufs,
- unsigned clear_buffers,
- const float *rgba,
- double depth, unsigned stencil)
+static void util_blitter_clear_custom(struct blitter_context *blitter,
+ unsigned width, unsigned height,
+ unsigned num_cbufs,
+ unsigned clear_buffers,
+ const float *rgba,
+ double depth, unsigned stencil,
+ void *custom_blend, void *custom_dsa)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
struct pipe_context *pipe = ctx->base.pipe;
@@ -668,26 +662,28 @@ void util_blitter_clear(struct blitter_context *blitter,
blitter_check_saved_CSOs(ctx);
/* bind CSOs */
- if (clear_buffers & PIPE_CLEAR_COLOR)
+ if (custom_blend) {
+ pipe->bind_blend_state(pipe, custom_blend);
+ } else if (clear_buffers & PIPE_CLEAR_COLOR) {
pipe->bind_blend_state(pipe, ctx->blend_write_color);
- else
+ } else {
pipe->bind_blend_state(pipe, ctx->blend_keep_color);
+ }
- if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) {
- sr.ref_value[0] = stencil & 0xff;
+ if (custom_dsa) {
+ pipe->bind_depth_stencil_alpha_state(pipe, custom_dsa);
+ } else if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) {
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil);
- pipe->set_stencil_ref(pipe, &sr);
- }
- else if (clear_buffers & PIPE_CLEAR_DEPTH) {
+ } else if (clear_buffers & PIPE_CLEAR_DEPTH) {
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_keep_stencil);
- }
- else if (clear_buffers & PIPE_CLEAR_STENCIL) {
- sr.ref_value[0] = stencil & 0xff;
+ } else if (clear_buffers & PIPE_CLEAR_STENCIL) {
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil);
- pipe->set_stencil_ref(pipe, &sr);
- }
- else
+ } else {
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
+ }
+
+ sr.ref_value[0] = stencil & 0xff;
+ pipe->set_stencil_ref(pipe, &sr);
pipe->bind_rasterizer_state(pipe, ctx->rs_state);
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
@@ -700,6 +696,27 @@ void util_blitter_clear(struct blitter_context *blitter,
blitter_restore_CSOs(ctx);
}
+void util_blitter_clear(struct blitter_context *blitter,
+ unsigned width, unsigned height,
+ unsigned num_cbufs,
+ unsigned clear_buffers,
+ const float *rgba,
+ double depth, unsigned stencil)
+{
+ util_blitter_clear_custom(blitter, width, height, num_cbufs,
+ clear_buffers, rgba, depth, stencil,
+ NULL, NULL);
+}
+
+void util_blitter_clear_depth_custom(struct blitter_context *blitter,
+ unsigned width, unsigned height,
+ double depth, void *custom_dsa)
+{
+ const float rgba[4] = {0, 0, 0, 0};
+ util_blitter_clear_custom(blitter, width, height, 0,
+ 0, rgba, depth, 0, NULL, custom_dsa);
+}
+
static
boolean is_overlap(unsigned sx1, unsigned sx2, unsigned sy1, unsigned sy2,
unsigned dx1, unsigned dx2, unsigned dy1, unsigned dy2)
@@ -737,9 +754,6 @@ void util_blitter_copy_region(struct blitter_context *blitter,
if (dst == src) {
assert(!is_overlap(srcbox->x, srcbox->x + width, srcbox->y, srcbox->y + height,
dstx, dstx + width, dsty, dsty + height));
- } else {
- assert(util_is_format_compatible(util_format_description(dst->format),
- util_format_description(src->format)));
}
assert(src->target < PIPE_MAX_TEXTURE_TYPES);
/* XXX should handle 3d regions */
@@ -758,17 +772,20 @@ void util_blitter_copy_region(struct blitter_context *blitter,
/* (assuming copying a stencil buffer is not possible) */
if ((!ignore_stencil && is_stencil) ||
!screen->is_format_supported(screen, dst->format, dst->target,
- dst->nr_samples, bind, 0) ||
+ dst->nr_samples, bind) ||
!screen->is_format_supported(screen, src->format, src->target,
- src->nr_samples, PIPE_BIND_SAMPLER_VIEW, 0)) {
+ src->nr_samples, PIPE_BIND_SAMPLER_VIEW)) {
+ ctx->base.running = TRUE;
util_resource_copy_region(pipe, dst, dstlevel, dstx, dsty, dstz,
src, srclevel, srcbox);
+ ctx->base.running = FALSE;
return;
}
/* Get surface. */
memset(&surf_templ, 0, sizeof(surf_templ));
u_surface_default_template(&surf_templ, dst, bind);
+ surf_templ.format = util_format_linear(dst->format);
surf_templ.u.tex.level = dstlevel;
surf_templ.u.tex.first_layer = dstz;
surf_templ.u.tex.last_layer = dstz;
@@ -807,7 +824,7 @@ void util_blitter_copy_region(struct blitter_context *blitter,
normalized = src->target != PIPE_TEXTURE_RECT;
/* Initialize sampler view. */
- u_sampler_view_default_template(&viewTempl, src, src->format);
+ u_sampler_view_default_template(&viewTempl, src, util_format_linear(src->format));
view = pipe->create_sampler_view(pipe, src, &viewTempl);
/* Set rasterizer state, shaders, and textures. */
@@ -853,7 +870,10 @@ void util_blitter_copy_region(struct blitter_context *blitter,
/* Draw. */
blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0);
- blitter_draw_quad(ctx);
+ ctx->base.pipe->redefine_user_buffer(ctx->base.pipe, ctx->vbuf,
+ 0, ctx->vbuf->width0);
+ util_draw_vertex_buffer(ctx->base.pipe, NULL, ctx->vbuf, 0,
+ PIPE_PRIM_TRIANGLE_FAN, 4, 2);
break;
default:
@@ -1014,12 +1034,3 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
UTIL_BLITTER_ATTRIB_NONE, NULL);
blitter_restore_CSOs(ctx);
}
-
-/* flush a region of a depth stencil surface for r300g */
-void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
- struct pipe_surface *dstsurf)
-{
- struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
- util_blitter_custom_depth_stencil(blitter, dstsurf, NULL,
- ctx->dsa_flush_depth_stencil, 0.0f);
-}
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index c5660cf2d0..41470d92bb 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -55,13 +55,13 @@ struct blitter_context
* \param y1 A Y coordinate of the top-left corner.
* \param x2 An X coordinate of the bottom-right corner.
* \param y2 A Y coordinate of the bottom-right corner.
- * \param depth A depth which the rectangle is rendered at.
+ * \param depth A depth which the rectangle is rendered at.
*
* \param type Semantics of the attributes "attrib".
* If type is UTIL_BLITTER_ATTRIB_NONE, ignore them.
* If type is UTIL_BLITTER_ATTRIB_COLOR, the attributes
- * make up a constant RGBA color, and should go to the COLOR0
- * varying slot of a fragment shader.
+ * make up a constant RGBA color, and should go
+ * to the GENERIC0 varying slot of a fragment shader.
* If type is UTIL_BLITTER_ATTRIB_TEXCOORD, {a1, a2} and
* {a3, a4} specify top-left and bottom-right texture
* coordinates of the rectangle, respectively, and should go
@@ -79,6 +79,9 @@ struct blitter_context
enum blitter_attrib_type type,
const float attrib[4]);
+ /* Whether the blitter is running. */
+ boolean running;
+
/* Private members, really. */
struct pipe_context *pipe; /**< pipe context */
@@ -141,6 +144,10 @@ void util_blitter_clear(struct blitter_context *blitter,
const float *rgba,
double depth, unsigned stencil);
+void util_blitter_clear_depth_custom(struct blitter_context *blitter,
+ unsigned width, unsigned height,
+ double depth, void *custom_dsa);
+
/**
* Copy a block of pixels from one surface to another.
*
@@ -200,9 +207,6 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
unsigned dstx, unsigned dsty,
unsigned width, unsigned height);
-void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
- struct pipe_surface *dstsurf);
-
void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
struct pipe_surface *zsurf,
struct pipe_surface *cbsurf,
@@ -318,21 +322,13 @@ util_blitter_save_vertex_buffers(struct blitter_context *blitter,
int num_vertex_buffers,
struct pipe_vertex_buffer *vertex_buffers)
{
- unsigned i;
assert(num_vertex_buffers <= Elements(blitter->saved_vertex_buffers));
- blitter->saved_num_vertex_buffers = num_vertex_buffers;
-
- for (i = 0; i < num_vertex_buffers; i++) {
- if (vertex_buffers[i].buffer) {
- pipe_resource_reference(&blitter->saved_vertex_buffers[i].buffer,
- vertex_buffers[i].buffer);
- }
- }
-
- memcpy(blitter->saved_vertex_buffers,
- vertex_buffers,
- num_vertex_buffers * sizeof(struct pipe_vertex_buffer));
+ blitter->saved_num_vertex_buffers = 0;
+ util_copy_vertex_buffers(blitter->saved_vertex_buffers,
+ (unsigned*)&blitter->saved_num_vertex_buffers,
+ vertex_buffers,
+ num_vertex_buffers);
}
#ifdef __cplusplus
diff --git a/src/gallium/auxiliary/util/u_cache.c b/src/gallium/auxiliary/util/u_cache.c
index 15f4a8831f..df08ec302a 100644
--- a/src/gallium/auxiliary/util/u_cache.c
+++ b/src/gallium/auxiliary/util/u_cache.c
@@ -27,9 +27,10 @@
/**
* @file
- * Simple cache implementation.
+ * Improved cache implementation.
*
- * We simply have fixed size array, and destroy previous values on collision.
+ * Fixed size array with linear probing on collision and LRU eviction
+ * on full.
*
* @author Jose Fonseca <jfonseca@vmware.com>
*/
@@ -41,10 +42,17 @@
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_cache.h"
+#include "util/u_simple_list.h"
struct util_cache_entry
{
+ enum { EMPTY = 0, FILLED, DELETED } state;
+ uint32_t hash;
+
+ struct util_cache_entry *next;
+ struct util_cache_entry *prev;
+
void *key;
void *value;
@@ -69,11 +77,14 @@ struct util_cache
struct util_cache_entry *entries;
-#ifdef DEBUG
unsigned count;
-#endif
+ struct util_cache_entry lru;
};
+static void
+ensure_sanity(const struct util_cache *cache);
+
+#define CACHE_DEFAULT_ALPHA 2
struct util_cache *
util_cache_create(uint32_t (*hash)(const void *key),
@@ -90,6 +101,10 @@ util_cache_create(uint32_t (*hash)(const void *key),
cache->hash = hash;
cache->compare = compare;
cache->destroy = destroy;
+
+ make_empty_list(&cache->lru);
+
+ size *= CACHE_DEFAULT_ALPHA;
cache->size = size;
cache->entries = CALLOC(size, sizeof(struct util_cache_entry));
@@ -98,19 +113,46 @@ util_cache_create(uint32_t (*hash)(const void *key),
return NULL;
}
+ ensure_sanity(cache);
return cache;
}
-static INLINE struct util_cache_entry *
+static struct util_cache_entry *
util_cache_entry_get(struct util_cache *cache,
+ uint32_t hash,
const void *key)
{
- uint32_t hash;
-
- hash = cache->hash(key);
+ struct util_cache_entry *first_unfilled = NULL;
+ uint32_t index = hash % cache->size;
+ uint32_t probe;
+
+ /* Probe until we find either a matching FILLED entry or an EMPTY
+ * slot (which has never been occupied).
+ *
+ * Deleted or non-matching slots are not indicative of completion
+ * as a previous linear probe for the same key could have continued
+ * past this point.
+ */
+ for (probe = 0; probe < cache->size; probe++) {
+ uint32_t i = (index + probe) % cache->size;
+ struct util_cache_entry *current = &cache->entries[i];
+
+ if (current->state == FILLED) {
+ if (current->hash == hash &&
+ cache->compare(key, current->key) == 0)
+ return current;
+ }
+ else {
+ if (!first_unfilled)
+ first_unfilled = current;
+
+ if (current->state == EMPTY)
+ return first_unfilled;
+ }
+ }
- return &cache->entries[hash % cache->size];
+ return NULL;
}
static INLINE void
@@ -123,9 +165,15 @@ util_cache_entry_destroy(struct util_cache *cache,
entry->key = NULL;
entry->value = NULL;
- if(key || value)
+ if (entry->state == FILLED) {
+ remove_from_list(entry);
+ cache->count--;
+
if(cache->destroy)
cache->destroy(key, value);
+
+ entry->state = DELETED;
+ }
}
@@ -135,21 +183,33 @@ util_cache_set(struct util_cache *cache,
void *value)
{
struct util_cache_entry *entry;
+ uint32_t hash = cache->hash(key);
assert(cache);
if (!cache)
return;
- entry = util_cache_entry_get(cache, key);
+ entry = util_cache_entry_get(cache, hash, key);
+ if (!entry)
+ entry = cache->lru.prev;
+
+ if (cache->count >= cache->size / CACHE_DEFAULT_ALPHA)
+ util_cache_entry_destroy(cache, cache->lru.prev);
+
util_cache_entry_destroy(cache, entry);
#ifdef DEBUG
++entry->count;
- ++cache->count;
#endif
entry->key = key;
+ entry->hash = hash;
entry->value = value;
+ entry->state = FILLED;
+ insert_at_head(&cache->lru, entry);
+ cache->count++;
+
+ ensure_sanity(cache);
}
@@ -158,17 +218,18 @@ util_cache_get(struct util_cache *cache,
const void *key)
{
struct util_cache_entry *entry;
+ uint32_t hash = cache->hash(key);
assert(cache);
if (!cache)
return NULL;
- entry = util_cache_entry_get(cache, key);
- if(!entry->key && !entry->value)
- return NULL;
-
- if(cache->compare(key, entry->key) != 0)
+ entry = util_cache_entry_get(cache, hash, key);
+ if (!entry)
return NULL;
+
+ if (entry->state == FILLED)
+ move_to_head(&cache->lru, entry);
return entry->value;
}
@@ -183,8 +244,14 @@ util_cache_clear(struct util_cache *cache)
if (!cache)
return;
- for(i = 0; i < cache->size; ++i)
+ for(i = 0; i < cache->size; ++i) {
util_cache_entry_destroy(cache, &cache->entries[i]);
+ cache->entries[i].state = EMPTY;
+ }
+
+ assert(cache->count == 0);
+ assert(is_empty_list(&cache->lru));
+ ensure_sanity(cache);
}
@@ -215,3 +282,70 @@ util_cache_destroy(struct util_cache *cache)
FREE(cache->entries);
FREE(cache);
}
+
+
+void
+util_cache_remove(struct util_cache *cache,
+ const void *key)
+{
+ struct util_cache_entry *entry;
+ uint32_t hash;
+
+ assert(cache);
+ if (!cache)
+ return;
+
+ hash = cache->hash(key);
+
+ entry = util_cache_entry_get(cache, hash, key);
+ if (!entry)
+ return;
+
+ if (entry->state == FILLED)
+ util_cache_entry_destroy(cache, entry);
+
+ ensure_sanity(cache);
+}
+
+
+static void
+ensure_sanity(const struct util_cache *cache)
+{
+#ifdef DEBUG
+ unsigned i, cnt = 0;
+
+ assert(cache);
+ for (i = 0; i < cache->size; i++) {
+ struct util_cache_entry *header = &cache->entries[i];
+
+ assert(header);
+ assert(header->state == FILLED ||
+ header->state == EMPTY ||
+ header->state == DELETED);
+ if (header->state == FILLED) {
+ cnt++;
+ assert(header->hash == cache->hash(header->key));
+ }
+ }
+
+ assert(cnt == cache->count);
+ assert(cache->size >= cnt);
+
+ if (cache->count == 0) {
+ assert (is_empty_list(&cache->lru));
+ }
+ else {
+ struct util_cache_entry *header = cache->lru.next;
+
+ assert (header);
+ assert (!is_empty_list(&cache->lru));
+
+ for (i = 0; i < cache->count; i++)
+ header = header->next;
+
+ assert(header == &cache->lru);
+ }
+#endif
+
+ (void)cache;
+}
diff --git a/src/gallium/auxiliary/util/u_cache.h b/src/gallium/auxiliary/util/u_cache.h
index 8a612c6585..be3631b725 100644
--- a/src/gallium/auxiliary/util/u_cache.h
+++ b/src/gallium/auxiliary/util/u_cache.h
@@ -79,6 +79,10 @@ util_cache_clear(struct util_cache *cache);
void
util_cache_destroy(struct util_cache *cache);
+void
+util_cache_remove(struct util_cache *cache,
+ const void *key);
+
#ifdef __cplusplus
}
diff --git a/src/gallium/auxiliary/util/u_caps.c b/src/gallium/auxiliary/util/u_caps.c
index e209a98b70..75677b2b13 100644
--- a/src/gallium/auxiliary/util/u_caps.c
+++ b/src/gallium/auxiliary/util/u_caps.c
@@ -69,8 +69,7 @@ util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out)
list[i++],
PIPE_TEXTURE_2D,
0,
- PIPE_BIND_SAMPLER_VIEW,
- 0)) {
+ PIPE_BIND_SAMPLER_VIEW)) {
*out = i - 2;
return FALSE;
}
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 2ad2f95b13..36ce4b5771 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -44,9 +44,11 @@
#include "util/u_surface.h"
#include <limits.h> /* CHAR_BIT */
+#include <ctype.h> /* isalnum */
void _debug_vprintf(const char *format, va_list ap)
{
+#if defined(PIPE_OS_WINDOWS) || defined(PIPE_OS_EMBEDDED)
/* We buffer until we find a newline. */
static char buf[4096] = {'\0'};
size_t len = strlen(buf);
@@ -55,6 +57,10 @@ void _debug_vprintf(const char *format, va_list ap)
os_log_message(buf);
buf[0] = '\0';
}
+#else
+ /* Just print as-is to stderr */
+ vfprintf(stderr, format, ap);
+#endif
}
@@ -175,6 +181,48 @@ debug_get_num_option(const char *name, long dfault)
return result;
}
+static boolean str_has_option(const char *str, const char *name)
+{
+ /* Empty string. */
+ if (!*str) {
+ return FALSE;
+ }
+
+ /* OPTION=all */
+ if (!util_strcmp(str, "all")) {
+ return TRUE;
+ }
+
+ /* Find 'name' in 'str' surrounded by non-alphanumeric characters. */
+ {
+ const char *start = str;
+ unsigned name_len = strlen(name);
+
+ /* 'start' is the beginning of the currently-parsed word,
+ * we increment 'str' each iteration.
+ * if we find either the end of string or a non-alphanumeric character,
+ * we compare 'start' up to 'str-1' with 'name'. */
+
+ while (1) {
+ if (!*str || !isalnum(*str)) {
+ if (str-start == name_len &&
+ !memcmp(start, name, name_len)) {
+ return TRUE;
+ }
+
+ if (!*str) {
+ return FALSE;
+ }
+
+ start = str+1;
+ }
+
+ str++;
+ }
+ }
+
+ return FALSE;
+}
unsigned long
debug_get_flags_option(const char *name,
@@ -202,7 +250,7 @@ debug_get_flags_option(const char *name,
else {
result = 0;
while( flags->name ) {
- if (!util_strcmp(str, "all") || util_strstr(str, flags->name ))
+ if (str_has_option(str, flags->name))
result |= flags->value;
++flags;
}
@@ -354,6 +402,41 @@ const char *u_prim_name( unsigned prim )
+#ifdef DEBUG
+int fl_indent = 0;
+const char* fl_function[1024];
+
+int debug_funclog_enter(const char* f, const int line, const char* file)
+{
+ int i;
+
+ for (i = 0; i < fl_indent; i++)
+ debug_printf(" ");
+ debug_printf("%s\n", f);
+
+ assert(fl_indent < 1023);
+ fl_function[fl_indent++] = f;
+
+ return 0;
+}
+
+void debug_funclog_exit(const char* f, const int line, const char* file)
+{
+ --fl_indent;
+ assert(fl_indent >= 0);
+ assert(fl_function[fl_indent] == f);
+}
+
+void debug_funclog_enter_exit(const char* f, const int line, const char* file)
+{
+ int i;
+ for (i = 0; i < fl_indent; i++)
+ debug_printf(" ");
+ debug_printf("%s\n", f);
+}
+#endif
+
+
#ifdef DEBUG
/**
diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h
index 1c9624ea3e..c47c13c64c 100644
--- a/src/gallium/auxiliary/util/u_debug.h
+++ b/src/gallium/auxiliary/util/u_debug.h
@@ -280,6 +280,43 @@ debug_dump_flags(const struct debug_named_value *names,
/**
+ * Function enter exit loggers
+ */
+#ifdef DEBUG
+int debug_funclog_enter(const char* f, const int line, const char* file);
+void debug_funclog_exit(const char* f, const int line, const char* file);
+void debug_funclog_enter_exit(const char* f, const int line, const char* file);
+
+#define DEBUG_FUNCLOG_ENTER() \
+ int __debug_decleration_work_around = \
+ debug_funclog_enter(__FUNCTION__, __LINE__, __FILE__)
+#define DEBUG_FUNCLOG_EXIT() \
+ do { \
+ (void)__debug_decleration_work_around; \
+ debug_funclog_exit(__FUNCTION__, __LINE__, __FILE__); \
+ return; \
+ } while(0)
+#define DEBUG_FUNCLOG_EXIT_RET(ret) \
+ do { \
+ (void)__debug_decleration_work_around; \
+ debug_funclog_exit(__FUNCTION__, __LINE__, __FILE__); \
+ return ret; \
+ } while(0)
+#define DEBUG_FUNCLOG_ENTER_EXIT() \
+ debug_funclog_enter_exit(__FUNCTION__, __LINE__, __FILE__)
+
+#else
+#define DEBUG_FUNCLOG_ENTER() \
+ int __debug_decleration_work_around
+#define DEBUG_FUNCLOG_EXIT() \
+ do { (void)__debug_decleration_work_around; return; } while(0)
+#define DEBUG_FUNCLOG_EXIT_RET(ret) \
+ do { (void)__debug_decleration_work_around; return ret; } while(0)
+#define DEBUG_FUNCLOG_ENTER_EXIT()
+#endif
+
+
+/**
* Get option.
*
* It is an alias for getenv on Linux.
diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.c b/src/gallium/auxiliary/util/u_debug_refcnt.c
index 40a26c9c69..6f706a35fd 100644
--- a/src/gallium/auxiliary/util/u_debug_refcnt.c
+++ b/src/gallium/auxiliary/util/u_debug_refcnt.c
@@ -43,7 +43,8 @@ int debug_refcnt_state;
struct os_stream* stream;
/* TODO: maybe move this serial machinery to a stand-alone module and expose it? */
-static pipe_mutex serials_mutex;
+pipe_static_mutex(serials_mutex);
+
static struct util_hash_table* serials_hash;
static unsigned serials_last;
@@ -66,6 +67,15 @@ static boolean debug_serial(void* p, unsigned* pserial)
{
unsigned serial;
boolean found = TRUE;
+#ifdef PIPE_SUBSYSTEM_WINDOWS_USER
+ static boolean first = TRUE;
+
+ if (first) {
+ pipe_mutex_init(serials_mutex);
+ first = FALSE;
+ }
+#endif
+
pipe_mutex_lock(serials_mutex);
if(!serials_hash)
serials_hash = util_hash_table_create(hash_ptr, compare_ptr);
diff --git a/src/gallium/auxiliary/util/u_debug_symbol.c b/src/gallium/auxiliary/util/u_debug_symbol.c
index 44d437747a..bae9be87a2 100644
--- a/src/gallium/auxiliary/util/u_debug_symbol.c
+++ b/src/gallium/auxiliary/util/u_debug_symbol.c
@@ -40,20 +40,43 @@
#include "u_debug_symbol.h"
#include "u_hash_table.h"
-#if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
+#if defined(PIPE_OS_WINDOWS) && defined(PIPE_ARCH_X86)
#include <windows.h>
#include <stddef.h>
-#include <imagehlp.h>
-/*
- * TODO: Cleanup code.
- * TODO: Support x86_64
- */
+#include "dbghelp.h"
+
static BOOL bSymInitialized = FALSE;
-static HMODULE hModule_Imagehlp = NULL;
+static HMODULE hModule_Dbghelp = NULL;
+
+
+static
+FARPROC WINAPI __GetProcAddress(LPCSTR lpProcName)
+{
+#ifdef PIPE_CC_GCC
+ if (!hModule_Dbghelp) {
+ /*
+ * bfdhelp.dll is a dbghelp.dll look-alike replacement, which is able to
+ * understand MinGW symbols using BFD library. It is available from
+ * http://people.freedesktop.org/~jrfonseca/bfdhelp/ for now.
+ */
+ hModule_Dbghelp = LoadLibraryA("bfdhelp.dll");
+ }
+#endif
+
+ if (!hModule_Dbghelp) {
+ hModule_Dbghelp = LoadLibraryA("dbghelp.dll");
+ if (!hModule_Dbghelp) {
+ return NULL;
+ }
+ }
+
+ return GetProcAddress(hModule_Dbghelp, lpProcName);
+}
+
typedef BOOL (WINAPI *PFNSYMINITIALIZE)(HANDLE, LPSTR, BOOL);
static PFNSYMINITIALIZE pfnSymInitialize = NULL;
@@ -62,8 +85,7 @@ static
BOOL WINAPI j_SymInitialize(HANDLE hProcess, PSTR UserSearchPath, BOOL fInvadeProcess)
{
if(
- (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) &&
- (pfnSymInitialize || (pfnSymInitialize = (PFNSYMINITIALIZE) GetProcAddress(hModule_Imagehlp, "SymInitialize")))
+ (pfnSymInitialize || (pfnSymInitialize = (PFNSYMINITIALIZE) __GetProcAddress("SymInitialize")))
)
return pfnSymInitialize(hProcess, UserSearchPath, fInvadeProcess);
else
@@ -77,57 +99,41 @@ static
DWORD WINAPI j_SymSetOptions(DWORD SymOptions)
{
if(
- (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) &&
- (pfnSymSetOptions || (pfnSymSetOptions = (PFNSYMSETOPTIONS) GetProcAddress(hModule_Imagehlp, "SymSetOptions")))
+ (pfnSymSetOptions || (pfnSymSetOptions = (PFNSYMSETOPTIONS) __GetProcAddress("SymSetOptions")))
)
return pfnSymSetOptions(SymOptions);
else
return FALSE;
}
-typedef PGET_MODULE_BASE_ROUTINE PFNSYMGETMODULEBASE;
-static PFNSYMGETMODULEBASE pfnSymGetModuleBase = NULL;
+typedef BOOL (WINAPI *PFNSYMGETSYMFROMADDR)(HANDLE, DWORD64, PDWORD64, PSYMBOL_INFO);
+static PFNSYMGETSYMFROMADDR pfnSymFromAddr = NULL;
static
-DWORD WINAPI j_SymGetModuleBase(HANDLE hProcess, DWORD dwAddr)
+BOOL WINAPI j_SymFromAddr(HANDLE hProcess, DWORD64 Address, PDWORD64 Displacement, PSYMBOL_INFO Symbol)
{
if(
- (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) &&
- (pfnSymGetModuleBase || (pfnSymGetModuleBase = (PFNSYMGETMODULEBASE) GetProcAddress(hModule_Imagehlp, "SymGetModuleBase")))
+ (pfnSymFromAddr || (pfnSymFromAddr = (PFNSYMGETSYMFROMADDR) __GetProcAddress("SymFromAddr")))
)
- return pfnSymGetModuleBase(hProcess, dwAddr);
- else
- return 0;
-}
-
-typedef BOOL (WINAPI *PFNSYMGETSYMFROMADDR)(HANDLE, DWORD, LPDWORD, PIMAGEHLP_SYMBOL);
-static PFNSYMGETSYMFROMADDR pfnSymGetSymFromAddr = NULL;
-
-static
-BOOL WINAPI j_SymGetSymFromAddr(HANDLE hProcess, DWORD Address, PDWORD Displacement, PIMAGEHLP_SYMBOL Symbol)
-{
- if(
- (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) &&
- (pfnSymGetSymFromAddr || (pfnSymGetSymFromAddr = (PFNSYMGETSYMFROMADDR) GetProcAddress(hModule_Imagehlp, "SymGetSymFromAddr")))
- )
- return pfnSymGetSymFromAddr(hProcess, Address, Displacement, Symbol);
+ return pfnSymFromAddr(hProcess, Address, Displacement, Symbol);
else
return FALSE;
}
static INLINE void
-debug_symbol_name_imagehlp(const void *addr, char* buf, unsigned size)
+debug_symbol_name_dbghelp(const void *addr, char* buf, unsigned size)
{
HANDLE hProcess;
BYTE symbolBuffer[1024];
- PIMAGEHLP_SYMBOL pSymbol = (PIMAGEHLP_SYMBOL) symbolBuffer;
- DWORD dwDisplacement = 0; /* Displacement of the input address, relative to the start of the symbol */
+ PSYMBOL_INFO pSymbol = (PSYMBOL_INFO) symbolBuffer;
+ DWORD64 dwDisplacement = 0; /* Displacement of the input address, relative to the start of the symbol */
hProcess = GetCurrentProcess();
+ memset(pSymbol, 0, sizeof *pSymbol);
pSymbol->SizeOfStruct = sizeof(symbolBuffer);
- pSymbol->MaxNameLength = sizeof(symbolBuffer) - offsetof(IMAGEHLP_SYMBOL, Name);
+ pSymbol->MaxNameLen = sizeof(symbolBuffer) - offsetof(SYMBOL_INFO, Name);
if(!bSymInitialized) {
j_SymSetOptions(/* SYMOPT_UNDNAME | */ SYMOPT_LOAD_LINES);
@@ -135,7 +141,7 @@ debug_symbol_name_imagehlp(const void *addr, char* buf, unsigned size)
bSymInitialized = TRUE;
}
- if(!j_SymGetSymFromAddr(hProcess, (DWORD)addr, &dwDisplacement, pSymbol))
+ if(!j_SymFromAddr(hProcess, (DWORD64)(uintptr_t)addr, &dwDisplacement, pSymbol))
buf[0] = 0;
else
{
@@ -165,8 +171,8 @@ debug_symbol_name_glibc(const void *addr, char* buf, unsigned size)
void
debug_symbol_name(const void *addr, char* buf, unsigned size)
{
-#if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
- debug_symbol_name_imagehlp(addr, buf, size);
+#if defined(PIPE_OS_WINDOWS) && defined(PIPE_ARCH_X86)
+ debug_symbol_name_dbghelp(addr, buf, size);
if(buf[0])
return;
#endif
@@ -190,7 +196,7 @@ debug_symbol_print(const void *addr)
}
struct util_hash_table* symbols_hash;
-pipe_mutex symbols_mutex;
+pipe_static_mutex(symbols_mutex);
static unsigned hash_ptr(void* p)
{
@@ -211,6 +217,15 @@ const char*
debug_symbol_name_cached(const void *addr)
{
const char* name;
+#ifdef PIPE_SUBSYSTEM_WINDOWS_USER
+ static boolean first = TRUE;
+
+ if (first) {
+ pipe_mutex_init(symbols_mutex);
+ first = FALSE;
+ }
+#endif
+
pipe_mutex_lock(symbols_mutex);
if(!symbols_hash)
symbols_hash = util_hash_table_create(hash_ptr, compare_ptr);
diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c
index 0b6dc5880f..8ed3b3c095 100644
--- a/src/gallium/auxiliary/util/u_draw_quad.c
+++ b/src/gallium/auxiliary/util/u_draw_quad.c
@@ -31,6 +31,7 @@
#include "util/u_inlines.h"
#include "util/u_draw_quad.h"
#include "util/u_memory.h"
+#include "cso_cache/cso_context.h"
/**
@@ -39,6 +40,7 @@
*/
void
util_draw_vertex_buffer(struct pipe_context *pipe,
+ struct cso_context *cso,
struct pipe_resource *vbuf,
uint offset,
uint prim_type,
@@ -54,8 +56,12 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
vbuffer.buffer = vbuf;
vbuffer.stride = num_attribs * 4 * sizeof(float); /* vertex size */
vbuffer.buffer_offset = offset;
- vbuffer.max_index = num_verts - 1;
- pipe->set_vertex_buffers(pipe, 1, &vbuffer);
+
+ if (cso) {
+ cso_set_vertex_buffers(cso, 1, &vbuffer);
+ } else {
+ pipe->set_vertex_buffers(pipe, 1, &vbuffer);
+ }
/* note: vertex elements already set by caller */
@@ -70,13 +76,13 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
* Note: this isn't especially efficient.
*/
void
-util_draw_texquad(struct pipe_context *pipe,
+util_draw_texquad(struct pipe_context *pipe, struct cso_context *cso,
float x0, float y0, float x1, float y1, float z)
{
uint numAttribs = 2, i, j;
uint vertexBytes = 4 * (4 * numAttribs * sizeof(float));
struct pipe_resource *vbuf = NULL;
- uint *v = NULL;
+ float *v = NULL;
v = MALLOC(vertexBytes);
if (v == NULL)
@@ -115,10 +121,10 @@ util_draw_texquad(struct pipe_context *pipe,
vbuf = pipe_user_buffer_create(pipe->screen, v, vertexBytes,
PIPE_BIND_VERTEX_BUFFER);
- if (!vbuf)
+ if (!vbuf)
goto out;
- util_draw_vertex_buffer(pipe, vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, 2);
+ util_draw_vertex_buffer(pipe, cso, vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, 2);
out:
if (vbuf)
diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h
index 52994fe05c..f1167786f0 100644
--- a/src/gallium/auxiliary/util/u_draw_quad.h
+++ b/src/gallium/auxiliary/util/u_draw_quad.h
@@ -38,17 +38,18 @@ extern "C" {
#endif
struct pipe_resource;
+struct cso_context;
#include "util/u_draw.h"
extern void
-util_draw_vertex_buffer(struct pipe_context *pipe,
+util_draw_vertex_buffer(struct pipe_context *pipe, struct cso_context *cso,
struct pipe_resource *vbuf, uint offset,
uint num_attribs, uint num_verts, uint prim_type);
extern void
-util_draw_texquad(struct pipe_context *pipe,
+util_draw_texquad(struct pipe_context *pipe, struct cso_context *cso,
float x0, float y0, float x1, float y1, float z);
diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c
index b471d59eeb..5ecf8cbb06 100644
--- a/src/gallium/auxiliary/util/u_dump_state.c
+++ b/src/gallium/auxiliary/util/u_dump_state.c
@@ -681,7 +681,6 @@ util_dump_vertex_buffer(struct os_stream *stream, const struct pipe_vertex_buffe
util_dump_struct_begin(stream, "pipe_vertex_buffer");
util_dump_member(stream, uint, state, stride);
- util_dump_member(stream, uint, state, max_index);
util_dump_member(stream, uint, state, buffer_offset);
util_dump_member(stream, ptr, state, buffer);
diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv
index 1fbd83841c..6f5cc618ff 100644
--- a/src/gallium/auxiliary/util/u_format.csv
+++ b/src/gallium/auxiliary/util/u_format.csv
@@ -76,6 +76,7 @@ PIPE_FORMAT_B4G4R4X4_UNORM , plain, 1, 1, un4 , un4 , un4 , x4 , zyx1, r
PIPE_FORMAT_B5G6R5_UNORM , plain, 1, 1, un5 , un6 , un5 , , zyx1, rgb
PIPE_FORMAT_R10G10B10A2_UNORM , plain, 1, 1, un10, un10, un10, un2 , xyzw, rgb
PIPE_FORMAT_B10G10R10A2_UNORM , plain, 1, 1, un10, un10, un10, un2 , zyxw, rgb
+PIPE_FORMAT_B2G3R3_UNORM , plain, 1, 1, un2 , un3 , un3 , , zyx1, rgb
# Luminance/Intensity/Alpha formats
PIPE_FORMAT_L8_UNORM , plain, 1, 1, un8 , , , , xxx1, rgb
@@ -84,6 +85,9 @@ PIPE_FORMAT_I8_UNORM , plain, 1, 1, un8 , , , , xxxx, r
PIPE_FORMAT_L4A4_UNORM , plain, 1, 1, un4 , un4 , , , xxxy, rgb
PIPE_FORMAT_L8A8_UNORM , plain, 1, 1, un8 , un8 , , , xxxy, rgb
PIPE_FORMAT_L16_UNORM , plain, 1, 1, un16, , , , xxx1, rgb
+PIPE_FORMAT_A16_UNORM , plain, 1, 1, un16, , , , 000x, rgb
+PIPE_FORMAT_I16_UNORM , plain, 1, 1, un16, , , , xxxx, rgb
+PIPE_FORMAT_L16A16_UNORM , plain, 1, 1, un16, un16, , , xxxy, rgb
# SRGB formats
PIPE_FORMAT_L8_SRGB , plain, 1, 1, un8 , , , , xxx1, srgb
@@ -137,6 +141,7 @@ PIPE_FORMAT_R8G8Bx_SNORM , other, 1, 1, sn8 , sn8 , , , x
# - http://en.wikipedia.org/wiki/S3_Texture_Compression
# - http://www.opengl.org/registry/specs/EXT/texture_compression_s3tc.txt
# - http://www.opengl.org/registry/specs/ARB/texture_compression_rgtc.txt
+# - http://www.opengl.org/registry/specs/EXT/texture_compression_latc.txt
# - http://msdn.microsoft.com/en-us/library/bb694531.aspx
PIPE_FORMAT_DXT1_RGB , s3tc, 4, 4, x64 , , , , xyz1, rgb
PIPE_FORMAT_DXT1_RGBA , s3tc, 4, 4, x64 , , , , xyzw, rgb
@@ -152,6 +157,11 @@ PIPE_FORMAT_RGTC1_SNORM , rgtc, 4, 4, x64, , , , x001, rg
PIPE_FORMAT_RGTC2_UNORM , rgtc, 4, 4, x128, , , , xy01, rgb
PIPE_FORMAT_RGTC2_SNORM , rgtc, 4, 4, x128, , , , xy01, rgb
+PIPE_FORMAT_LATC1_UNORM , rgtc, 4, 4, x64, , , , xxx1, rgb
+PIPE_FORMAT_LATC1_SNORM , rgtc, 4, 4, x64, , , , xxx1, rgb
+PIPE_FORMAT_LATC2_UNORM , rgtc, 4, 4, x128, , , , xxxy, rgb
+PIPE_FORMAT_LATC2_SNORM , rgtc, 4, 4, x128, , , , xxxy, rgb
+
# Straightforward D3D10-like formats (also used for
# vertex buffer element description)
#
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 03b73c0e98..7659a802a4 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -673,7 +673,8 @@ util_format_has_alpha(enum pipe_format format)
}
/**
- * Return the matching SRGB format, or PIPE_FORMAT_NONE if none.
+ * Given a linear RGB colorspace format, return the corresponding SRGB
+ * format, or PIPE_FORMAT_NONE if none.
*/
static INLINE enum pipe_format
util_format_srgb(enum pipe_format format)
@@ -711,6 +712,45 @@ util_format_srgb(enum pipe_format format)
}
/**
+ * Given an sRGB format, return the corresponding linear colorspace format.
+ * For non sRGB formats, return the format unchanged.
+ */
+static INLINE enum pipe_format
+util_format_linear(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_L8_SRGB:
+ return PIPE_FORMAT_L8_UNORM;
+ case PIPE_FORMAT_L8A8_SRGB:
+ return PIPE_FORMAT_L8A8_UNORM;
+ case PIPE_FORMAT_R8G8B8_SRGB:
+ return PIPE_FORMAT_R8G8B8_UNORM;
+ case PIPE_FORMAT_A8B8G8R8_SRGB:
+ return PIPE_FORMAT_A8B8G8R8_UNORM;
+ case PIPE_FORMAT_X8B8G8R8_SRGB:
+ return PIPE_FORMAT_X8B8G8R8_UNORM;
+ case PIPE_FORMAT_B8G8R8A8_SRGB:
+ return PIPE_FORMAT_B8G8R8A8_UNORM;
+ case PIPE_FORMAT_B8G8R8X8_SRGB:
+ return PIPE_FORMAT_B8G8R8X8_UNORM;
+ case PIPE_FORMAT_A8R8G8B8_SRGB:
+ return PIPE_FORMAT_A8R8G8B8_UNORM;
+ case PIPE_FORMAT_X8R8G8B8_SRGB:
+ return PIPE_FORMAT_X8R8G8B8_UNORM;
+ case PIPE_FORMAT_DXT1_SRGB:
+ return PIPE_FORMAT_DXT1_RGB;
+ case PIPE_FORMAT_DXT1_SRGBA:
+ return PIPE_FORMAT_DXT1_RGBA;
+ case PIPE_FORMAT_DXT3_SRGBA:
+ return PIPE_FORMAT_DXT3_RGBA;
+ case PIPE_FORMAT_DXT5_SRGBA:
+ return PIPE_FORMAT_DXT5_RGBA;
+ default:
+ return format;
+ }
+}
+
+/**
* Return the number of components stored.
* Formats with block size != 1x1 will always have 1 component (the block).
*/
diff --git a/src/gallium/auxiliary/util/u_format_latc.c b/src/gallium/auxiliary/util/u_format_latc.c
new file mode 100644
index 0000000000..e84c493bb1
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_latc.c
@@ -0,0 +1,328 @@
+/**************************************************************************
+ *
+ * Copyright (C) 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+#include "u_math.h"
+#include "u_format.h"
+#include "u_format_rgtc.h"
+#include "u_format_latc.h"
+
+static void u_format_unsigned_encode_rgtc_chan(uint8_t *blkaddr, uint8_t srccolors[4][4],
+ int numxpixels, int numypixels);
+
+static void u_format_unsigned_fetch_texel_rgtc(unsigned srcRowStride, const uint8_t *pixdata,
+ unsigned i, unsigned j, uint8_t *value, unsigned comps);
+
+static void u_format_signed_encode_rgtc_chan(int8_t *blkaddr, int8_t srccolors[4][4],
+ int numxpixels, int numypixels);
+
+static void u_format_signed_fetch_texel_rgtc(unsigned srcRowStride, const int8_t *pixdata,
+ unsigned i, unsigned j, int8_t *value, unsigned comps);
+
+void
+util_format_latc1_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ /* Fix warnings here: */
+ (void) u_format_unsigned_encode_rgtc_chan;
+ (void) u_format_signed_encode_rgtc_chan;
+
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 1);
+}
+
+void
+util_format_latc1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_rgtc1_unorm_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_latc1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row,
+ unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_rgtc1_unorm_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_latc1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y, i, j;
+ int block_size = 8;
+
+ for(y = 0; y < height; y += 4) {
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += 4) {
+ for(j = 0; j < 4; ++j) {
+ for(i = 0; i < 4; ++i) {
+ float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
+ uint8_t tmp_r;
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1);
+ dst[0] =
+ dst[1] =
+ dst[2] = ubyte_to_float(tmp_r);
+ dst[3] = 1.0;
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_latc1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_rgtc1_unorm_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_latc1_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ uint8_t tmp_r;
+
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1);
+ dst[0] =
+ dst[1] =
+ dst[2] = ubyte_to_float(tmp_r);
+ dst[3] = 1.0;
+}
+
+void
+util_format_latc1_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_latc1_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_latc1_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_latc1_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_rgtc1_snorm_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_latc1_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y, i, j;
+ int block_size = 8;
+
+ for(y = 0; y < height; y += 4) {
+ const int8_t *src = (int8_t *)src_row;
+ for(x = 0; x < width; x += 4) {
+ for(j = 0; j < 4; ++j) {
+ for(i = 0; i < 4; ++i) {
+ float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
+ int8_t tmp_r;
+ u_format_signed_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1);
+ dst[0] =
+ dst[1] =
+ dst[2] = byte_to_float_tex(tmp_r);
+ dst[3] = 1.0;
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_latc1_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ int8_t tmp_r;
+
+ u_format_signed_fetch_texel_rgtc(0, (int8_t *)src, i, j, &tmp_r, 1);
+ dst[0] =
+ dst[1] =
+ dst[2] = byte_to_float_tex(tmp_r);
+ dst[3] = 1.0;
+}
+
+
+void
+util_format_latc2_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ puts(__func__);
+
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 2);
+ u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, dst + 1, 2);
+}
+
+void
+util_format_latc2_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_rgtc2_unorm_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_latc2_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_rgtc2_unorm_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_latc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_rxtc2_unorm_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height, 3);
+}
+
+void
+util_format_latc2_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y, i, j;
+ int block_size = 16;
+
+ for(y = 0; y < height; y += 4) {
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += 4) {
+ for(j = 0; j < 4; ++j) {
+ for(i = 0; i < 4; ++i) {
+ float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
+ uint8_t tmp_r, tmp_g;
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2);
+ u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2);
+ dst[0] =
+ dst[1] =
+ dst[2] = ubyte_to_float(tmp_r);
+ dst[3] = ubyte_to_float(tmp_g);
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_latc2_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ uint8_t tmp_r, tmp_g;
+
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2);
+ u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2);
+ dst[0] =
+ dst[1] =
+ dst[2] = ubyte_to_float(tmp_r);
+ dst[3] = ubyte_to_float(tmp_g);
+}
+
+
+void
+util_format_latc2_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_latc2_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_latc2_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_latc2_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y, i, j;
+ int block_size = 16;
+
+ for(y = 0; y < height; y += 4) {
+ const int8_t *src = (int8_t *)src_row;
+ for(x = 0; x < width; x += 4) {
+ for(j = 0; j < 4; ++j) {
+ for(i = 0; i < 4; ++i) {
+ float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
+ int8_t tmp_r, tmp_g;
+ u_format_signed_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2);
+ u_format_signed_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2);
+ dst[0] =
+ dst[1] =
+ dst[2] = byte_to_float_tex(tmp_r);
+ dst[3] = byte_to_float_tex(tmp_g);
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_latc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_rxtc2_snorm_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height, 3);
+}
+
+void
+util_format_latc2_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ int8_t tmp_r, tmp_g;
+
+ u_format_signed_fetch_texel_rgtc(0, (int8_t *)src, i, j, &tmp_r, 2);
+ u_format_signed_fetch_texel_rgtc(0, (int8_t *)src + 8, i, j, &tmp_g, 2);
+ dst[0] =
+ dst[1] =
+ dst[2] = byte_to_float_tex(tmp_r);
+ dst[3] = byte_to_float_tex(tmp_g);
+}
+
+
+#define TAG(x) u_format_unsigned_##x
+#define TYPE uint8_t
+#define T_MIN 0
+#define T_MAX 255
+
+#include "../../../mesa/main/texcompress_rgtc_tmp.h"
+
+#undef TYPE
+#undef TAG
+#undef T_MIN
+#undef T_MAX
+
+
+#define TAG(x) u_format_signed_##x
+#define TYPE int8_t
+#define T_MIN (int8_t)-128
+#define T_MAX (int8_t)127
+
+#include "../../../mesa/main/texcompress_rgtc_tmp.h"
+
+#undef TYPE
+#undef TAG
+#undef T_MIN
+#undef T_MAX
diff --git a/src/gallium/auxiliary/util/u_format_latc.h b/src/gallium/auxiliary/util/u_format_latc.h
new file mode 100644
index 0000000000..1f08887533
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_latc.h
@@ -0,0 +1,108 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Red Hat Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+#ifndef U_FORMAT_LATC_H_
+#define U_FORMAT_LATC_H_
+
+void
+util_format_latc1_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_latc1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_latc1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_latc1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_latc1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_latc1_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+
+
+void
+util_format_latc1_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_latc1_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_latc1_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_latc1_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_latc1_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_latc1_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+
+void
+util_format_latc2_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_latc2_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_latc2_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_latc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_latc2_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_latc2_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+
+void
+util_format_latc2_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_latc2_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_latc2_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_latc2_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_latc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_latc2_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py
index 6d0016c0ad..cc173f808a 100644
--- a/src/gallium/auxiliary/util/u_format_pack.py
+++ b/src/gallium/auxiliary/util/u_format_pack.py
@@ -632,7 +632,7 @@ def generate_format_fetch(format, dst_channel, dst_native_type, dst_suffix):
def is_format_hand_written(format):
- return format.layout in ('s3tc', 'subsampled', 'other') or format.colorspace == ZS
+ return format.layout in ('s3tc', 'rgtc', 'subsampled', 'other') or format.colorspace == ZS
def generate(formats):
diff --git a/src/gallium/auxiliary/util/u_format_rgtc.c b/src/gallium/auxiliary/util/u_format_rgtc.c
new file mode 100644
index 0000000000..c929fd47e9
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_rgtc.c
@@ -0,0 +1,464 @@
+/**************************************************************************
+ *
+ * Copyright (C) 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+#include "u_math.h"
+#include "u_format.h"
+#include "u_format_rgtc.h"
+
+static void u_format_unsigned_encode_rgtc_chan(uint8_t *blkaddr, uint8_t srccolors[4][4],
+ int numxpixels, int numypixels);
+
+static void u_format_unsigned_fetch_texel_rgtc(unsigned srcRowStride, const uint8_t *pixdata,
+ unsigned i, unsigned j, uint8_t *value, unsigned comps);
+
+static void u_format_signed_encode_rgtc_chan(int8_t *blkaddr, int8_t srccolors[4][4],
+ int numxpixels, int numypixels);
+
+static void u_format_signed_fetch_texel_rgtc(unsigned srcRowStride, const int8_t *pixdata,
+ unsigned i, unsigned j, int8_t *value, unsigned comps);
+
+void
+util_format_rgtc1_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 1);
+}
+
+void
+util_format_rgtc1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, comps = 4;
+ unsigned x, y, i, j;
+ unsigned block_size = 8;
+
+ for(y = 0; y < height; y += bh) {
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += bw) {
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*comps;
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 1);
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_rgtc1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row,
+ unsigned src_stride, unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, bytes_per_block = 8;
+ unsigned x, y, i, j;
+
+ for(y = 0; y < height; y += bh) {
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp[4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ tmp[j][i] = src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4];
+ }
+ }
+ u_format_unsigned_encode_rgtc_chan(dst, tmp, 4, 4);
+ dst += bytes_per_block;
+ }
+ dst_row += dst_stride / sizeof(*dst_row);
+ }
+}
+
+void
+util_format_rgtc1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y, i, j;
+ int block_size = 8;
+ for(y = 0; y < height; y += 4) {
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += 4) {
+ for(j = 0; j < 4; ++j) {
+ for(i = 0; i < 4; ++i) {
+ float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
+ uint8_t tmp_r;
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1);
+ dst[0] = ubyte_to_float(tmp_r);
+ dst[1] = 0.0;
+ dst[2] = 0.0;
+ dst[3] = 1.0;
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_rgtc1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, bytes_per_block = 8;
+ unsigned x, y, i, j;
+
+ for(y = 0; y < height; y += bh) {
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp[4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ tmp[j][i] = float_to_ubyte(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]);
+ }
+ }
+ u_format_unsigned_encode_rgtc_chan(dst, tmp, 4, 4);
+ dst += bytes_per_block;
+ }
+ dst_row += dst_stride / sizeof(*dst_row);
+ }
+}
+
+void
+util_format_rgtc1_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ uint8_t tmp_r;
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1);
+ dst[0] = ubyte_to_float(tmp_r);
+ dst[1] = 0.0;
+ dst[2] = 0.0;
+ dst[3] = 1.0;
+}
+
+void
+util_format_rgtc1_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_rgtc1_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_rgtc1_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_rgtc1_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, bytes_per_block = 8;
+ unsigned x, y, i, j;
+
+ for(y = 0; y < height; y += bh) {
+ int8_t *dst = (int8_t *)dst_row;
+ for(x = 0; x < width; x += bw) {
+ int8_t tmp[4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ tmp[j][i] = float_to_byte_tex(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]);
+ }
+ }
+ u_format_signed_encode_rgtc_chan(dst, tmp, 4, 4);
+ dst += bytes_per_block;
+ }
+ dst_row += dst_stride / sizeof(*dst_row);
+ }
+}
+
+void
+util_format_rgtc1_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y, i, j;
+ int block_size = 8;
+ for(y = 0; y < height; y += 4) {
+ const int8_t *src = (int8_t *)src_row;
+ for(x = 0; x < width; x += 4) {
+ for(j = 0; j < 4; ++j) {
+ for(i = 0; i < 4; ++i) {
+ float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
+ int8_t tmp_r;
+ u_format_signed_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1);
+ dst[0] = byte_to_float_tex(tmp_r);
+ dst[1] = 0.0;
+ dst[2] = 0.0;
+ dst[3] = 1.0;
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_rgtc1_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ int8_t tmp_r;
+ u_format_signed_fetch_texel_rgtc(0, (int8_t *)src, i, j, &tmp_r, 1);
+ dst[0] = byte_to_float_tex(tmp_r);
+ dst[1] = 0.0;
+ dst[2] = 0.0;
+ dst[3] = 1.0;
+}
+
+
+void
+util_format_rgtc2_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 2);
+ u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, dst + 1, 2);
+}
+
+void
+util_format_rgtc2_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, comps = 4;
+ unsigned x, y, i, j;
+ unsigned block_size = 16;
+
+ for(y = 0; y < height; y += bh) {
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += bw) {
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*comps;
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 2);
+ u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, dst + 1, 2);
+
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_rgtc2_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, bytes_per_block = 16;
+ unsigned x, y, i, j;
+
+ for(y = 0; y < height; y += bh) {
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp_r[4][4]; /* [bh][bw] */
+ uint8_t tmp_g[4][4]; /* [bh][bw] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ tmp_r[j][i] = src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4];
+ tmp_g[j][i] = src_row[((y + j)*src_stride/sizeof(*src_row) + (x + i)*4) + 1];
+ }
+ }
+ u_format_unsigned_encode_rgtc_chan(dst, tmp_r, 4, 4);
+ u_format_unsigned_encode_rgtc_chan(dst + 8, tmp_g, 4, 4);
+ dst += bytes_per_block;
+ }
+ dst_row += dst_stride / sizeof(*dst_row);
+ }
+}
+
+void
+util_format_rxtc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height, unsigned chan2off)
+{
+ const unsigned bw = 4, bh = 4, bytes_per_block = 16;
+ unsigned x, y, i, j;
+
+ for(y = 0; y < height; y += bh) {
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp_r[4][4]; /* [bh][bw][comps] */
+ uint8_t tmp_g[4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ tmp_r[j][i] = float_to_ubyte(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]);
+ tmp_g[j][i] = float_to_ubyte(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4 + chan2off]);
+ }
+ }
+ u_format_unsigned_encode_rgtc_chan(dst, tmp_r, 4, 4);
+ u_format_unsigned_encode_rgtc_chan(dst + 8, tmp_g, 4, 4);
+ dst += bytes_per_block;
+ }
+ dst_row += dst_stride / sizeof(*dst_row);
+ }
+}
+
+void
+util_format_rgtc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_rxtc2_unorm_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height, 1);
+}
+
+void
+util_format_rgtc2_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y, i, j;
+ int block_size = 16;
+ for(y = 0; y < height; y += 4) {
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += 4) {
+ for(j = 0; j < 4; ++j) {
+ for(i = 0; i < 4; ++i) {
+ float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
+ uint8_t tmp_r, tmp_g;
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2);
+ u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2);
+ dst[0] = ubyte_to_float(tmp_r);
+ dst[1] = ubyte_to_float(tmp_g);
+ dst[2] = 0.0;
+ dst[3] = 1.0;
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_rgtc2_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ uint8_t tmp_r, tmp_g;
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2);
+ u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2);
+ dst[0] = ubyte_to_float(tmp_r);
+ dst[1] = ubyte_to_float(tmp_g);
+ dst[2] = 0.0;
+ dst[3] = 1.0;
+}
+
+
+void
+util_format_rgtc2_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_rgtc2_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_rgtc2_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_rgtc2_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y, i, j;
+ int block_size = 16;
+ for(y = 0; y < height; y += 4) {
+ const int8_t *src = (int8_t *)src_row;
+ for(x = 0; x < width; x += 4) {
+ for(j = 0; j < 4; ++j) {
+ for(i = 0; i < 4; ++i) {
+ float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
+ int8_t tmp_r, tmp_g;
+ u_format_signed_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2);
+ u_format_signed_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2);
+ dst[0] = byte_to_float_tex(tmp_r);
+ dst[1] = byte_to_float_tex(tmp_g);
+ dst[2] = 0.0;
+ dst[3] = 1.0;
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_rxtc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height, unsigned chan2off)
+{
+ const unsigned bw = 4, bh = 4, bytes_per_block = 16;
+ unsigned x, y, i, j;
+
+ for(y = 0; y < height; y += bh) {
+ int8_t *dst = (int8_t *)dst_row;
+ for(x = 0; x < width; x += bw) {
+ int8_t tmp_r[4][4]; /* [bh][bw][comps] */
+ int8_t tmp_g[4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ tmp_r[j][i] = float_to_byte_tex(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]);
+ tmp_g[j][i] = float_to_byte_tex(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4 + chan2off]);
+ }
+ }
+ u_format_signed_encode_rgtc_chan(dst, tmp_r, 4, 4);
+ u_format_signed_encode_rgtc_chan(dst + 8, tmp_g, 4, 4);
+ dst += bytes_per_block;
+ }
+ dst_row += dst_stride / sizeof(*dst_row);
+ }
+}
+
+void
+util_format_rgtc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_rxtc2_snorm_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height, 1);
+}
+
+void
+util_format_rgtc2_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ int8_t tmp_r, tmp_g;
+ u_format_signed_fetch_texel_rgtc(0, (int8_t *)src, i, j, &tmp_r, 2);
+ u_format_signed_fetch_texel_rgtc(0, (int8_t *)src + 8, i, j, &tmp_g, 2);
+ dst[0] = byte_to_float_tex(tmp_r);
+ dst[1] = byte_to_float_tex(tmp_g);
+ dst[2] = 0.0;
+ dst[3] = 1.0;
+}
+
+
+#define TAG(x) u_format_unsigned_##x
+#define TYPE uint8_t
+#define T_MIN 0
+#define T_MAX 255
+
+#include "../../../mesa/main/texcompress_rgtc_tmp.h"
+
+#undef TYPE
+#undef TAG
+#undef T_MIN
+#undef T_MAX
+
+
+#define TAG(x) u_format_signed_##x
+#define TYPE int8_t
+#define T_MIN (int8_t)-128
+#define T_MAX (int8_t)127
+
+#include "../../../mesa/main/texcompress_rgtc_tmp.h"
+
+#undef TYPE
+#undef TAG
+#undef T_MIN
+#undef T_MAX
diff --git a/src/gallium/auxiliary/util/u_format_rgtc.h b/src/gallium/auxiliary/util/u_format_rgtc.h
new file mode 100644
index 0000000000..67ac4728e5
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_rgtc.h
@@ -0,0 +1,114 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Red Hat Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+#ifndef U_FORMAT_RGTC_H_
+#define U_FORMAT_RGTC_H_
+
+void
+util_format_rgtc1_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_rgtc1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc1_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+
+
+void
+util_format_rgtc1_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_rgtc1_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc1_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc1_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc1_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc1_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+
+void
+util_format_rgtc2_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_rgtc2_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc2_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rxtc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height, unsigned chan2off);
+
+void
+util_format_rgtc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc2_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc2_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+
+void
+util_format_rgtc2_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_rgtc2_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc2_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc2_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rxtc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height, unsigned chan2off);
+
+void
+util_format_rgtc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc2_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py
index 8cc22a5637..55e0f7827e 100755
--- a/src/gallium/auxiliary/util/u_format_table.py
+++ b/src/gallium/auxiliary/util/u_format_table.py
@@ -87,6 +87,8 @@ def write_format_table(formats):
print
print '#include "u_format.h"'
print '#include "u_format_s3tc.h"'
+ print '#include "u_format_rgtc.h"'
+ print '#include "u_format_latc.h"'
print
u_format_pack.generate(formats)
@@ -132,7 +134,7 @@ def write_format_table(formats):
if format.colorspace != ZS:
print " &util_format_%s_unpack_rgba_8unorm," % format.short_name()
print " &util_format_%s_pack_rgba_8unorm," % format.short_name()
- if format.layout == 's3tc':
+ if format.layout == 's3tc' or format.layout == 'rgtc':
print " &util_format_%s_fetch_rgba_8unorm," % format.short_name()
else:
print " NULL, /* fetch_rgba_8unorm */"
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index d22ae8b375..4a1662462e 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -67,7 +67,7 @@ struct gen_mipmap_state
struct pipe_vertex_element velem[2];
void *vs;
- void *fs1d, *fs2d, *fs3d, *fsCube;
+ void *fs[TGSI_TEXTURE_COUNT]; /**< Not all are used, but simplifies code */
struct pipe_resource *vbuf; /**< quad vertices */
unsigned vbuf_slot;
@@ -1301,27 +1301,6 @@ util_create_gen_mipmap(struct pipe_context *pipe,
ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
}
- /* vertex shader - still needed to specify mapping from fragment
- * shader input semantics to vertex elements
- */
- {
- const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
- TGSI_SEMANTIC_GENERIC };
- const uint semantic_indexes[] = { 0, 0 };
- ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
- semantic_indexes);
- }
-
- /* fragment shader */
- ctx->fs1d = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_1D,
- TGSI_INTERPOLATE_LINEAR);
- ctx->fs2d = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D,
- TGSI_INTERPOLATE_LINEAR);
- ctx->fs3d = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_3D,
- TGSI_INTERPOLATE_LINEAR);
- ctx->fsCube = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_CUBE,
- TGSI_INTERPOLATE_LINEAR);
-
/* vertex data that doesn't change */
for (i = 0; i < 4; i++) {
ctx->vertices[i][0][2] = 0.0f; /* z */
@@ -1336,6 +1315,44 @@ util_create_gen_mipmap(struct pipe_context *pipe,
/**
+ * Helper function to set the fragment shaders.
+ */
+static INLINE void
+set_fragment_shader(struct gen_mipmap_state *ctx, uint type)
+{
+ if (!ctx->fs[type])
+ ctx->fs[type] =
+ util_make_fragment_tex_shader(ctx->pipe, type,
+ TGSI_INTERPOLATE_LINEAR);
+
+ cso_set_fragment_shader_handle(ctx->cso, ctx->fs[type]);
+}
+
+
+/**
+ * Helper function to set the vertex shader.
+ */
+static INLINE void
+set_vertex_shader(struct gen_mipmap_state *ctx)
+{
+ /* vertex shader - still required to provide the linkage between
+ * fragment shader input semantics and vertex_element/buffers.
+ */
+ if (!ctx->vs)
+ {
+ const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+ TGSI_SEMANTIC_GENERIC };
+ const uint semantic_indexes[] = { 0, 0 };
+ ctx->vs = util_make_vertex_passthrough_shader(ctx->pipe, 2,
+ semantic_names,
+ semantic_indexes);
+ }
+
+ cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
+}
+
+
+/**
* Get next "slot" of vertex space in the vertex buffer.
* We're allocating one large vertex buffer and using it piece by piece.
*/
@@ -1350,6 +1367,7 @@ get_next_slot(struct gen_mipmap_state *ctx)
if (!ctx->vbuf) {
ctx->vbuf = pipe_buffer_create(ctx->pipe->screen,
PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STREAM,
max_slots * sizeof ctx->vertices);
}
@@ -1389,8 +1407,25 @@ set_vertex_data(struct gen_mipmap_state *ctx,
util_map_texcoords2d_onto_cubemap(layer, &st[0][0], 2,
&ctx->vertices[0][1][0], 8);
}
- else {
- /* 1D/2D/3D */
+ else if (tex_target == PIPE_TEXTURE_1D_ARRAY) {
+ /* 1D texture array */
+ ctx->vertices[0][1][0] = 0.0f; /*s*/
+ ctx->vertices[0][1][1] = r; /*t*/
+ ctx->vertices[0][1][2] = 0.0f; /*r*/
+
+ ctx->vertices[1][1][0] = 1.0f;
+ ctx->vertices[1][1][1] = r;
+ ctx->vertices[1][1][2] = 0.0f;
+
+ ctx->vertices[2][1][0] = 1.0f;
+ ctx->vertices[2][1][1] = r;
+ ctx->vertices[2][1][2] = 0.0f;
+
+ ctx->vertices[3][1][0] = 0.0f;
+ ctx->vertices[3][1][1] = r;
+ ctx->vertices[3][1][2] = 0.0f;
+ } else {
+ /* 1D/2D/3D/2D array */
ctx->vertices[0][1][0] = 0.0f; /*s*/
ctx->vertices[0][1][1] = 0.0f; /*t*/
ctx->vertices[0][1][2] = r; /*r*/
@@ -1425,12 +1460,14 @@ void
util_destroy_gen_mipmap(struct gen_mipmap_state *ctx)
{
struct pipe_context *pipe = ctx->pipe;
+ unsigned i;
+
+ for (i = 0; i < Elements(ctx->fs); i++)
+ if (ctx->fs[i])
+ pipe->delete_fs_state(pipe, ctx->fs[i]);
- pipe->delete_fs_state(pipe, ctx->fsCube);
- pipe->delete_fs_state(pipe, ctx->fs3d);
- pipe->delete_fs_state(pipe, ctx->fs2d);
- pipe->delete_fs_state(pipe, ctx->fs1d);
- pipe->delete_vs_state(pipe, ctx->vs);
+ if (ctx->vs)
+ pipe->delete_vs_state(pipe, ctx->vs);
pipe_resource_reference(&ctx->vbuf, NULL);
@@ -1469,9 +1506,9 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
struct pipe_screen *screen = pipe->screen;
struct pipe_framebuffer_state fb;
struct pipe_resource *pt = psv->texture;
- void *fs;
uint dstLevel;
uint offset;
+ uint type;
/* The texture object should have room for the levels which we're
* about to generate.
@@ -1486,27 +1523,31 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
switch (pt->target) {
case PIPE_TEXTURE_1D:
- fs = ctx->fs1d;
+ type = TGSI_TEXTURE_1D;
break;
case PIPE_TEXTURE_2D:
- fs = ctx->fs2d;
+ type = TGSI_TEXTURE_2D;
break;
case PIPE_TEXTURE_3D:
- fs = ctx->fs3d;
+ type = TGSI_TEXTURE_3D;
break;
case PIPE_TEXTURE_CUBE:
- fs = ctx->fsCube;
+ type = TGSI_TEXTURE_CUBE;
break;
case PIPE_TEXTURE_1D_ARRAY:
+ type = TGSI_TEXTURE_1D_ARRAY;
+ break;
case PIPE_TEXTURE_2D_ARRAY:
+ type = TGSI_TEXTURE_2D_ARRAY;
+ break;
default:
assert(0);
- fs = ctx->fs2d;
+ type = TGSI_TEXTURE_2D;
}
/* check if we can render in the texture's format */
if (!screen->is_format_supported(screen, psv->format, pt->target,
- pt->nr_samples, PIPE_BIND_RENDER_TARGET, 0)) {
+ pt->nr_samples, PIPE_BIND_RENDER_TARGET)) {
fallback_gen_mipmap(ctx, pt, face, baseLevel, lastLevel);
return;
}
@@ -1531,8 +1572,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
cso_set_clip(ctx->cso, &ctx->clip);
cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
- cso_set_fragment_shader_handle(ctx->cso, fs);
- cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
+ set_fragment_shader(ctx, type);
+ set_vertex_shader(ctx);
/* init framebuffer state */
memset(&fb, 0, sizeof(fb));
@@ -1554,6 +1595,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
if (pt->target == PIPE_TEXTURE_3D)
nr_layers = u_minify(pt->depth0, dstLevel);
+ else if (pt->target == PIPE_TEXTURE_2D_ARRAY || pt->target == PIPE_TEXTURE_1D_ARRAY)
+ nr_layers = pt->array_size;
else
nr_layers = 1;
@@ -1563,11 +1606,12 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
/* in theory with geom shaders and driver with full layer support
could do that in one go. */
layer = i;
- offset = 1.0f / (float)(nr_layers * 2);
/* XXX hmm really? */
rcoord = (float)layer / (float)nr_layers + 1.0f / (float)(nr_layers * 2);
- }
- else
+ } else if (pt->target == PIPE_TEXTURE_2D_ARRAY || pt->target == PIPE_TEXTURE_1D_ARRAY) {
+ layer = i;
+ rcoord = (float)layer;
+ } else
layer = face;
memset(&surf_templ, 0, sizeof(surf_templ));
@@ -1616,15 +1660,14 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
face,
rcoord);
- util_draw_vertex_buffer(ctx->pipe,
+ util_draw_vertex_buffer(ctx->pipe,
+ ctx->cso,
ctx->vbuf,
offset,
PIPE_PRIM_TRIANGLE_FAN,
4, /* verts */
2); /* attribs/vert */
- pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
-
/* need to signal that the texture has changed _after_ rendering to it */
pipe_surface_reference( &surf, NULL );
}
diff --git a/src/gallium/auxiliary/util/u_index_modify.c b/src/gallium/auxiliary/util/u_index_modify.c
index 65b079ed53..d0a28b5fdf 100644
--- a/src/gallium/auxiliary/util/u_index_modify.c
+++ b/src/gallium/auxiliary/util/u_index_modify.c
@@ -24,26 +24,77 @@
#include "util/u_index_modify.h"
#include "util/u_inlines.h"
+/* Ubyte indices. */
+
+void util_shorten_ubyte_elts_to_userptr(struct pipe_context *context,
+ struct pipe_resource *elts,
+ int index_bias,
+ unsigned start,
+ unsigned count,
+ void *out)
+{
+ struct pipe_transfer *src_transfer;
+ unsigned char *in_map;
+ unsigned short *out_map = out;
+ unsigned i;
+
+ in_map = pipe_buffer_map(context, elts,
+ PIPE_TRANSFER_READ |
+ PIPE_TRANSFER_UNSYNCHRONIZED,
+ &src_transfer);
+ in_map += start;
+
+ for (i = 0; i < count; i++) {
+ *out_map = (unsigned short)(*in_map + index_bias);
+ in_map++;
+ out_map++;
+ }
+
+ pipe_buffer_unmap(context, src_transfer);
+}
+
void util_shorten_ubyte_elts(struct pipe_context *context,
struct pipe_resource **elts,
int index_bias,
unsigned start,
unsigned count)
{
- struct pipe_screen* screen = context->screen;
struct pipe_resource* new_elts;
- unsigned char *in_map;
unsigned short *out_map;
- struct pipe_transfer *src_transfer, *dst_transfer;
- unsigned i;
+ struct pipe_transfer *dst_transfer;
- new_elts = pipe_buffer_create(screen,
+ new_elts = pipe_buffer_create(context->screen,
PIPE_BIND_INDEX_BUFFER,
+ PIPE_USAGE_STATIC,
2 * count);
- in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer);
- out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer);
+ out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE,
+ &dst_transfer);
+ util_shorten_ubyte_elts_to_userptr(context, *elts, index_bias,
+ start, count, out_map);
+ pipe_buffer_unmap(context, dst_transfer);
+
+ *elts = new_elts;
+}
+
+
+/* Ushort indices. */
+void util_rebuild_ushort_elts_to_userptr(struct pipe_context *context,
+ struct pipe_resource *elts,
+ int index_bias,
+ unsigned start, unsigned count,
+ void *out)
+{
+ struct pipe_transfer *in_transfer = NULL;
+ unsigned short *in_map;
+ unsigned short *out_map = out;
+ unsigned i;
+
+ in_map = pipe_buffer_map(context, elts,
+ PIPE_TRANSFER_READ |
+ PIPE_TRANSFER_UNSYNCHRONIZED,
+ &in_transfer);
in_map += start;
for (i = 0; i < count; i++) {
@@ -52,10 +103,7 @@ void util_shorten_ubyte_elts(struct pipe_context *context,
out_map++;
}
- pipe_buffer_unmap(context, *elts, src_transfer);
- pipe_buffer_unmap(context, new_elts, dst_transfer);
-
- *elts = new_elts;
+ pipe_buffer_unmap(context, in_transfer);
}
void util_rebuild_ushort_elts(struct pipe_context *context,
@@ -63,33 +111,51 @@ void util_rebuild_ushort_elts(struct pipe_context *context,
int index_bias,
unsigned start, unsigned count)
{
- struct pipe_transfer *in_transfer = NULL;
struct pipe_transfer *out_transfer = NULL;
struct pipe_resource *new_elts;
- unsigned short *in_map;
unsigned short *out_map;
- unsigned i;
new_elts = pipe_buffer_create(context->screen,
PIPE_BIND_INDEX_BUFFER,
+ PIPE_USAGE_STATIC,
2 * count);
- in_map = pipe_buffer_map(context, *elts,
- PIPE_TRANSFER_READ, &in_transfer);
out_map = pipe_buffer_map(context, new_elts,
PIPE_TRANSFER_WRITE, &out_transfer);
+ util_rebuild_ushort_elts_to_userptr(context, *elts, index_bias,
+ start, count, out_map);
+ pipe_buffer_unmap(context, out_transfer);
+
+ *elts = new_elts;
+}
+
+
+/* Uint indices. */
+void util_rebuild_uint_elts_to_userptr(struct pipe_context *context,
+ struct pipe_resource *elts,
+ int index_bias,
+ unsigned start, unsigned count,
+ void *out)
+{
+ struct pipe_transfer *in_transfer = NULL;
+ unsigned int *in_map;
+ unsigned int *out_map = out;
+ unsigned i;
+
+ in_map = pipe_buffer_map(context, elts,
+ PIPE_TRANSFER_READ |
+ PIPE_TRANSFER_UNSYNCHRONIZED,
+ &in_transfer);
in_map += start;
+
for (i = 0; i < count; i++) {
- *out_map = (unsigned short)(*in_map + index_bias);
+ *out_map = (unsigned int)(*in_map + index_bias);
in_map++;
out_map++;
}
- pipe_buffer_unmap(context, *elts, in_transfer);
- pipe_buffer_unmap(context, new_elts, out_transfer);
-
- *elts = new_elts;
+ pipe_buffer_unmap(context, in_transfer);
}
void util_rebuild_uint_elts(struct pipe_context *context,
@@ -97,31 +163,20 @@ void util_rebuild_uint_elts(struct pipe_context *context,
int index_bias,
unsigned start, unsigned count)
{
- struct pipe_transfer *in_transfer = NULL;
struct pipe_transfer *out_transfer = NULL;
struct pipe_resource *new_elts;
- unsigned int *in_map;
unsigned int *out_map;
- unsigned i;
new_elts = pipe_buffer_create(context->screen,
PIPE_BIND_INDEX_BUFFER,
+ PIPE_USAGE_STATIC,
2 * count);
- in_map = pipe_buffer_map(context, *elts,
- PIPE_TRANSFER_READ, &in_transfer);
out_map = pipe_buffer_map(context, new_elts,
PIPE_TRANSFER_WRITE, &out_transfer);
-
- in_map += start;
- for (i = 0; i < count; i++) {
- *out_map = (unsigned int)(*in_map + index_bias);
- in_map++;
- out_map++;
- }
-
- pipe_buffer_unmap(context, *elts, in_transfer);
- pipe_buffer_unmap(context, new_elts, out_transfer);
+ util_rebuild_uint_elts_to_userptr(context, *elts, index_bias,
+ start, count, out_map);
+ pipe_buffer_unmap(context, out_transfer);
*elts = new_elts;
}
diff --git a/src/gallium/auxiliary/util/u_index_modify.h b/src/gallium/auxiliary/util/u_index_modify.h
index 01a6cae94f..1e9de3dfac 100644
--- a/src/gallium/auxiliary/util/u_index_modify.h
+++ b/src/gallium/auxiliary/util/u_index_modify.h
@@ -23,19 +23,46 @@
#ifndef UTIL_INDEX_MODIFY_H
#define UTIL_INDEX_MODIFY_H
+struct pipe_context;
+struct pipe_resource;
+
+void util_shorten_ubyte_elts_to_userptr(struct pipe_context *context,
+ struct pipe_resource *elts,
+ int index_bias,
+ unsigned start,
+ unsigned count,
+ void *out);
+
void util_shorten_ubyte_elts(struct pipe_context *context,
struct pipe_resource **elts,
int index_bias,
unsigned start,
unsigned count);
+
+
+void util_rebuild_ushort_elts_to_userptr(struct pipe_context *context,
+ struct pipe_resource *elts,
+ int index_bias,
+ unsigned start, unsigned count,
+ void *out);
+
void util_rebuild_ushort_elts(struct pipe_context *context,
struct pipe_resource **elts,
int index_bias,
unsigned start, unsigned count);
+
+
+void util_rebuild_uint_elts_to_userptr(struct pipe_context *context,
+ struct pipe_resource *elts,
+ int index_bias,
+ unsigned start, unsigned count,
+ void *out);
+
void util_rebuild_uint_elts(struct pipe_context *context,
struct pipe_resource **elts,
int index_bias,
unsigned start, unsigned count);
+
#endif
diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h
index e55aafe90f..ddb81b5b95 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -160,6 +160,21 @@ pipe_surface_init(struct pipe_context *ctx, struct pipe_surface* ps,
pipe_surface_reset(ctx, ps, pt, level, layer, flags);
}
+/* Return true if the surfaces are equal. */
+static INLINE boolean
+pipe_surface_equal(struct pipe_surface *s1, struct pipe_surface *s2)
+{
+ return s1->texture == s2->texture &&
+ s1->format == s2->format &&
+ (s1->texture->target != PIPE_BUFFER ||
+ (s1->u.buf.first_element == s2->u.buf.first_element &&
+ s1->u.buf.last_element == s2->u.buf.last_element)) &&
+ (s1->texture->target == PIPE_BUFFER ||
+ (s1->u.tex.level == s2->u.tex.level &&
+ s1->u.tex.first_layer == s2->u.tex.first_layer &&
+ s1->u.tex.last_layer == s2->u.tex.last_layer));
+}
+
/*
* Convenience wrappers for screen buffer functions.
*/
@@ -167,6 +182,7 @@ pipe_surface_init(struct pipe_context *ctx, struct pipe_surface* ps,
static INLINE struct pipe_resource *
pipe_buffer_create( struct pipe_screen *screen,
unsigned bind,
+ unsigned usage,
unsigned size )
{
struct pipe_resource buffer;
@@ -174,7 +190,7 @@ pipe_buffer_create( struct pipe_screen *screen,
buffer.target = PIPE_BUFFER;
buffer.format = PIPE_FORMAT_R8_UNORM; /* want TYPELESS or similar */
buffer.bind = bind;
- buffer.usage = PIPE_USAGE_DEFAULT;
+ buffer.usage = usage;
buffer.flags = 0;
buffer.width0 = size;
buffer.height0 = 1;
@@ -220,6 +236,7 @@ pipe_buffer_map_range(struct pipe_context *pipe,
map = pipe->transfer_map( pipe, *transfer );
if (map == NULL) {
pipe->transfer_destroy( pipe, *transfer );
+ *transfer = NULL;
return NULL;
}
@@ -242,7 +259,6 @@ pipe_buffer_map(struct pipe_context *pipe,
static INLINE void
pipe_buffer_unmap(struct pipe_context *pipe,
- struct pipe_resource *buf,
struct pipe_transfer *transfer)
{
if (transfer) {
@@ -283,13 +299,20 @@ pipe_buffer_write(struct pipe_context *pipe,
const void *data)
{
struct pipe_box box;
+ unsigned usage = PIPE_TRANSFER_WRITE;
+
+ if (offset == 0 && size == buf->width0) {
+ usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
+ } else {
+ usage |= PIPE_TRANSFER_DISCARD_RANGE;
+ }
u_box_1d(offset, size, &box);
pipe->transfer_inline_write( pipe,
buf,
0,
- PIPE_TRANSFER_WRITE,
+ usage,
&box,
data,
size,
@@ -341,7 +364,7 @@ pipe_buffer_read(struct pipe_context *pipe,
if (map)
memcpy(data, map + offset, size);
- pipe_buffer_unmap(pipe, buf, src_transfer);
+ pipe_buffer_unmap(pipe, src_transfer);
}
static INLINE struct pipe_transfer *
@@ -401,6 +424,34 @@ static INLINE boolean util_get_offset(
}
}
+/**
+ * This function is used to copy an array of pipe_vertex_buffer structures,
+ * while properly referencing the pipe_vertex_buffer::buffer member.
+ *
+ * \sa util_copy_framebuffer_state
+ */
+static INLINE void util_copy_vertex_buffers(struct pipe_vertex_buffer *dst,
+ unsigned *dst_count,
+ const struct pipe_vertex_buffer *src,
+ unsigned src_count)
+{
+ unsigned i;
+
+ /* Reference the buffers of 'src' in 'dst'. */
+ for (i = 0; i < src_count; i++) {
+ pipe_resource_reference(&dst[i].buffer, src[i].buffer);
+ }
+ /* Unreference the rest of the buffers in 'dst'. */
+ for (; i < *dst_count; i++) {
+ pipe_resource_reference(&dst[i].buffer, NULL);
+ }
+
+ /* Update the size of 'dst' and copy over the other members
+ * of pipe_vertex_buffer. */
+ *dst_count = src_count;
+ memcpy(dst, src, src_count * sizeof(struct pipe_vertex_buffer));
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index cfcb9cd857..00653562ad 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -176,7 +176,7 @@ static INLINE float logf( float f )
#define isfinite(x) _finite((double)(x))
#define isnan(x) _isnan((double)(x))
-#endif
+#endif /* _MSC_VER < 1400 && !defined(__cplusplus) */
static INLINE double log2( double x )
{
@@ -184,6 +184,18 @@ static INLINE double log2( double x )
return log( x ) * invln2;
}
+static INLINE double
+round(double x)
+{
+ return x >= 0.0 ? floor(x + 0.5) : ceil(x - 0.5);
+}
+
+static INLINE float
+roundf(float x)
+{
+ return x >= 0.0f ? floorf(x + 0.5f) : ceilf(x - 0.5f);
+}
+
#endif /* _MSC_VER */
@@ -473,6 +485,17 @@ float_to_ubyte(float f)
}
}
+static INLINE float
+byte_to_float_tex(int8_t b)
+{
+ return (b == -128) ? -1.0F : b * 1.0F / 127.0F;
+}
+
+static INLINE int8_t
+float_to_byte_tex(float f)
+{
+ return (int8_t) (127.0F * f);
+}
/**
* Calc log base 2
diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c
new file mode 100644
index 0000000000..f79a6938d1
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_pstipple.c
@@ -0,0 +1,434 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Polygon stipple helper module. Drivers/GPUs which don't support polygon
+ * stipple natively can use this module to simulate it.
+ *
+ * Basically, modify fragment shader to sample the 32x32 stipple pattern
+ * texture and do a fragment kill for the 'off' bits.
+ *
+ * This was originally a 'draw' module stage, but since we don't need
+ * vertex window coords or anything, it can be a stand-alone utility module.
+ *
+ * Authors: Brian Paul
+ */
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_inlines.h"
+
+#include "util/u_format.h"
+#include "util/u_memory.h"
+#include "util/u_pstipple.h"
+#include "util/u_sampler.h"
+
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_dump.h"
+
+/** Approx number of new tokens for instructions in pstip_transform_inst() */
+#define NUM_NEW_TOKENS 50
+
+
+static void
+util_pstipple_update_stipple_texture(struct pipe_context *pipe,
+ struct pipe_resource *tex,
+ const uint32_t pattern[32])
+{
+ static const uint bit31 = 1 << 31;
+ struct pipe_transfer *transfer;
+ ubyte *data;
+ int i, j;
+
+ /* map texture memory */
+ transfer = pipe_get_transfer(pipe, tex, 0, 0,
+ PIPE_TRANSFER_WRITE, 0, 0, 32, 32);
+ data = pipe->transfer_map(pipe, transfer);
+
+ /*
+ * Load alpha texture.
+ * Note: 0 means keep the fragment, 255 means kill it.
+ * We'll negate the texel value and use KILP which kills if value
+ * is negative.
+ */
+ for (i = 0; i < 32; i++) {
+ for (j = 0; j < 32; j++) {
+ if (pattern[i] & (bit31 >> j)) {
+ /* fragment "on" */
+ data[i * transfer->stride + j] = 0;
+ }
+ else {
+ /* fragment "off" */
+ data[i * transfer->stride + j] = 255;
+ }
+ }
+ }
+
+ /* unmap */
+ pipe->transfer_unmap(pipe, transfer);
+ pipe->transfer_destroy(pipe, transfer);
+}
+
+
+/**
+ * Create a 32x32 alpha8 texture that encodes the given stipple pattern.
+ */
+struct pipe_resource *
+util_pstipple_create_stipple_texture(struct pipe_context *pipe,
+ const uint32_t pattern[32])
+{
+ struct pipe_screen *screen = pipe->screen;
+ struct pipe_resource templat, *tex;
+
+ memset(&templat, 0, sizeof(templat));
+ templat.target = PIPE_TEXTURE_2D;
+ templat.format = PIPE_FORMAT_A8_UNORM;
+ templat.last_level = 0;
+ templat.width0 = 32;
+ templat.height0 = 32;
+ templat.depth0 = 1;
+ templat.array_size = 1;
+ templat.bind = PIPE_BIND_SAMPLER_VIEW;
+
+ tex = screen->resource_create(screen, &templat);
+
+ if (tex)
+ util_pstipple_update_stipple_texture(pipe, tex, pattern);
+
+ return tex;
+}
+
+
+/**
+ * Create sampler view to sample the stipple texture.
+ */
+struct pipe_sampler_view *
+util_pstipple_create_sampler_view(struct pipe_context *pipe,
+ struct pipe_resource *tex)
+{
+ struct pipe_sampler_view templat, *sv;
+
+ u_sampler_view_default_template(&templat, tex, tex->format);
+ sv = pipe->create_sampler_view(pipe, tex, &templat);
+
+ return sv;
+}
+
+
+/**
+ * Create the sampler CSO that'll be used for stippling.
+ */
+void *
+util_pstipple_create_sampler(struct pipe_context *pipe)
+{
+ struct pipe_sampler_state templat;
+ void *s;
+
+ memset(&templat, 0, sizeof(templat));
+ templat.wrap_s = PIPE_TEX_WRAP_REPEAT;
+ templat.wrap_t = PIPE_TEX_WRAP_REPEAT;
+ templat.wrap_r = PIPE_TEX_WRAP_REPEAT;
+ templat.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+ templat.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+ templat.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+ templat.normalized_coords = 1;
+ templat.min_lod = 0.0f;
+ templat.max_lod = 0.0f;
+
+ s = pipe->create_sampler_state(pipe, &templat);
+ return s;
+}
+
+
+
+/**
+ * Subclass of tgsi_transform_context, used for transforming the
+ * user's fragment shader to add the extra texture sample and fragment kill
+ * instructions.
+ */
+struct pstip_transform_context {
+ struct tgsi_transform_context base;
+ uint tempsUsed; /**< bitmask */
+ int wincoordInput;
+ int maxInput;
+ uint samplersUsed; /**< bitfield of samplers used */
+ int freeSampler; /** an available sampler for the pstipple */
+ int texTemp; /**< temp registers */
+ int numImmed;
+ boolean firstInstruction;
+};
+
+
+/**
+ * TGSI declaration transform callback.
+ * Look for a free sampler, a free input attrib, and two free temp regs.
+ */
+static void
+pstip_transform_decl(struct tgsi_transform_context *ctx,
+ struct tgsi_full_declaration *decl)
+{
+ struct pstip_transform_context *pctx =
+ (struct pstip_transform_context *) ctx;
+
+ if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
+ uint i;
+ for (i = decl->Range.First;
+ i <= decl->Range.Last; i++) {
+ pctx->samplersUsed |= 1 << i;
+ }
+ }
+ else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+ pctx->maxInput = MAX2(pctx->maxInput, (int) decl->Range.Last);
+ if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
+ pctx->wincoordInput = (int) decl->Range.First;
+ }
+ else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+ uint i;
+ for (i = decl->Range.First;
+ i <= decl->Range.Last; i++) {
+ pctx->tempsUsed |= (1 << i);
+ }
+ }
+
+ ctx->emit_declaration(ctx, decl);
+}
+
+
+static void
+pstip_transform_immed(struct tgsi_transform_context *ctx,
+ struct tgsi_full_immediate *immed)
+{
+ struct pstip_transform_context *pctx =
+ (struct pstip_transform_context *) ctx;
+ pctx->numImmed++;
+}
+
+
+/**
+ * Find the lowest zero bit in the given word, or -1 if bitfield is all ones.
+ */
+static int
+free_bit(uint bitfield)
+{
+ return ffs(~bitfield) - 1;
+}
+
+
+/**
+ * TGSI instruction transform callback.
+ * Replace writes to result.color w/ a temp reg.
+ * Upon END instruction, insert texture sampling code for antialiasing.
+ */
+static void
+pstip_transform_inst(struct tgsi_transform_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct pstip_transform_context *pctx =
+ (struct pstip_transform_context *) ctx;
+
+ if (pctx->firstInstruction) {
+ /* emit our new declarations before the first instruction */
+
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction newInst;
+ uint i;
+ int wincoordInput;
+
+ /* find free sampler */
+ pctx->freeSampler = free_bit(pctx->samplersUsed);
+ if (pctx->freeSampler >= PIPE_MAX_SAMPLERS)
+ pctx->freeSampler = PIPE_MAX_SAMPLERS - 1;
+
+ if (pctx->wincoordInput < 0)
+ wincoordInput = pctx->maxInput + 1;
+ else
+ wincoordInput = pctx->wincoordInput;
+
+ /* find one free temp reg */
+ for (i = 0; i < 32; i++) {
+ if ((pctx->tempsUsed & (1 << i)) == 0) {
+ /* found a free temp */
+ if (pctx->texTemp < 0)
+ pctx->texTemp = i;
+ else
+ break;
+ }
+ }
+ assert(pctx->texTemp >= 0);
+
+ if (pctx->wincoordInput < 0) {
+ /* declare new position input reg */
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_INPUT;
+ decl.Declaration.Interpolate = TGSI_INTERPOLATE_LINEAR;
+ decl.Declaration.Semantic = 1;
+ decl.Semantic.Name = TGSI_SEMANTIC_POSITION;
+ decl.Semantic.Index = 0;
+ decl.Range.First =
+ decl.Range.Last = wincoordInput;
+ ctx->emit_declaration(ctx, &decl);
+ }
+
+ /* declare new sampler */
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_SAMPLER;
+ decl.Range.First =
+ decl.Range.Last = pctx->freeSampler;
+ ctx->emit_declaration(ctx, &decl);
+
+ /* declare new temp regs */
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_TEMPORARY;
+ decl.Range.First =
+ decl.Range.Last = pctx->texTemp;
+ ctx->emit_declaration(ctx, &decl);
+
+ /* emit immediate = {1/32, 1/32, 1, 1}
+ * The index/position of this immediate will be pctx->numImmed
+ */
+ {
+ static const float value[4] = { 1.0/32, 1.0/32, 1.0, 1.0 };
+ struct tgsi_full_immediate immed;
+ uint size = 4;
+ immed = tgsi_default_full_immediate();
+ immed.Immediate.NrTokens = 1 + size; /* one for the token itself */
+ immed.u[0].Float = value[0];
+ immed.u[1].Float = value[1];
+ immed.u[2].Float = value[2];
+ immed.u[3].Float = value[3];
+ ctx->emit_immediate(ctx, &immed);
+ }
+
+ pctx->firstInstruction = FALSE;
+
+
+ /*
+ * Insert new MUL/TEX/KILP instructions at start of program
+ * Take gl_FragCoord, divide by 32 (stipple size), sample the
+ * texture and kill fragment if needed.
+ *
+ * We'd like to use non-normalized texcoords to index into a RECT
+ * texture, but we can only use REPEAT wrap mode with normalized
+ * texcoords. Darn.
+ */
+
+ /* XXX invert wincoord if origin isn't lower-left... */
+
+ /* MUL texTemp, INPUT[wincoord], 1/32; */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+ newInst.Dst[0].Register.Index = pctx->texTemp;
+ newInst.Instruction.NumSrcRegs = 2;
+ newInst.Src[0].Register.File = TGSI_FILE_INPUT;
+ newInst.Src[0].Register.Index = wincoordInput;
+ newInst.Src[1].Register.File = TGSI_FILE_IMMEDIATE;
+ newInst.Src[1].Register.Index = pctx->numImmed;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* TEX texTemp, texTemp, sampler; */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_TEX;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+ newInst.Dst[0].Register.Index = pctx->texTemp;
+ newInst.Instruction.NumSrcRegs = 2;
+ newInst.Instruction.Texture = TRUE;
+ newInst.Texture.Texture = TGSI_TEXTURE_2D;
+ newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+ newInst.Src[0].Register.Index = pctx->texTemp;
+ newInst.Src[1].Register.File = TGSI_FILE_SAMPLER;
+ newInst.Src[1].Register.Index = pctx->freeSampler;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* KIL -texTemp; # if -texTemp < 0, KILL fragment */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_KIL;
+ newInst.Instruction.NumDstRegs = 0;
+ newInst.Instruction.NumSrcRegs = 1;
+ newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+ newInst.Src[0].Register.Index = pctx->texTemp;
+ newInst.Src[0].Register.Negate = 1;
+ ctx->emit_instruction(ctx, &newInst);
+ }
+
+ /* emit this instruction */
+ ctx->emit_instruction(ctx, inst);
+}
+
+
+/**
+ * Given a fragment shader, return a new fragment shader which
+ * samples a stipple texture and executes KILL.
+ */
+struct pipe_shader_state *
+util_pstipple_create_fragment_shader(struct pipe_context *pipe,
+ struct pipe_shader_state *fs,
+ unsigned *samplerUnitOut)
+{
+ struct pipe_shader_state *new_fs;
+ struct pstip_transform_context transform;
+ const uint newLen = tgsi_num_tokens(fs->tokens) + NUM_NEW_TOKENS;
+
+ new_fs = MALLOC(sizeof(*new_fs));
+ if (!new_fs)
+ return NULL;
+
+ new_fs->tokens = tgsi_alloc_tokens(newLen);
+ if (!new_fs->tokens) {
+ FREE(new_fs);
+ return NULL;
+ }
+
+ memset(&transform, 0, sizeof(transform));
+ transform.wincoordInput = -1;
+ transform.maxInput = -1;
+ transform.texTemp = -1;
+ transform.firstInstruction = TRUE;
+ transform.base.transform_instruction = pstip_transform_inst;
+ transform.base.transform_declaration = pstip_transform_decl;
+ transform.base.transform_immediate = pstip_transform_immed;
+
+ tgsi_transform_shader(fs->tokens,
+ (struct tgsi_token *) new_fs->tokens,
+ newLen, &transform.base);
+
+#if 0 /* DEBUG */
+ tgsi_dump(fs->tokens, 0);
+ tgsi_dump(pstip_fs.tokens, 0);
+#endif
+
+ assert(transform.freeSampler < PIPE_MAX_SAMPLERS);
+ *samplerUnitOut = transform.freeSampler;
+
+ return new_fs;
+}
+
diff --git a/src/gallium/targets/dri-noop/swrast_drm_api.c b/src/gallium/auxiliary/util/u_pstipple.h
index a99779f183..1c2f5f48af 100644
--- a/src/gallium/targets/dri-noop/swrast_drm_api.c
+++ b/src/gallium/auxiliary/util/u_pstipple.h
@@ -1,8 +1,8 @@
/**************************************************************************
- *
- * Copyright 2009, VMware, Inc.
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2010 VMware, Inc.
* All Rights Reserved.
- * Copyright 2010 George Sapountzis <gsapountzis@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -11,53 +11,46 @@
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
- *
+ *
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
+ *
**************************************************************************/
-#include "pipe/p_compiler.h"
-#include "util/u_memory.h"
-#include "dri_sw_winsys.h"
-#include "noop/noop_public.h"
-
-#include "target-helpers/inline_debug_helper.h"
-#include "target-helpers/inline_sw_helper.h"
+#ifndef U_PSTIPPLE_H
+#define U_PSTIPPLE_H
+#include "pipe/p_compiler.h"
-struct pipe_screen *
-drisw_create_screen(struct drisw_loader_funcs *lf)
-{
- struct sw_winsys *winsys = NULL;
- struct pipe_screen *screen = NULL;
+struct pipe_context;
+struct pipe_resource;
+struct pipe_shader_state;
- winsys = dri_create_sw_winsys(lf);
- if (winsys == NULL)
- return NULL;
- screen = noop_screen_create(winsys);
- if (!screen)
- goto fail;
+extern struct pipe_resource *
+util_pstipple_create_stipple_texture(struct pipe_context *pipe,
+ const uint32_t pattern[32]);
- screen = debug_screen_wrap(screen);
+extern struct pipe_sampler_view *
+util_pstipple_create_sampler_view(struct pipe_context *pipe,
+ struct pipe_resource *tex);
- return screen;
+extern void *
+util_pstipple_create_sampler(struct pipe_context *pipe);
-fail:
- if (winsys)
- winsys->destroy(winsys);
+extern struct pipe_shader_state *
+util_pstipple_create_fragment_shader(struct pipe_context *pipe,
+ struct pipe_shader_state *fs,
+ unsigned *samplerUnitOut);
- return NULL;
-}
-/* vim: set sw=3 ts=8 sts=3 expandtab: */
+#endif
diff --git a/src/gallium/auxiliary/util/u_resource.c b/src/gallium/auxiliary/util/u_resource.c
index 443c9f8067..50a7cd4d55 100644
--- a/src/gallium/auxiliary/util/u_resource.c
+++ b/src/gallium/auxiliary/util/u_resource.c
@@ -24,18 +24,10 @@ void u_resource_destroy_vtbl(struct pipe_screen *screen,
ur->vtbl->resource_destroy(screen, resource);
}
-unsigned u_is_resource_referenced_vtbl( struct pipe_context *pipe,
- struct pipe_resource *resource,
- unsigned level, int layer)
-{
- struct u_resource *ur = u_resource(resource);
- return ur->vtbl->is_resource_referenced(pipe, resource, level, layer);
-}
-
struct pipe_transfer *u_get_transfer_vtbl(struct pipe_context *context,
struct pipe_resource *resource,
unsigned level,
- enum pipe_transfer_usage usage,
+ unsigned usage,
const struct pipe_box *box)
{
struct u_resource *ur = u_resource(resource);
diff --git a/src/gallium/auxiliary/util/u_simple_screen.h b/src/gallium/auxiliary/util/u_simple_screen.h
index 7139aaabc5..7caeb75cd2 100644
--- a/src/gallium/auxiliary/util/u_simple_screen.h
+++ b/src/gallium/auxiliary/util/u_simple_screen.h
@@ -159,7 +159,8 @@ struct pipe_winsys
*/
int (*fence_finish)( struct pipe_winsys *ws,
struct pipe_fence_handle *fence,
- unsigned flag );
+ unsigned flags,
+ uint64_t timeout );
};
diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c
index 4eddd3f519..9caf76c802 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -84,7 +84,7 @@ util_create_rgba_surface(struct pipe_context *pipe,
/* Choose surface format */
for (i = 0; rgbaFormats[i]; i++) {
if (screen->is_format_supported(screen, rgbaFormats[i],
- target, 0, bind, 0)) {
+ target, 0, bind)) {
format = rgbaFormats[i];
break;
}
diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
index 44cadbfcdd..e3c7085ba9 100644
--- a/src/gallium/auxiliary/util/u_tile.c
+++ b/src/gallium/auxiliary/util/u_tile.c
@@ -367,47 +367,19 @@ pipe_get_tile_rgba(struct pipe_context *pipe,
uint x, uint y, uint w, uint h,
float *p)
{
- unsigned dst_stride = w * 4;
- void *packed;
- enum pipe_format format = pt->resource->format;
-
- if (u_clip_tile(x, y, &w, &h, &pt->box))
- return;
-
- packed = MALLOC(util_format_get_nblocks(format, w, h) * util_format_get_blocksize(format));
-
- if (!packed)
- return;
-
- if(format == PIPE_FORMAT_UYVY || format == PIPE_FORMAT_YUYV)
- assert((x & 1) == 0);
-
- pipe_get_tile_raw(pipe, pt, x, y, w, h, packed, 0);
-
- pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride);
-
- FREE(packed);
+ pipe_get_tile_rgba_format(pipe, pt, x, y, w, h, pt->resource->format, p);
}
void
-pipe_get_tile_swizzle(struct pipe_context *pipe,
- struct pipe_transfer *pt,
- uint x,
- uint y,
- uint w,
- uint h,
- uint swizzle_r,
- uint swizzle_g,
- uint swizzle_b,
- uint swizzle_a,
- enum pipe_format format,
- float *p)
+pipe_get_tile_rgba_format(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
+ uint x, uint y, uint w, uint h,
+ enum pipe_format format,
+ float *p)
{
unsigned dst_stride = w * 4;
void *packed;
- uint iy;
- float rgba01[6];
if (u_clip_tile(x, y, &w, &h, &pt->box)) {
return;
@@ -427,35 +399,6 @@ pipe_get_tile_swizzle(struct pipe_context *pipe,
pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride);
FREE(packed);
-
- if (swizzle_r == PIPE_SWIZZLE_RED &&
- swizzle_g == PIPE_SWIZZLE_GREEN &&
- swizzle_b == PIPE_SWIZZLE_BLUE &&
- swizzle_a == PIPE_SWIZZLE_ALPHA) {
- /* no-op, skip */
- return;
- }
-
- rgba01[PIPE_SWIZZLE_ZERO] = 0.0f;
- rgba01[PIPE_SWIZZLE_ONE] = 1.0f;
-
- for (iy = 0; iy < h; iy++) {
- float *row = p;
- uint ix;
-
- for (ix = 0; ix < w; ix++) {
- rgba01[PIPE_SWIZZLE_RED] = row[0];
- rgba01[PIPE_SWIZZLE_GREEN] = row[1];
- rgba01[PIPE_SWIZZLE_BLUE] = row[2];
- rgba01[PIPE_SWIZZLE_ALPHA] = row[3];
-
- *row++ = rgba01[swizzle_r];
- *row++ = rgba01[swizzle_g];
- *row++ = rgba01[swizzle_b];
- *row++ = rgba01[swizzle_a];
- }
- p += dst_stride;
- }
}
@@ -465,9 +408,19 @@ pipe_put_tile_rgba(struct pipe_context *pipe,
uint x, uint y, uint w, uint h,
const float *p)
{
+ pipe_put_tile_rgba_format(pipe, pt, x, y, w, h, pt->resource->format, p);
+}
+
+
+void
+pipe_put_tile_rgba_format(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
+ uint x, uint y, uint w, uint h,
+ enum pipe_format format,
+ const float *p)
+{
unsigned src_stride = w * 4;
void *packed;
- enum pipe_format format = pt->resource->format;
if (u_clip_tile(x, y, &w, &h, &pt->box))
return;
diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h
index 558351d0ce..0ba274308f 100644
--- a/src/gallium/auxiliary/util/u_tile.h
+++ b/src/gallium/auxiliary/util/u_tile.h
@@ -80,18 +80,11 @@ pipe_get_tile_rgba(struct pipe_context *pipe,
float *p);
void
-pipe_get_tile_swizzle(struct pipe_context *pipe,
- struct pipe_transfer *pt,
- uint x,
- uint y,
- uint w,
- uint h,
- uint swizzle_r,
- uint swizzle_g,
- uint swizzle_b,
- uint swizzle_a,
- enum pipe_format format,
- float *p);
+pipe_get_tile_rgba_format(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
+ uint x, uint y, uint w, uint h,
+ enum pipe_format format,
+ float *p);
void
pipe_put_tile_rgba(struct pipe_context *pipe,
@@ -99,6 +92,13 @@ pipe_put_tile_rgba(struct pipe_context *pipe,
uint x, uint y, uint w, uint h,
const float *p);
+void
+pipe_put_tile_rgba_format(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
+ uint x, uint y, uint w, uint h,
+ enum pipe_format format,
+ const float *p);
+
void
pipe_get_tile_z(struct pipe_context *pipe,
diff --git a/src/gallium/auxiliary/util/u_transfer.c b/src/gallium/auxiliary/util/u_transfer.c
index e2828cfd99..7e72a59ee0 100644
--- a/src/gallium/auxiliary/util/u_transfer.c
+++ b/src/gallium/auxiliary/util/u_transfer.c
@@ -73,13 +73,6 @@ void u_default_transfer_flush_region( struct pipe_context *pipe,
*/
}
-unsigned u_default_is_resource_referenced( struct pipe_context *pipe,
- struct pipe_resource *resource,
- unsigned level, int layer)
-{
- return 0;
-}
-
struct pipe_transfer * u_default_get_transfer(struct pipe_context *context,
struct pipe_resource *resource,
unsigned level,
@@ -112,3 +105,10 @@ void u_default_transfer_destroy(struct pipe_context *pipe,
FREE(transfer);
}
+void u_default_redefine_user_buffer(struct pipe_context *ctx,
+ struct pipe_resource *resource,
+ unsigned offset,
+ unsigned size)
+{
+ resource->width0 = MAX2(resource->width0, offset + size);
+}
diff --git a/src/gallium/auxiliary/util/u_transfer.h b/src/gallium/auxiliary/util/u_transfer.h
index 3412e13c3c..5b5ddeb4ab 100644
--- a/src/gallium/auxiliary/util/u_transfer.h
+++ b/src/gallium/auxiliary/util/u_transfer.h
@@ -27,10 +27,6 @@ void u_default_transfer_flush_region( struct pipe_context *pipe,
struct pipe_transfer *transfer,
const struct pipe_box *box);
-unsigned u_default_is_resource_referenced( struct pipe_context *pipe,
- struct pipe_resource *resource,
- unsigned level, int layer);
-
struct pipe_transfer * u_default_get_transfer(struct pipe_context *context,
struct pipe_resource *resource,
unsigned level,
@@ -57,10 +53,6 @@ struct u_resource_vtbl {
void (*resource_destroy)(struct pipe_screen *,
struct pipe_resource *pt);
- unsigned (*is_resource_referenced)(struct pipe_context *pipe,
- struct pipe_resource *texture,
- unsigned level, int layer);
-
struct pipe_transfer *(*get_transfer)(struct pipe_context *,
struct pipe_resource *resource,
unsigned level,
@@ -93,7 +85,7 @@ struct u_resource_vtbl {
struct u_resource {
struct pipe_resource b;
- struct u_resource_vtbl *vtbl;
+ const struct u_resource_vtbl *vtbl;
};
@@ -104,10 +96,6 @@ boolean u_resource_get_handle_vtbl(struct pipe_screen *screen,
void u_resource_destroy_vtbl(struct pipe_screen *screen,
struct pipe_resource *resource);
-unsigned u_is_resource_referenced_vtbl( struct pipe_context *pipe,
- struct pipe_resource *resource,
- unsigned level, int layer);
-
struct pipe_transfer *u_get_transfer_vtbl(struct pipe_context *context,
struct pipe_resource *resource,
unsigned level,
@@ -136,11 +124,9 @@ void u_transfer_inline_write_vtbl( struct pipe_context *rm_ctx,
unsigned stride,
unsigned layer_stride);
-
-
-
-
-
-
+void u_default_redefine_user_buffer(struct pipe_context *ctx,
+ struct pipe_resource *resource,
+ unsigned offset,
+ unsigned size);
#endif
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c
index af229e61a0..9562acb821 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.c
+++ b/src/gallium/auxiliary/util/u_upload_mgr.c
@@ -41,22 +41,23 @@
struct u_upload_mgr {
struct pipe_context *pipe;
- unsigned default_size;
- unsigned alignment;
- unsigned usage;
-
- /* The active buffer:
- */
- struct pipe_resource *buffer;
- unsigned size;
- unsigned offset;
+ unsigned default_size; /* Minimum size of the upload buffer, in bytes. */
+ unsigned alignment; /* Alignment of each sub-allocation. */
+ unsigned bind; /* Bitmask of PIPE_BIND_* flags. */
+
+ struct pipe_resource *buffer; /* Upload buffer. */
+ struct pipe_transfer *transfer; /* Transfer object for the upload buffer. */
+ uint8_t *map; /* Pointer to the mapped upload buffer. */
+ unsigned size; /* Actual size of the upload buffer. */
+ unsigned offset; /* Aligned offset to the upload buffer, pointing
+ * at the first unused byte. */
};
struct u_upload_mgr *u_upload_create( struct pipe_context *pipe,
unsigned default_size,
unsigned alignment,
- unsigned usage )
+ unsigned bind )
{
struct u_upload_mgr *upload = CALLOC_STRUCT( u_upload_mgr );
if (!upload)
@@ -65,54 +66,12 @@ struct u_upload_mgr *u_upload_create( struct pipe_context *pipe,
upload->pipe = pipe;
upload->default_size = default_size;
upload->alignment = alignment;
- upload->usage = usage;
+ upload->bind = bind;
upload->buffer = NULL;
return upload;
}
-/* Slightly specialized version of buffer_write designed to maximize
- * chances of the driver consolidating successive writes into a single
- * upload.
- *
- * dirty_size may be slightly greater than size to cope with
- * alignment. We don't want to leave holes between succesively mapped
- * regions as that may prevent the driver from consolidating uploads.
- *
- * Note that the 'data' pointer has probably come from the application
- * and we cannot read even a byte past its end without risking
- * segfaults, or at least complaints from valgrind..
- */
-static INLINE enum pipe_error
-my_buffer_write(struct pipe_context *pipe,
- struct pipe_resource *buf,
- unsigned offset, unsigned size, unsigned dirty_size,
- const void *data)
-{
- struct pipe_transfer *transfer = NULL;
- uint8_t *map;
-
- assert(offset < buf->width0);
- assert(offset + size <= buf->width0);
- assert(dirty_size >= size);
- assert(size);
-
- map = pipe_buffer_map_range(pipe, buf, offset, dirty_size,
- PIPE_TRANSFER_WRITE |
- PIPE_TRANSFER_FLUSH_EXPLICIT |
- PIPE_TRANSFER_DISCARD |
- PIPE_TRANSFER_UNSYNCHRONIZED,
- &transfer);
- if (map == NULL)
- return PIPE_ERROR_OUT_OF_MEMORY;
-
- memcpy(map + offset, data, size);
- pipe_buffer_flush_mapped_range(pipe, transfer, offset, dirty_size);
- pipe_buffer_unmap(pipe, buf, transfer);
-
- return PIPE_OK;
-}
-
/* Release old buffer.
*
* This must usually be called prior to firing the command stream
@@ -124,6 +83,16 @@ my_buffer_write(struct pipe_context *pipe,
*/
void u_upload_flush( struct u_upload_mgr *upload )
{
+ /* Unmap and unreference the upload buffer. */
+ if (upload->transfer) {
+ if (upload->offset) {
+ pipe_buffer_flush_mapped_range(upload->pipe, upload->transfer,
+ 0, upload->offset);
+ }
+ pipe_transfer_unmap(upload->pipe, upload->transfer);
+ pipe_transfer_destroy(upload->pipe, upload->transfer);
+ upload->transfer = NULL;
+ }
pipe_resource_reference( &upload->buffer, NULL );
upload->size = 0;
}
@@ -142,7 +111,7 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload,
{
unsigned size;
- /* Release old buffer, if present:
+ /* Release the old buffer, if present:
*/
u_upload_flush( upload );
@@ -151,10 +120,18 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload,
size = align(MAX2(upload->default_size, min_size), 4096);
upload->buffer = pipe_buffer_create( upload->pipe->screen,
- upload->usage,
+ upload->bind,
+ PIPE_USAGE_STREAM,
size );
if (upload->buffer == NULL)
goto fail;
+
+ /* Map the new buffer. */
+ upload->map = pipe_buffer_map_range(upload->pipe, upload->buffer,
+ 0, size,
+ PIPE_TRANSFER_WRITE |
+ PIPE_TRANSFER_FLUSH_EXPLICIT,
+ &upload->transfer);
upload->size = size;
@@ -168,38 +145,62 @@ fail:
return PIPE_ERROR_OUT_OF_MEMORY;
}
-
-enum pipe_error u_upload_data( struct u_upload_mgr *upload,
- unsigned size,
- const void *data,
- unsigned *out_offset,
- struct pipe_resource **outbuf )
+enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
+ unsigned min_out_offset,
+ unsigned size,
+ unsigned *out_offset,
+ struct pipe_resource **outbuf,
+ boolean *flushed,
+ void **ptr )
{
unsigned alloc_size = align( size, upload->alignment );
- enum pipe_error ret = PIPE_OK;
+ unsigned alloc_offset = align(min_out_offset, upload->alignment);
+ unsigned offset;
- if (upload->offset + alloc_size > upload->size) {
- ret = u_upload_alloc_buffer( upload, alloc_size );
+ /* Make sure we have enough space in the upload buffer
+ * for the sub-allocation. */
+ if (MAX2(upload->offset, alloc_offset) + alloc_size > upload->size) {
+ enum pipe_error ret = u_upload_alloc_buffer(upload,
+ alloc_offset + alloc_size);
if (ret)
return ret;
+
+ *flushed = TRUE;
+ } else {
+ *flushed = FALSE;
}
- /* Copy the data, using map_range if available:
- */
- ret = my_buffer_write( upload->pipe,
- upload->buffer,
- upload->offset,
- size,
- alloc_size,
- data );
+ offset = MAX2(upload->offset, alloc_offset);
+
+ assert(offset < upload->buffer->width0);
+ assert(offset + size <= upload->buffer->width0);
+ assert(size);
+
+ /* Emit the return values: */
+ *ptr = upload->map + offset;
+ pipe_resource_reference( outbuf, upload->buffer );
+ *out_offset = offset;
+
+ upload->offset = offset + alloc_size;
+ return PIPE_OK;
+}
+
+enum pipe_error u_upload_data( struct u_upload_mgr *upload,
+ unsigned min_out_offset,
+ unsigned size,
+ const void *data,
+ unsigned *out_offset,
+ struct pipe_resource **outbuf,
+ boolean *flushed )
+{
+ uint8_t *ptr;
+ enum pipe_error ret = u_upload_alloc(upload, min_out_offset, size,
+ out_offset, outbuf, flushed,
+ (void**)&ptr);
if (ret)
return ret;
- /* Emit the return values:
- */
- pipe_resource_reference( outbuf, upload->buffer );
- *out_offset = upload->offset;
- upload->offset += alloc_size;
+ memcpy(ptr, data, size);
return PIPE_OK;
}
@@ -210,11 +211,13 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload,
* renders or DrawElements calls.
*/
enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
+ unsigned min_out_offset,
unsigned offset,
unsigned size,
struct pipe_resource *inbuf,
unsigned *out_offset,
- struct pipe_resource **outbuf )
+ struct pipe_resource **outbuf,
+ boolean *flushed )
{
enum pipe_error ret = PIPE_OK;
struct pipe_transfer *transfer = NULL;
@@ -233,17 +236,16 @@ enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
if (0)
debug_printf("upload ptr %p ofs %d sz %d\n", map, offset, size);
- ret = u_upload_data( upload,
+ ret = u_upload_data( upload,
+ min_out_offset,
size,
map + offset,
out_offset,
- outbuf );
- if (ret)
- goto done;
+ outbuf, flushed );
done:
if (map)
- pipe_buffer_unmap( upload->pipe, inbuf, transfer );
+ pipe_buffer_unmap( upload->pipe, transfer );
return ret;
}
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h
index de016df02e..c9a2ffeb57 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.h
+++ b/src/gallium/auxiliary/util/u_upload_mgr.h
@@ -32,15 +32,28 @@
#ifndef U_UPLOAD_MGR_H
#define U_UPLOAD_MGR_H
+#include "pipe/p_compiler.h"
+
struct pipe_context;
struct pipe_resource;
+/**
+ * Create the upload manager.
+ *
+ * \param pipe Pipe driver.
+ * \param default_size Minimum size of the upload buffer, in bytes.
+ * \param alignment Alignment of each suballocation in the upload buffer.
+ * \param bind Bitmask of PIPE_BIND_* flags.
+ */
struct u_upload_mgr *u_upload_create( struct pipe_context *pipe,
unsigned default_size,
unsigned alignment,
- unsigned usage );
+ unsigned bind );
+/**
+ * Destroy the upload manager.
+ */
void u_upload_destroy( struct u_upload_mgr *upload );
/* Unmap and release old buffer.
@@ -53,20 +66,55 @@ void u_upload_destroy( struct u_upload_mgr *upload );
*/
void u_upload_flush( struct u_upload_mgr *upload );
+/**
+ * Sub-allocate new memory from the upload buffer.
+ *
+ * \param upload Upload manager
+ * \param min_out_offset Minimum offset that should be returned in out_offset.
+ * \param size Size of the allocation.
+ * \param out_offset Pointer to where the new buffer offset will be returned.
+ * \param outbuf Pointer to where the upload buffer will be returned.
+ * \param flushed Whether the upload buffer was flushed.
+ * \param ptr Pointer to the allocated memory that is returned.
+ */
+enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
+ unsigned min_out_offset,
+ unsigned size,
+ unsigned *out_offset,
+ struct pipe_resource **outbuf,
+ boolean *flushed,
+ void **ptr );
+
+/**
+ * Allocate and write data to the upload buffer.
+ *
+ * Same as u_upload_alloc, but in addition to that, it copies "data"
+ * to the pointer returned from u_upload_alloc.
+ */
enum pipe_error u_upload_data( struct u_upload_mgr *upload,
+ unsigned min_out_offset,
unsigned size,
const void *data,
unsigned *out_offset,
- struct pipe_resource **outbuf );
+ struct pipe_resource **outbuf,
+ boolean *flushed );
+/**
+ * Allocate and copy an input buffer to the upload buffer.
+ *
+ * Same as u_upload_data, except that the input data comes from a buffer
+ * instead of a user pointer.
+ */
enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
+ unsigned min_out_offset,
unsigned offset,
unsigned size,
struct pipe_resource *inbuf,
unsigned *out_offset,
- struct pipe_resource **outbuf );
+ struct pipe_resource **outbuf,
+ boolean *flushed );
diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c
new file mode 100644
index 0000000000..521ac07747
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c
@@ -0,0 +1,609 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Marek Olšák <maraeo@gmail.com>
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_vbuf_mgr.h"
+
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_upload_mgr.h"
+#include "translate/translate.h"
+#include "translate/translate_cache.h"
+
+/* Hardware vertex fetcher limitations can be described by this structure. */
+struct u_vbuf_caps {
+ /* Vertex format CAPs. */
+ /* TRUE if hardware supports it. */
+ unsigned format_fixed32:1; /* PIPE_FORMAT_*32*_FIXED */
+ unsigned format_float16:1; /* PIPE_FORMAT_*16*_FLOAT */
+ unsigned format_float64:1; /* PIPE_FORMAT_*64*_FLOAT */
+ unsigned format_norm32:1; /* PIPE_FORMAT_*32*NORM */
+ unsigned format_scaled32:1; /* PIPE_FORMAT_*32*SCALED */
+
+ /* Whether vertex fetches don't have to be dword-aligned. */
+ /* TRUE if hardware supports it. */
+ unsigned fetch_dword_unaligned:1;
+};
+
+struct u_vbuf_mgr_elements {
+ unsigned count;
+ struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
+
+ /* If (velem[i].src_format != real_format[i]), the vertex buffer
+ * referenced by the vertex element cannot be used for rendering and
+ * its vertex data must be translated to real_format[i]. */
+ enum pipe_format native_format[PIPE_MAX_ATTRIBS];
+ unsigned native_format_size[PIPE_MAX_ATTRIBS];
+
+ /* This might mean two things:
+ * - src_format != native_format, as discussed above.
+ * - src_offset % 4 != 0 (if the caps don't allow such an offset). */
+ boolean incompatible_layout;
+};
+
+struct u_vbuf_mgr_priv {
+ struct u_vbuf_mgr b;
+ struct u_vbuf_caps caps;
+ struct pipe_context *pipe;
+
+ struct translate_cache *translate_cache;
+ unsigned translate_vb_slot;
+
+ struct u_vbuf_mgr_elements *ve;
+ void *saved_ve, *fallback_ve;
+ boolean ve_binding_lock;
+
+ boolean any_user_vbs;
+ boolean incompatible_vb_layout;
+};
+
+static void u_vbuf_mgr_init_format_caps(struct u_vbuf_mgr_priv *mgr)
+{
+ struct pipe_screen *screen = mgr->pipe->screen;
+
+ mgr->caps.format_fixed32 =
+ screen->is_format_supported(screen, PIPE_FORMAT_R32_FIXED, PIPE_BUFFER,
+ 0, PIPE_BIND_VERTEX_BUFFER);
+
+ mgr->caps.format_float16 =
+ screen->is_format_supported(screen, PIPE_FORMAT_R16_FLOAT, PIPE_BUFFER,
+ 0, PIPE_BIND_VERTEX_BUFFER);
+
+ mgr->caps.format_float64 =
+ screen->is_format_supported(screen, PIPE_FORMAT_R64_FLOAT, PIPE_BUFFER,
+ 0, PIPE_BIND_VERTEX_BUFFER);
+
+ mgr->caps.format_norm32 =
+ screen->is_format_supported(screen, PIPE_FORMAT_R32_UNORM, PIPE_BUFFER,
+ 0, PIPE_BIND_VERTEX_BUFFER) &&
+ screen->is_format_supported(screen, PIPE_FORMAT_R32_SNORM, PIPE_BUFFER,
+ 0, PIPE_BIND_VERTEX_BUFFER);
+
+ mgr->caps.format_scaled32 =
+ screen->is_format_supported(screen, PIPE_FORMAT_R32_USCALED, PIPE_BUFFER,
+ 0, PIPE_BIND_VERTEX_BUFFER) &&
+ screen->is_format_supported(screen, PIPE_FORMAT_R32_SSCALED, PIPE_BUFFER,
+ 0, PIPE_BIND_VERTEX_BUFFER);
+}
+
+struct u_vbuf_mgr *
+u_vbuf_mgr_create(struct pipe_context *pipe,
+ unsigned upload_buffer_size,
+ unsigned upload_buffer_alignment,
+ unsigned upload_buffer_bind,
+ enum u_fetch_alignment fetch_alignment)
+{
+ struct u_vbuf_mgr_priv *mgr = CALLOC_STRUCT(u_vbuf_mgr_priv);
+
+ mgr->pipe = pipe;
+ mgr->translate_cache = translate_cache_create();
+
+ mgr->b.uploader = u_upload_create(pipe, upload_buffer_size,
+ upload_buffer_alignment,
+ upload_buffer_bind);
+
+ mgr->caps.fetch_dword_unaligned =
+ fetch_alignment == U_VERTEX_FETCH_BYTE_ALIGNED;
+
+ u_vbuf_mgr_init_format_caps(mgr);
+
+ return &mgr->b;
+}
+
+void u_vbuf_mgr_destroy(struct u_vbuf_mgr *mgrb)
+{
+ struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb;
+ unsigned i;
+
+ for (i = 0; i < mgr->b.nr_real_vertex_buffers; i++) {
+ pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, NULL);
+ pipe_resource_reference(&mgr->b.real_vertex_buffer[i], NULL);
+ }
+
+ translate_cache_destroy(mgr->translate_cache);
+ u_upload_destroy(mgr->b.uploader);
+ FREE(mgr);
+}
+
+
+static void u_vbuf_translate_begin(struct u_vbuf_mgr_priv *mgr,
+ int min_index, int max_index,
+ boolean *upload_flushed)
+{
+ struct translate_key key;
+ struct translate_element *te;
+ unsigned tr_elem_index[PIPE_MAX_ATTRIBS];
+ struct translate *tr;
+ boolean vb_translated[PIPE_MAX_ATTRIBS] = {0};
+ uint8_t *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map;
+ struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0};
+ struct pipe_resource *out_buffer = NULL;
+ unsigned i, num_verts, out_offset;
+ struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS];
+
+ memset(&key, 0, sizeof(key));
+ memset(tr_elem_index, 0xff, sizeof(tr_elem_index));
+
+ /* Initialize the translate key, i.e. the recipe how vertices should be
+ * translated. */
+ memset(&key, 0, sizeof key);
+ for (i = 0; i < mgr->ve->count; i++) {
+ struct pipe_vertex_buffer *vb =
+ &mgr->b.vertex_buffer[mgr->ve->ve[i].vertex_buffer_index];
+ enum pipe_format output_format = mgr->ve->native_format[i];
+ unsigned output_format_size = mgr->ve->native_format_size[i];
+
+ /* Check for support. */
+ if (mgr->ve->ve[i].src_format == mgr->ve->native_format[i] &&
+ (mgr->caps.fetch_dword_unaligned ||
+ (vb->buffer_offset % 4 == 0 &&
+ vb->stride % 4 == 0 &&
+ mgr->ve->ve[i].src_offset % 4 == 0))) {
+ continue;
+ }
+
+ /* Workaround for translate: output floats instead of halfs. */
+ switch (output_format) {
+ case PIPE_FORMAT_R16_FLOAT:
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ output_format_size = 4;
+ break;
+ case PIPE_FORMAT_R16G16_FLOAT:
+ output_format = PIPE_FORMAT_R32G32_FLOAT;
+ output_format_size = 8;
+ break;
+ case PIPE_FORMAT_R16G16B16_FLOAT:
+ output_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ output_format_size = 12;
+ break;
+ case PIPE_FORMAT_R16G16B16A16_FLOAT:
+ output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ output_format_size = 16;
+ break;
+ default:;
+ }
+
+ /* Add this vertex element. */
+ te = &key.element[key.nr_elements];
+ /*te->type;
+ te->instance_divisor;*/
+ te->input_buffer = mgr->ve->ve[i].vertex_buffer_index;
+ te->input_format = mgr->ve->ve[i].src_format;
+ te->input_offset = mgr->ve->ve[i].src_offset;
+ te->output_format = output_format;
+ te->output_offset = key.output_stride;
+
+ key.output_stride += output_format_size;
+ vb_translated[mgr->ve->ve[i].vertex_buffer_index] = TRUE;
+ tr_elem_index[i] = key.nr_elements;
+ key.nr_elements++;
+ }
+
+ /* Get a translate object. */
+ tr = translate_cache_find(mgr->translate_cache, &key);
+
+ /* Map buffers we want to translate. */
+ for (i = 0; i < mgr->b.nr_vertex_buffers; i++) {
+ if (vb_translated[i]) {
+ struct pipe_vertex_buffer *vb = &mgr->b.vertex_buffer[i];
+
+ vb_map[i] = pipe_buffer_map(mgr->pipe, vb->buffer,
+ PIPE_TRANSFER_READ, &vb_transfer[i]);
+
+ tr->set_buffer(tr, i,
+ vb_map[i] + vb->buffer_offset + vb->stride * min_index,
+ vb->stride, ~0);
+ }
+ }
+
+ /* Create and map the output buffer. */
+ num_verts = max_index + 1 - min_index;
+
+ u_upload_alloc(mgr->b.uploader,
+ key.output_stride * min_index,
+ key.output_stride * num_verts,
+ &out_offset, &out_buffer, upload_flushed,
+ (void**)&out_map);
+
+ out_offset -= key.output_stride * min_index;
+
+ /* Translate. */
+ tr->run(tr, 0, num_verts, 0, out_map);
+
+ /* Unmap all buffers. */
+ for (i = 0; i < mgr->b.nr_vertex_buffers; i++) {
+ if (vb_translated[i]) {
+ pipe_buffer_unmap(mgr->pipe, vb_transfer[i]);
+ }
+ }
+
+ /* Setup the new vertex buffer in the first free slot. */
+ mgr->translate_vb_slot = ~0;
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (!mgr->b.vertex_buffer[i].buffer) {
+ mgr->translate_vb_slot = i;
+
+ if (i >= mgr->b.nr_vertex_buffers) {
+ mgr->b.nr_real_vertex_buffers = i+1;
+ }
+ break;
+ }
+ }
+
+ if (mgr->translate_vb_slot != ~0) {
+ /* Setup the new vertex buffer. */
+ pipe_resource_reference(
+ &mgr->b.real_vertex_buffer[mgr->translate_vb_slot], out_buffer);
+ mgr->b.vertex_buffer[mgr->translate_vb_slot].buffer_offset = out_offset;
+ mgr->b.vertex_buffer[mgr->translate_vb_slot].stride = key.output_stride;
+
+ /* Setup new vertex elements. */
+ for (i = 0; i < mgr->ve->count; i++) {
+ if (tr_elem_index[i] < key.nr_elements) {
+ te = &key.element[tr_elem_index[i]];
+ new_velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor;
+ new_velems[i].src_format = te->output_format;
+ new_velems[i].src_offset = te->output_offset;
+ new_velems[i].vertex_buffer_index = mgr->translate_vb_slot;
+ } else {
+ memcpy(&new_velems[i], &mgr->ve->ve[i],
+ sizeof(struct pipe_vertex_element));
+ }
+ }
+
+ mgr->fallback_ve =
+ mgr->pipe->create_vertex_elements_state(mgr->pipe, mgr->ve->count,
+ new_velems);
+
+ /* Preserve saved_ve. */
+ mgr->ve_binding_lock = TRUE;
+ mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->fallback_ve);
+ mgr->ve_binding_lock = FALSE;
+ }
+
+ pipe_resource_reference(&out_buffer, NULL);
+}
+
+static void u_vbuf_translate_end(struct u_vbuf_mgr_priv *mgr)
+{
+ if (mgr->fallback_ve == NULL) {
+ return;
+ }
+
+ /* Restore vertex elements. */
+ /* Note that saved_ve will be overwritten in bind_vertex_elements_state. */
+ mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->saved_ve);
+ mgr->pipe->delete_vertex_elements_state(mgr->pipe, mgr->fallback_ve);
+ mgr->fallback_ve = NULL;
+
+ /* Delete the now-unused VBO. */
+ pipe_resource_reference(&mgr->b.real_vertex_buffer[mgr->translate_vb_slot],
+ NULL);
+ mgr->b.nr_real_vertex_buffers = mgr->b.nr_vertex_buffers;
+}
+
+#define FORMAT_REPLACE(what, withwhat) \
+ case PIPE_FORMAT_##what: format = PIPE_FORMAT_##withwhat; break
+
+struct u_vbuf_mgr_elements *
+u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb,
+ unsigned count,
+ const struct pipe_vertex_element *attribs,
+ struct pipe_vertex_element *native_attribs)
+{
+ struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb;
+ unsigned i;
+ struct u_vbuf_mgr_elements *ve = CALLOC_STRUCT(u_vbuf_mgr_elements);
+
+ ve->count = count;
+
+ if (!count) {
+ return ve;
+ }
+
+ memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count);
+ memcpy(native_attribs, attribs, sizeof(struct pipe_vertex_element) * count);
+
+ /* Set the best native format in case the original format is not
+ * supported. */
+ for (i = 0; i < count; i++) {
+ enum pipe_format format = ve->ve[i].src_format;
+
+ /* Choose a native format.
+ * For now we don't care about the alignment, that's going to
+ * be sorted out later. */
+ if (!mgr->caps.format_fixed32) {
+ switch (format) {
+ FORMAT_REPLACE(R32_FIXED, R32_FLOAT);
+ FORMAT_REPLACE(R32G32_FIXED, R32G32_FLOAT);
+ FORMAT_REPLACE(R32G32B32_FIXED, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R32G32B32A32_FIXED, R32G32B32A32_FLOAT);
+ default:;
+ }
+ }
+ if (!mgr->caps.format_float16) {
+ switch (format) {
+ FORMAT_REPLACE(R16_FLOAT, R32_FLOAT);
+ FORMAT_REPLACE(R16G16_FLOAT, R32G32_FLOAT);
+ FORMAT_REPLACE(R16G16B16_FLOAT, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R16G16B16A16_FLOAT, R32G32B32A32_FLOAT);
+ default:;
+ }
+ }
+ if (!mgr->caps.format_float64) {
+ switch (format) {
+ FORMAT_REPLACE(R64_FLOAT, R32_FLOAT);
+ FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT);
+ FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT);
+ default:;
+ }
+ }
+ if (!mgr->caps.format_norm32) {
+ switch (format) {
+ FORMAT_REPLACE(R32_UNORM, R32_FLOAT);
+ FORMAT_REPLACE(R32G32_UNORM, R32G32_FLOAT);
+ FORMAT_REPLACE(R32G32B32_UNORM, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R32G32B32A32_UNORM, R32G32B32A32_FLOAT);
+ FORMAT_REPLACE(R32_SNORM, R32_FLOAT);
+ FORMAT_REPLACE(R32G32_SNORM, R32G32_FLOAT);
+ FORMAT_REPLACE(R32G32B32_SNORM, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R32G32B32A32_SNORM, R32G32B32A32_FLOAT);
+ default:;
+ }
+ }
+ if (!mgr->caps.format_scaled32) {
+ switch (format) {
+ FORMAT_REPLACE(R32_USCALED, R32_FLOAT);
+ FORMAT_REPLACE(R32G32_USCALED, R32G32_FLOAT);
+ FORMAT_REPLACE(R32G32B32_USCALED, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R32G32B32A32_USCALED,R32G32B32A32_FLOAT);
+ FORMAT_REPLACE(R32_SSCALED, R32_FLOAT);
+ FORMAT_REPLACE(R32G32_SSCALED, R32G32_FLOAT);
+ FORMAT_REPLACE(R32G32B32_SSCALED, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R32G32B32A32_SSCALED,R32G32B32A32_FLOAT);
+ default:;
+ }
+ }
+
+ native_attribs[i].src_format = format;
+ ve->native_format[i] = format;
+ ve->native_format_size[i] =
+ util_format_get_blocksize(ve->native_format[i]);
+
+ ve->incompatible_layout =
+ ve->incompatible_layout ||
+ ve->ve[i].src_format != ve->native_format[i] ||
+ (!mgr->caps.fetch_dword_unaligned && ve->ve[i].src_offset % 4 != 0);
+ }
+
+ /* Align the formats to the size of DWORD if needed. */
+ if (!mgr->caps.fetch_dword_unaligned) {
+ for (i = 0; i < count; i++) {
+ ve->native_format_size[i] = align(ve->native_format_size[i], 4);
+ }
+ }
+
+ return ve;
+}
+
+void u_vbuf_mgr_bind_vertex_elements(struct u_vbuf_mgr *mgrb,
+ void *cso,
+ struct u_vbuf_mgr_elements *ve)
+{
+ struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb;
+
+ if (!cso) {
+ return;
+ }
+
+ if (!mgr->ve_binding_lock) {
+ mgr->saved_ve = cso;
+ mgr->ve = ve;
+ }
+}
+
+void u_vbuf_mgr_destroy_vertex_elements(struct u_vbuf_mgr *mgr,
+ struct u_vbuf_mgr_elements *ve)
+{
+ FREE(ve);
+}
+
+void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgrb,
+ unsigned count,
+ const struct pipe_vertex_buffer *bufs)
+{
+ struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb;
+ unsigned i;
+
+ mgr->b.max_index = ~0;
+ mgr->any_user_vbs = FALSE;
+ mgr->incompatible_vb_layout = FALSE;
+
+ if (!mgr->caps.fetch_dword_unaligned) {
+ /* Check if the strides and offsets are aligned to the size of DWORD. */
+ for (i = 0; i < count; i++) {
+ if (bufs[i].buffer) {
+ if (bufs[i].stride % 4 != 0 ||
+ bufs[i].buffer_offset % 4 != 0) {
+ mgr->incompatible_vb_layout = TRUE;
+ break;
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < count; i++) {
+ const struct pipe_vertex_buffer *vb = &bufs[i];
+
+ pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, vb->buffer);
+ pipe_resource_reference(&mgr->b.real_vertex_buffer[i], NULL);
+
+ if (u_vbuf_resource(vb->buffer)->user_ptr) {
+ mgr->any_user_vbs = TRUE;
+ continue;
+ }
+
+ pipe_resource_reference(&mgr->b.real_vertex_buffer[i], vb->buffer);
+
+ /* The stride of zero means we will be fetching only the first
+ * vertex, so don't care about max_index. */
+ if (!vb->stride) {
+ continue;
+ }
+
+ /* Update the maximum index. */
+ mgr->b.max_index =
+ MIN2(mgr->b.max_index,
+ (vb->buffer->width0 - vb->buffer_offset) / vb->stride);
+ }
+
+ for (; i < mgr->b.nr_real_vertex_buffers; i++) {
+ pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, NULL);
+ pipe_resource_reference(&mgr->b.real_vertex_buffer[i], NULL);
+ }
+
+ memcpy(mgr->b.vertex_buffer, bufs,
+ sizeof(struct pipe_vertex_buffer) * count);
+
+ mgr->b.nr_vertex_buffers = count;
+ mgr->b.nr_real_vertex_buffers = count;
+}
+
+static void u_vbuf_upload_buffers(struct u_vbuf_mgr_priv *mgr,
+ int min_index, int max_index,
+ unsigned instance_count,
+ boolean *upload_flushed)
+{
+ int i, nr = mgr->ve->count;
+ unsigned count = max_index + 1 - min_index;
+ boolean uploaded[PIPE_MAX_ATTRIBS] = {0};
+
+ for (i = 0; i < nr; i++) {
+ unsigned index = mgr->ve->ve[i].vertex_buffer_index;
+ struct pipe_vertex_buffer *vb = &mgr->b.vertex_buffer[index];
+
+ if (vb->buffer &&
+ u_vbuf_resource(vb->buffer)->user_ptr &&
+ !uploaded[index]) {
+ unsigned first, size;
+ boolean flushed;
+ unsigned instance_div = mgr->ve->ve[i].instance_divisor;
+
+ if (instance_div) {
+ first = 0;
+ size = vb->stride *
+ ((instance_count + instance_div - 1) / instance_div);
+ } else if (vb->stride) {
+ first = vb->stride * min_index;
+ size = vb->stride * count;
+ } else {
+ first = 0;
+ size = mgr->ve->native_format_size[i];
+ }
+
+ u_upload_data(mgr->b.uploader, first, size,
+ u_vbuf_resource(vb->buffer)->user_ptr + first,
+ &vb->buffer_offset,
+ &mgr->b.real_vertex_buffer[index],
+ &flushed);
+
+ vb->buffer_offset -= first;
+
+ uploaded[index] = TRUE;
+ *upload_flushed = *upload_flushed || flushed;
+ } else {
+ assert(mgr->b.real_vertex_buffer[index]);
+ }
+ }
+}
+
+void u_vbuf_mgr_draw_begin(struct u_vbuf_mgr *mgrb,
+ const struct pipe_draw_info *info,
+ boolean *buffers_updated,
+ boolean *uploader_flushed)
+{
+ struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb;
+ boolean bufs_updated = FALSE, upload_flushed = FALSE;
+ int min_index, max_index;
+
+ min_index = info->min_index - info->index_bias;
+ max_index = info->max_index - info->index_bias;
+
+ /* Translate vertices with non-native layouts or formats. */
+ if (mgr->incompatible_vb_layout || mgr->ve->incompatible_layout) {
+ u_vbuf_translate_begin(mgr, min_index, max_index, &upload_flushed);
+
+ if (mgr->fallback_ve) {
+ bufs_updated = TRUE;
+ }
+ }
+
+ /* Upload user buffers. */
+ if (mgr->any_user_vbs) {
+ u_vbuf_upload_buffers(mgr, min_index, max_index, info->instance_count,
+ &upload_flushed);
+ bufs_updated = TRUE;
+ }
+
+ /* Set the return values. */
+ if (buffers_updated) {
+ *buffers_updated = bufs_updated;
+ }
+ if (uploader_flushed) {
+ *uploader_flushed = upload_flushed;
+ }
+}
+
+void u_vbuf_mgr_draw_end(struct u_vbuf_mgr *mgrb)
+{
+ struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb;
+
+ if (mgr->fallback_ve) {
+ u_vbuf_translate_end(mgr);
+ }
+}
diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.h b/src/gallium/auxiliary/util/u_vbuf_mgr.h
new file mode 100644
index 0000000000..8b241854c8
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_vbuf_mgr.h
@@ -0,0 +1,121 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Marek Olšák <maraeo@gmail.com>
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_VBUF_MGR_H
+#define U_VBUF_MGR_H
+
+/* This module builds upon u_upload_mgr and translate_cache and takes care of
+ * user buffer uploads and vertex format fallbacks. It's designed
+ * for the drivers which don't always use the Draw module. (e.g. for HWTCL)
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_transfer.h"
+
+/* The manager.
+ * This structure should also be used to access vertex buffers
+ * from a driver. */
+struct u_vbuf_mgr {
+ /* This is what was set in set_vertex_buffers.
+ * May contain user buffers. */
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ unsigned nr_vertex_buffers;
+
+ /* Contains only real vertex buffers.
+ * Hardware drivers should use real_vertex_buffers[i]
+ * instead of vertex_buffers[i].buffer. */
+ struct pipe_resource *real_vertex_buffer[PIPE_MAX_ATTRIBS];
+ int nr_real_vertex_buffers;
+
+ /* Precomputed max_index for hardware vertex buffers. */
+ int max_index;
+
+ /* This uploader can optionally be used by the driver.
+ *
+ * Allowed functions:
+ * - u_upload_alloc
+ * - u_upload_data
+ * - u_upload_buffer
+ * - u_upload_flush */
+ struct u_upload_mgr *uploader;
+};
+
+struct u_vbuf_resource {
+ struct u_resource b;
+ uint8_t *user_ptr;
+};
+
+/* Opaque type containing information about vertex elements for the manager. */
+struct u_vbuf_mgr_elements;
+
+enum u_fetch_alignment {
+ U_VERTEX_FETCH_BYTE_ALIGNED,
+ U_VERTEX_FETCH_DWORD_ALIGNED
+};
+
+
+struct u_vbuf_mgr *
+u_vbuf_mgr_create(struct pipe_context *pipe,
+ unsigned upload_buffer_size,
+ unsigned upload_buffer_alignment,
+ unsigned upload_buffer_bind,
+ enum u_fetch_alignment fetch_alignment);
+
+void u_vbuf_mgr_destroy(struct u_vbuf_mgr *mgr);
+
+struct u_vbuf_mgr_elements *
+u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgr,
+ unsigned count,
+ const struct pipe_vertex_element *attrs,
+ struct pipe_vertex_element *native_attrs);
+
+void u_vbuf_mgr_bind_vertex_elements(struct u_vbuf_mgr *mgr,
+ void *cso,
+ struct u_vbuf_mgr_elements *ve);
+
+void u_vbuf_mgr_destroy_vertex_elements(struct u_vbuf_mgr *mgr,
+ struct u_vbuf_mgr_elements *ve);
+
+void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgr,
+ unsigned count,
+ const struct pipe_vertex_buffer *bufs);
+
+void u_vbuf_mgr_draw_begin(struct u_vbuf_mgr *mgr,
+ const struct pipe_draw_info *info,
+ boolean *buffers_updated,
+ boolean *uploader_flushed);
+
+void u_vbuf_mgr_draw_end(struct u_vbuf_mgr *mgr);
+
+
+static INLINE struct u_vbuf_resource *u_vbuf_resource(struct pipe_resource *r)
+{
+ return (struct u_vbuf_resource*)r;
+}
+
+#endif
diff --git a/src/gallium/docs/d3d11ddi.txt b/src/gallium/docs/d3d11ddi.txt
index 11e7719089..b5a06b4b94 100644
--- a/src/gallium/docs/d3d11ddi.txt
+++ b/src/gallium/docs/d3d11ddi.txt
@@ -337,7 +337,6 @@ IaSetTopology
+ Gallium supports line loops, triangle fans, quads, quad strips and polygons
IaSetVertexBuffers -> set_vertex_buffers
- + Gallium allows to specify a max_index here
- Gallium only allows setting all vertex buffers at once, while D3D11 supports setting a subset
OpenResource -> texture_from_handle
@@ -454,8 +453,8 @@ SetVertexPipelineOutput (D3D10.1+ only)
SetViewports
- Gallium lacks support for multiple geometry-shader-selectable viewports D3D11 has
-ShaderResourceViewReadAfterWriteHazard -> flush(PIPE_FLUSH_RENDER_CACHE)
- - Gallium does not support specifying this per-render-target/view
+ShaderResourceViewReadAfterWriteHazard
+ - Gallium lacks support for this
SoSetTargets -> set_stream_output_buffers
diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst
index c33cf7c573..b0229c70f2 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -3,8 +3,12 @@
Context
=======
-The context object represents the purest, most directly accessible, abilities
-of the device's 3D rendering pipeline.
+A Gallium rendering context encapsulates the state which effects 3D
+rendering such as blend state, depth/stencil state, texture samplers,
+etc.
+
+Note that resource/texture allocation is not per-context but per-screen.
+
Methods
-------
@@ -12,20 +16,23 @@ Methods
CSO State
^^^^^^^^^
-All CSO state is created, bound, and destroyed, with triplets of methods that
-all follow a specific naming scheme. For example, ``create_blend_state``,
-``bind_blend_state``, and ``destroy_blend_state``.
+All Constant State Object (CSO) state is created, bound, and destroyed,
+with triplets of methods that all follow a specific naming scheme.
+For example, ``create_blend_state``, ``bind_blend_state``, and
+``destroy_blend_state``.
CSO objects handled by the context object:
* :ref:`Blend`: ``*_blend_state``
-* :ref:`Sampler`: These are special; they can be bound to either vertex or
- fragment samplers, and they are bound in groups.
- ``bind_fragment_sampler_states``, ``bind_vertex_sampler_states``
+* :ref:`Sampler`: Texture sampler states are bound separately for fragment,
+ vertex and geometry samplers. Note that sampler states are set en masse.
+ If M is the max number of sampler units supported by the driver and N
+ samplers are bound with ``bind_fragment_sampler_states`` then sampler
+ units N..M-1 are considered disabled/NULL.
* :ref:`Rasterizer`: ``*_rasterizer_state``
* :ref:`Depth, Stencil, & Alpha`: ``*_depth_stencil_alpha_state``
-* :ref:`Shader`: These have two sets of methods. ``*_fs_state`` is for
- fragment shaders, and ``*_vs_state`` is for vertex shaders.
+* :ref:`Shader`: These are create, bind and destroy methods for vertex,
+ fragment and geometry shaders.
* :ref:`Vertex Elements`: ``*_vertex_elements_state``
@@ -47,6 +54,9 @@ buffers, surfaces) are bound to the driver.
* ``set_index_buffer``
+* ``set_stream_output_buffers``
+
+
Non-CSO State
^^^^^^^^^^^^^
@@ -96,7 +106,9 @@ to the array index which is used for sampling.
* ``set_fragment_sampler_views`` binds an array of sampler views to
fragment shader stage. Every binding point acquires a reference
to a respective sampler view and releases a reference to the previous
- sampler view.
+ sampler view. If M is the maximum number of sampler units and N units
+ is passed to set_fragment_sampler_views, the driver should unbind the
+ sampler views for units N..M-1.
* ``set_vertex_sampler_views`` binds an array of sampler views to vertex
shader stage. Every binding point acquires a reference to a respective
@@ -232,9 +244,16 @@ The most common type of query is the occlusion query,
``PIPE_QUERY_OCCLUSION_COUNTER``, which counts the number of fragments which
are written to the framebuffer without being culled by
:ref:`Depth, Stencil, & Alpha` testing or shader KILL instructions.
+The result is an unsigned 64-bit integer.
+In cases where a boolean result of an occlusion query is enough,
+``PIPE_QUERY_OCCLUSION_PREDICATE`` should be used. It is just like
+``PIPE_QUERY_OCCLUSION_COUNTER`` except that the result is a boolean
+value of FALSE for cases where COUNTER would result in 0 and TRUE
+for all other cases.
Another type of query, ``PIPE_QUERY_TIME_ELAPSED``, returns the amount of
time, in nanoseconds, the context takes to perform operations.
+The result is an unsigned 64-bit integer.
Gallium does not guarantee the availability of any query types; one must
always check the capabilities of the :ref:`Screen` first.
@@ -347,6 +366,22 @@ Any pointers into the map should be considered invalid and discarded.
Basically get_transfer, transfer_map, data write, transfer_unmap, and
transfer_destroy all in one.
+
+The box parameter to some of these functions defines a 1D, 2D or 3D
+region of pixels. This is self-explanatory for 1D, 2D and 3D texture
+targets.
+
+For PIPE_TEXTURE_1D_ARRAY, the box::y and box::height fields refer to the
+array dimension of the texture.
+
+For PIPE_TEXTURE_2D_ARRAY, the box::z and box::depth fields refer to the
+array dimension of the texture.
+
+For PIPE_TEXTURE_CUBE, the box:z and box::depth fields refer to the
+faces of the cube map (z + depth <= 6).
+
+
+
.. _transfer_flush_region:
transfer_flush_region
@@ -357,6 +392,32 @@ be flushed on write or unmap. Flushes must be requested with
``transfer_flush_region``. Flush ranges are relative to the mapped range, not
the beginning of the resource.
+
+
+.. _redefine_user_buffer:
+
+redefine_user_buffer
+%%%%%%%%%%%%%%%%%%%%
+
+This function notifies a driver that the user buffer content has been changed.
+The updated region starts at ``offset`` and is ``size`` bytes large.
+The ``offset`` is relative to the pointer specified in ``user_buffer_create``.
+While uploading the user buffer, the driver is allowed not to upload
+the memory outside of this region.
+The width0 is redefined to ``MAX2(width0, offset+size)``.
+
+
+
+.. _texture_barrier
+
+texture_barrier
+%%%%%%%%%%%%%%%
+
+This function flushes all pending writes to the currently-set surfaces and
+invalidates all read caches of the currently-set samplers.
+
+
+
.. _pipe_transfer:
PIPE_TRANSFER
@@ -364,16 +425,32 @@ PIPE_TRANSFER
These flags control the behavior of a transfer object.
-* ``READ``: resource contents are read at transfer create time.
-* ``WRITE``: resource contents will be written back at transfer destroy time.
-* ``MAP_DIRECTLY``: a transfer should directly map the resource. May return
- NULL if not supported.
-* ``DISCARD``: The memory within the mapped region is discarded.
- Cannot be used with ``READ``.
-* ``DONTBLOCK``: Fail if the resource cannot be mapped immediately.
-* ``UNSYNCHRONIZED``: Do not synchronize pending operations on the resource
- when mapping. The interaction of any writes to the map and any
- operations pending on the resource are undefined. Cannot be used with
- ``READ``.
-* ``FLUSH_EXPLICIT``: Written ranges will be notified later with
- :ref:`transfer_flush_region`. Cannot be used with ``READ``.
+``PIPE_TRANSFER_READ``
+ Resource contents read back (or accessed directly) at transfer create time.
+
+``PIPE_TRANSFER_WRITE``
+ Resource contents will be written back at transfer_destroy time (or modified
+ as a result of being accessed directly).
+
+``PIPE_TRANSFER_MAP_DIRECTLY``
+ a transfer should directly map the resource. May return NULL if not supported.
+
+``PIPE_TRANSFER_DISCARD_RANGE``
+ The memory within the mapped region is discarded. Cannot be used with
+ ``PIPE_TRANSFER_READ``.
+
+``PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE``
+ Discards all memory backing the resource. It should not be used with
+ ``PIPE_TRANSFER_READ``.
+
+``PIPE_TRANSFER_DONTBLOCK``
+ Fail if the resource cannot be mapped immediately.
+
+``PIPE_TRANSFER_UNSYNCHRONIZED``
+ Do not synchronize pending operations on the resource when mapping. The
+ interaction of any writes to the map and any operations pending on the
+ resource are undefined. Cannot be used with ``PIPE_TRANSFER_READ``.
+
+``PIPE_TRANSFER_FLUSH_EXPLICIT``
+ Written ranges will be notified later with :ref:`transfer_flush_region`.
+ Cannot be used with ``PIPE_TRANSFER_READ``.
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index 09edbaa673..976e75bed0 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -36,6 +36,7 @@ The integer capabilities:
bound.
* ``OCCLUSION_QUERY``: Whether occlusion queries are available.
* ``TIMER_QUERY``: Whether timer queries are available.
+* ``INSTANCED_DRAWING``: indicates support for instanced drawing.
* ``TEXTURE_SHADOW_MAP``: indicates whether the fragment shader hardware
can do the depth texture / Z comparison operation in TEX instructions
for shadow testing.
@@ -272,7 +273,7 @@ Modern APIs allow using buffers as shader resources.
**depth0** the depth of the base mip level of the texture
(1 for everything else).
-**array_size the array size for 1D and 2D array textures.
+**array_size** the array size for 1D and 2D array textures.
For cube maps this must be 6, for other textures 1.
**last_level** the last mip map level present.
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index d99ed7c6d6..4debcc6ecc 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -577,17 +577,45 @@ This instruction replicates its result.
.. opcode:: TEX - Texture Lookup
- TBD
+.. math::
+
+ coord = src0
+
+ bias = 0.0
+
+ dst = texture_sample(unit, coord, bias)
.. opcode:: TXD - Texture Lookup with Derivatives
- TBD
+.. math::
+
+ coord = src0
+
+ ddx = src1
+
+ ddy = src2
+
+ bias = 0.0
+
+ dst = texture_sample_deriv(unit, coord, bias, ddx, ddy)
.. opcode:: TXP - Projective Texture Lookup
- TBD
+.. math::
+
+ coord.x = src0.x / src.w
+
+ coord.y = src0.y / src.w
+
+ coord.z = src0.z / src.w
+
+ coord.w = src0.w
+
+ bias = 0.0
+
+ dst = texture_sample(unit, coord, bias)
.. opcode:: UP2H - Unpack Two 16-Bit Floats
@@ -729,7 +757,19 @@ This instruction replicates its result.
.. opcode:: TXB - Texture Lookup With Bias
- TBD
+.. math::
+
+ coord.x = src.x
+
+ coord.y = src.y
+
+ coord.z = src.z
+
+ coord.w = 1.0
+
+ bias = src.z
+
+ dst = texture_sample(unit, coord, bias)
.. opcode:: NRM - 3-component Vector Normalise
@@ -767,9 +807,21 @@ This instruction replicates its result.
dst = src0.x \times src1.x + src0.y \times src1.y
-.. opcode:: TXL - Texture Lookup With LOD
+.. opcode:: TXL - Texture Lookup With explicit LOD
- TBD
+.. math::
+
+ coord.x = src0.x
+
+ coord.y = src0.y
+
+ coord.z = src0.z
+
+ coord.w = 1.0
+
+ lod = src0.w
+
+ dst = texture_sample(unit, coord, lod)
.. opcode:: BRK - Break
@@ -1198,6 +1250,157 @@ This opcode is the inverse of :opcode:`DFRACEXP`.
dst.zw = \sqrt{src.zw}
+.. _resourceopcodes:
+
+Resource Access Opcodes
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Those opcodes follow very closely semantics of the respective Direct3D
+instructions. If in doubt double check Direct3D documentation.
+
+.. opcode:: LOAD - Simplified alternative to the "SAMPLE" instruction.
+ Using the provided integer address, LOAD fetches data
+ from the specified buffer/texture without any filtering.
+ The source data may come from any resource type other
+ than CUBE.
+ LOAD dst, address, resource
+ e.g.
+ LOAD TEMP[0], TEMP[1], RES[0]
+ The 'address' is specified as unsigned integers. If the
+ 'address' is out of range [0...(# texels - 1)] the
+ result of the fetch is always 0 in all components.
+ As such the instruction doesn't honor address wrap
+ modes, in cases where that behavior is desirable
+ 'sample' instruction should be used.
+ address.w always provides an unsigned integer mipmap
+ level. If the value is out of the range then the
+ instruction always returns 0 in all components.
+ address.yz are ignored for buffers and 1d textures.
+ address.z is ignored for 1d texture arrays and 2d
+ textures.
+ For 1D texture arrays address.y provides the array
+ index (also as unsigned integer). If the value is
+ out of the range of available array indices
+ [0... (array size - 1)] then the opcode always returns
+ 0 in all components.
+ For 2D texture arrays address.z provides the array
+ index, otherwise it exhibits the same behavior as in
+ the case for 1D texture arrays.
+ The exeact semantics of the source address are presented
+ in the table below:
+ resource type X Y Z W
+ ------------- ------------------------
+ PIPE_BUFFER x ignored
+ PIPE_TEXTURE_1D x mpl
+ PIPE_TEXTURE_2D x y mpl
+ PIPE_TEXTURE_3D x y z mpl
+ PIPE_TEXTURE_RECT x y mpl
+ PIPE_TEXTURE_CUBE not allowed as source
+ PIPE_TEXTURE_1D_ARRAY x idx mpl
+ PIPE_TEXTURE_2D_ARRAY x y idx mpl
+
+ Where 'mpl' is a mipmap level and 'idx' is the
+ array index.
+
+
+.. opcode:: LOAD_MS - Just like LOAD but allows fetch data from
+ multi-sampled surfaces.
+
+.. opcode:: SAMPLE - Using provided address, sample data from the
+ specified texture using the filtering mode identified
+ by the gven sampler. The source data may come from
+ any resource type other than buffers.
+ SAMPLE dst, address, resource, sampler
+ e.g.
+ SAMPLE TEMP[0], TEMP[1], RES[0], SAMP[0]
+
+.. opcode:: SAMPLE_B - Just like the SAMPLE instruction with the
+ exception that an additiona bias is applied to the
+ level of detail computed as part of the instruction
+ execution.
+ SAMPLE_B dst, address, resource, sampler, lod_bias
+ e.g.
+ SAMPLE_B TEMP[0], TEMP[1], RES[0], SAMP[0], TEMP[2].x
+
+.. opcode:: SAMPLE_C - Similar to the SAMPLE instruction but it
+ performs a comparison filter. The operands to SAMPLE_C
+ are identical to SAMPLE, except that tere is an additional
+ float32 operand, reference value, which must be a register
+ with single-component, or a scalar literal.
+ SAMPLE_C makes the hardware use the current samplers
+ compare_func (in pipe_sampler_state) to compare
+ reference value against the red component value for the
+ surce resource at each texel that the currently configured
+ texture filter covers based on the provided coordinates.
+ SAMPLE_C dst, address, resource.r, sampler, ref_value
+ e.g.
+ SAMPLE_C TEMP[0], TEMP[1], RES[0].r, SAMP[0], TEMP[2].x
+
+.. opcode:: SAMPLE_C_LZ - Same as SAMPLE_C, but LOD is 0 and derivatives
+ are ignored. The LZ stands for level-zero.
+ SAMPLE_C_LZ dst, address, resource.r, sampler, ref_value
+ e.g.
+ SAMPLE_C_LZ TEMP[0], TEMP[1], RES[0].r, SAMP[0], TEMP[2].x
+
+
+.. opcode:: SAMPLE_D - SAMPLE_D is identical to the SAMPLE opcode except
+ that the derivatives for the source address in the x
+ direction and the y direction are provided by extra
+ parameters.
+ SAMPLE_D dst, address, resource, sampler, der_x, der_y
+ e.g.
+ SAMPLE_D TEMP[0], TEMP[1], RES[0], SAMP[0], TEMP[2], TEMP[3]
+
+.. opcode:: SAMPLE_L - SAMPLE_L is identical to the SAMPLE opcode except
+ that the LOD is provided directly as a scalar value,
+ representing no anisotropy. Source addresses A channel
+ is used as the LOD.
+ SAMPLE_L dst, address, resource, sampler
+ e.g.
+ SAMPLE_L TEMP[0], TEMP[1], RES[0], SAMP[0]
+
+
+.. opcode:: GATHER4 - Gathers the four texels to be used in a bi-linear
+ filtering operation and packs them into a single register.
+ Only woth with 2D, 2D array, cubemaps, and cubemaps arrays.
+ For 2D textures, only the addressing modes of the sampler and
+ the top level of any mip pyramid are used. Set W to zero.
+ It behaves like the SAMPLE instruction, but a filtered
+ sample is not generated. The four samples that contribute
+ to filtering are places into xyzw in cunter-clockwise order,
+ starting with the (u,v) texture coordinate delta at the
+ following locations (-, +), (+, +), (+, -), (-, -), where
+ the magnitude of the deltas are half a texel.
+
+
+.. opcode:: RESINFO - query the dimensions of a given input buffer.
+ dst receives width, height, depth or array size and
+ number of mipmap levels. The dst can have a writemask
+ which will specify what info is the caller interested
+ in.
+ RESINFO dst, src_mip_level, resource
+ e.g.
+ RESINFO TEMP[0], TEMP[1].x, RES[0]
+ src_mip_level is an unsigned integer scalar. If it's
+ out of range then returns 0 for width, height and
+ depth/array size but the total number of mipmap is
+ still returned correctly for the given resource.
+ The returned width, height and depth values are for
+ the mipmap level selected by the src_mip_level and
+ are in the number of texels.
+ For 1d texture array width is in dst.x, array size
+ is in dst.y and dst.zw are always 0.
+
+.. opcode:: SAMPLE_POS - query the position of a given sample.
+ dst receives float4 (x, y, 0, 0) indicated where the
+ sample is located. If the resource is not a multi-sample
+ resource and not a render target, the result is 0.
+
+.. opcode:: SAMPLE_INFO - dst receives number of samples in x.
+ If the resource is not a multi-sample resource and
+ not a render target, the result is 0.
+
+
Explanation of symbols used
------------------------------
@@ -1280,6 +1483,8 @@ wrapping when interpolating by the rasteriser. If TGSI_CYLINDRICAL_WRAP_X
is set to 1, the X component should be interpolated according to cylindrical
wrapping rules.
+If file is TGSI_FILE_RESOURCE, a Declaration Resource token follows.
+
Declaration Semantic
^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1423,6 +1628,23 @@ is a writable stencil reference value. Only the Y component is writable.
This allows the fragment shader to change the fragments stencilref value.
+Declaration Resource
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+ Follows Declaration token if file is TGSI_FILE_RESOURCE.
+
+ DCL RES[#], resource, type(s)
+
+ Declares a shader input resource and assigns it to a RES[#]
+ register.
+
+ resource can be one of BUFFER, 1D, 2D, 3D, CUBE, 1DArray and
+ 2DArray.
+
+ type must be 1 or 4 entries (if specifying on a per-component
+ level) out of UNORM, SNORM, SINT, UINT and FLOAT.
+
+
Properties
^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1464,6 +1686,11 @@ GL_ARB_fragment_coord_conventions extension.
DirectX 9 uses INTEGER.
DirectX 10 uses HALF_INTEGER.
+FS_COLOR0_WRITES_ALL_CBUFS
+""""""""""""""""""""""""""
+Specifies that writes to the fragment shader color 0 are replicated to all
+bound cbufs. This facilitates OpenGL's fragColor output vs fragData[0] where
+fragData is directed to a single color buffer, but fragColor is broadcast.
Texture Sampling and Texture Formats
diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c
index b6b3a700cd..58e647a39f 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.c
+++ b/src/gallium/drivers/cell/ppu/cell_context.c
@@ -37,6 +37,7 @@
#include "pipe/p_format.h"
#include "util/u_memory.h"
#include "pipe/p_screen.h"
+#include "util/u_inlines.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
@@ -61,6 +62,11 @@ static void
cell_destroy_context( struct pipe_context *pipe )
{
struct cell_context *cell = cell_context(pipe);
+ unsigned i;
+
+ for (i = 0; i < cell->num_vertex_buffers; i++) {
+ pipe_resource_reference(&cell->vertex_buffer[i].buffer, NULL);
+ }
util_delete_keymap(cell->fragment_ops_cache, NULL);
@@ -98,18 +104,6 @@ static const struct debug_named_value cell_debug_flags[] = {
DEBUG_NAMED_VALUE_END
};
-static unsigned int
-cell_is_resource_referenced( struct pipe_context *pipe,
- struct pipe_resource *texture,
- unsigned level, int layer)
-{
- /**
- * FIXME: Optimize.
- */
-
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
struct pipe_context *
cell_create_context(struct pipe_screen *screen,
@@ -134,8 +128,6 @@ cell_create_context(struct pipe_screen *screen,
cell->pipe.clear = cell_clear;
cell->pipe.flush = cell_flush;
- cell->pipe.is_resource_referenced = cell_is_resource_referenced;
-
#if 0
cell->pipe.begin_query = cell_begin_query;
cell->pipe.end_query = cell_end_query;
diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c
index e7c9fc46d9..181fef44f4 100644
--- a/src/gallium/drivers/cell/ppu/cell_fence.c
+++ b/src/gallium/drivers/cell/ppu/cell_fence.c
@@ -59,9 +59,10 @@ cell_fence_signalled(const struct cell_context *cell,
}
-void
+boolean
cell_fence_finish(const struct cell_context *cell,
- const struct cell_fence *fence)
+ const struct cell_fence *fence,
+ uint64_t timeout)
{
while (!cell_fence_signalled(cell, fence)) {
usleep(10);
@@ -75,6 +76,7 @@ cell_fence_finish(const struct cell_context *cell,
}
}
#endif
+ return TRUE;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h
index 536b4ba411..3568230b1c 100644
--- a/src/gallium/drivers/cell/ppu/cell_fence.h
+++ b/src/gallium/drivers/cell/ppu/cell_fence.h
@@ -36,12 +36,15 @@ cell_fence_init(struct cell_fence *fence);
extern boolean
cell_fence_signalled(const struct cell_context *cell,
- const struct cell_fence *fence);
+ const struct cell_fence *fence,
+ unsigned flags);
-extern void
+extern boolean
cell_fence_finish(const struct cell_context *cell,
- const struct cell_fence *fence);
+ const struct cell_fence *fence,
+ unsigned flags,
+ uint64_t timeout);
diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c
index 8275c9dc9c..463f4d03eb 100644
--- a/src/gallium/drivers/cell/ppu/cell_flush.c
+++ b/src/gallium/drivers/cell/ppu/cell_flush.c
@@ -38,19 +38,16 @@
* Called via pipe->flush()
*/
void
-cell_flush(struct pipe_context *pipe, unsigned flags,
+cell_flush(struct pipe_context *pipe,
struct pipe_fence_handle **fence)
{
struct cell_context *cell = cell_context(pipe);
if (fence) {
*fence = NULL;
- /* XXX: Implement real fencing */
- flags |= CELL_FLUSH_WAIT;
}
- if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_RENDER_CACHE))
- flags |= CELL_FLUSH_WAIT;
+ flags |= CELL_FLUSH_WAIT;
draw_flush( cell->draw );
cell_flush_int(cell, flags);
diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c
index 8d2b4b9643..0ee124a24f 100644
--- a/src/gallium/drivers/cell/ppu/cell_screen.c
+++ b/src/gallium/drivers/cell/ppu/cell_screen.c
@@ -149,8 +149,7 @@ cell_is_format_supported( struct pipe_screen *screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned tex_usage,
- unsigned geom_flags )
+ unsigned tex_usage)
{
struct sw_winsys *winsys = cell_screen(screen)->winsys;
diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
index a065d68b5a..7f65b82619 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
@@ -33,6 +33,7 @@
#include "cell_state.h"
#include "util/u_memory.h"
+#include "util/u_transfer.h"
#include "draw/draw_context.h"
@@ -82,8 +83,9 @@ cell_set_vertex_buffers(struct pipe_context *pipe,
assert(count <= PIPE_MAX_ATTRIBS);
- memcpy(cell->vertex_buffer, buffers, count * sizeof(buffers[0]));
- cell->num_vertex_buffers = count;
+ util_copy_vertex_buffers(cell->vertex_buffer,
+ &cell->num_vertex_buffers,
+ buffers, count);
cell->dirty |= CELL_NEW_VERTEX;
@@ -114,4 +116,5 @@ cell_init_vertex_functions(struct cell_context *cell)
cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state;
cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state;
cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state;
+ cell->pipe.redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c
index e4d289c8a4..0fefec9aae 100644
--- a/src/gallium/drivers/failover/fo_context.c
+++ b/src/gallium/drivers/failover/fo_context.c
@@ -29,6 +29,7 @@
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "pipe/p_context.h"
+#include "util/u_inlines.h"
#include "fo_context.h"
#include "fo_winsys.h"
@@ -38,6 +39,11 @@
static void failover_destroy( struct pipe_context *pipe )
{
struct failover_context *failover = failover_context( pipe );
+ unsigned i;
+
+ for (i = 0; i < failover->num_vertex_buffers; i++) {
+ pipe_resource_reference(&failover->vertex_buffers[i].buffer, NULL);
+ }
FREE( failover );
}
@@ -73,7 +79,7 @@ static void failover_draw_vbo( struct pipe_context *pipe,
if (failover->mode == FO_SW) {
if (failover->dirty) {
- failover->hw->flush( failover->hw, ~0, NULL );
+ failover->hw->flush( failover->hw, NULL );
failover_state_emit( failover );
}
@@ -83,22 +89,10 @@ static void failover_draw_vbo( struct pipe_context *pipe,
* intervening flush. Unlikely to be much performance impact to
* this:
*/
- failover->sw->flush( failover->sw, ~0, NULL );
+ failover->sw->flush( failover->sw, NULL );
}
}
-static unsigned int
-failover_is_resource_referenced( struct pipe_context *_pipe,
- struct pipe_resource *resource,
- unsigned level, int layer)
-{
- struct failover_context *failover = failover_context( _pipe );
- struct pipe_context *pipe = (failover->mode == FO_HW) ?
- failover->hw : failover->sw;
-
- return pipe->is_resource_referenced(pipe, resource, level, layer);
-}
-
struct pipe_context *failover_create( struct pipe_context *hw,
struct pipe_context *sw )
{
@@ -144,7 +138,6 @@ struct pipe_context *failover_create( struct pipe_context *hw,
#endif
failover->pipe.flush = hw->flush;
- failover->pipe.is_resource_referenced = failover_is_resource_referenced;
failover->dirty = 0;
diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c
index c265f381b6..b4da1b8b90 100644
--- a/src/gallium/drivers/failover/fo_state.c
+++ b/src/gallium/drivers/failover/fo_state.c
@@ -30,6 +30,7 @@
#include "util/u_inlines.h"
#include "util/u_memory.h"
+#include "util/u_transfer.h"
#include "fo_context.h"
@@ -574,10 +575,10 @@ failover_set_vertex_buffers(struct pipe_context *pipe,
{
struct failover_context *failover = failover_context(pipe);
- memcpy(failover->vertex_buffers, vertex_buffers,
- count * sizeof(vertex_buffers[0]));
+ util_copy_vertex_buffers(failover->vertex_buffers,
+ &failover->num_vertex_buffers,
+ vertex_buffers, count);
failover->dirty |= FO_NEW_VERTEX_BUFFER;
- failover->num_vertex_buffers = count;
failover->sw->set_vertex_buffers( failover->sw, count, vertex_buffers );
failover->hw->set_vertex_buffers( failover->hw, count, vertex_buffers );
}
@@ -656,4 +657,5 @@ failover_init_state_functions( struct failover_context *failover )
failover->pipe.set_constant_buffer = failover_set_constant_buffer;
failover->pipe.create_sampler_view = failover_create_sampler_view;
failover->pipe.sampler_view_destroy = failover_sampler_view_destroy;
+ failover->pipe.redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c
index a572ad22bd..813a21e2ee 100644
--- a/src/gallium/drivers/galahad/glhd_context.c
+++ b/src/gallium/drivers/galahad/glhd_context.c
@@ -381,6 +381,8 @@ galahad_create_vertex_elements_state(struct pipe_context *_pipe,
struct galahad_context *glhd_pipe = galahad_context(_pipe);
struct pipe_context *pipe = glhd_pipe->pipe;
+ /* XXX check if stride lines up with element size, at least for floats */
+
return pipe->create_vertex_elements_state(pipe,
num_elements,
vertex_elements);
@@ -759,34 +761,15 @@ galahad_clear_depth_stencil(struct pipe_context *_pipe,
static void
galahad_flush(struct pipe_context *_pipe,
- unsigned flags,
struct pipe_fence_handle **fence)
{
struct galahad_context *glhd_pipe = galahad_context(_pipe);
struct pipe_context *pipe = glhd_pipe->pipe;
pipe->flush(pipe,
- flags,
fence);
}
-static unsigned int
-galahad_is_resource_referenced(struct pipe_context *_pipe,
- struct pipe_resource *_resource,
- unsigned level,
- int layer)
-{
- struct galahad_context *glhd_pipe = galahad_context(_pipe);
- struct galahad_resource *glhd_resource = galahad_resource(_resource);
- struct pipe_context *pipe = glhd_pipe->pipe;
- struct pipe_resource *resource = glhd_resource->resource;
-
- return pipe->is_resource_referenced(pipe,
- resource,
- level,
- layer);
-}
-
static struct pipe_sampler_view *
galahad_context_create_sampler_view(struct pipe_context *_pipe,
struct pipe_resource *_resource,
@@ -960,6 +943,19 @@ galahad_context_transfer_inline_write(struct pipe_context *_context,
}
+static void galahad_redefine_user_buffer(struct pipe_context *_context,
+ struct pipe_resource *_resource,
+ unsigned offset, unsigned size)
+{
+ struct galahad_context *glhd_context = galahad_context(_context);
+ struct galahad_resource *glhd_resource = galahad_resource(_resource);
+ struct pipe_context *context = glhd_context->pipe;
+ struct pipe_resource *resource = glhd_resource->resource;
+
+ context->redefine_user_buffer(context, resource, offset, size);
+}
+
+
struct pipe_context *
galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
{
@@ -1023,7 +1019,6 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
glhd_pipe->base.clear_render_target = galahad_clear_render_target;
glhd_pipe->base.clear_depth_stencil = galahad_clear_depth_stencil;
glhd_pipe->base.flush = galahad_flush;
- glhd_pipe->base.is_resource_referenced = galahad_is_resource_referenced;
glhd_pipe->base.create_sampler_view = galahad_context_create_sampler_view;
glhd_pipe->base.sampler_view_destroy = galahad_context_sampler_view_destroy;
glhd_pipe->base.create_surface = galahad_context_create_surface;
@@ -1034,6 +1029,7 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
glhd_pipe->base.transfer_unmap = galahad_context_transfer_unmap;
glhd_pipe->base.transfer_flush_region = galahad_context_transfer_flush_region;
glhd_pipe->base.transfer_inline_write = galahad_context_transfer_inline_write;
+ glhd_pipe->base.redefine_user_buffer = galahad_redefine_user_buffer;
glhd_pipe->pipe = pipe;
diff --git a/src/gallium/drivers/galahad/glhd_screen.c b/src/gallium/drivers/galahad/glhd_screen.c
index b4825bef66..b4edebe492 100644
--- a/src/gallium/drivers/galahad/glhd_screen.c
+++ b/src/gallium/drivers/galahad/glhd_screen.c
@@ -106,8 +106,7 @@ galahad_screen_is_format_supported(struct pipe_screen *_screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned tex_usage,
- unsigned geom_flags)
+ unsigned tex_usage)
{
struct galahad_screen *glhd_screen = galahad_screen(_screen);
struct pipe_screen *screen = glhd_screen->screen;
@@ -120,8 +119,7 @@ galahad_screen_is_format_supported(struct pipe_screen *_screen,
format,
target,
sample_count,
- tex_usage,
- geom_flags);
+ tex_usage);
}
static struct pipe_context *
@@ -276,30 +274,28 @@ galahad_screen_fence_reference(struct pipe_screen *_screen,
fence);
}
-static int
+static boolean
galahad_screen_fence_signalled(struct pipe_screen *_screen,
- struct pipe_fence_handle *fence,
- unsigned flags)
+ struct pipe_fence_handle *fence)
{
struct galahad_screen *glhd_screen = galahad_screen(_screen);
struct pipe_screen *screen = glhd_screen->screen;
return screen->fence_signalled(screen,
- fence,
- flags);
+ fence);
}
-static int
+static boolean
galahad_screen_fence_finish(struct pipe_screen *_screen,
struct pipe_fence_handle *fence,
- unsigned flags)
+ uint64_t timeout)
{
struct galahad_screen *glhd_screen = galahad_screen(_screen);
struct pipe_screen *screen = glhd_screen->screen;
return screen->fence_finish(screen,
fence,
- flags);
+ timeout);
}
struct pipe_screen *
diff --git a/src/gallium/drivers/i915/TODO b/src/gallium/drivers/i915/TODO
index 94c428bebf..fba180064c 100644
--- a/src/gallium/drivers/i915/TODO
+++ b/src/gallium/drivers/i915/TODO
@@ -1,25 +1,30 @@
Random list of problems with i915g:
+- Check if PIPE_CAP_BLEND_EQUATION_SEPARATE work, the code is there.
+ If not fix it! A simple task, good for beginners.
+
+- Add support for PIPE_CAP_POINT_SPRITE either via the draw module or directly
+ via the hardware, look at the classic driver, more advanced.
+
+- What does this button do? Figure out LIS7 with regards to depth offset.
+
- Dies with BadDrawable on GLXFBconfig changes/destruction. Makes piglit totally
unusable :( Upgrading xserver helped here, it doesn't crash anymore. Still
broken, it doesn't update the viewport/get new buffers.
-- Tends to hang the chip after a few minutes of openarena. Looks tiling related,
- at the last frame rendered has tiling corruption over the complete frame.
-
- Kills the chip in 3D_PRIMITIVE LINELIST with mesa-demos/fbotexture in
- wireframe mode.
-
-- Tiling is funny: If unlucky, it renders/samples all black. No clue yet what's
- going on. Seems to depend on tiny details like whethever the sampler
- relocation is fenced/unfenced (broken _with_ fenced reloc using tiling bits!).
+ wireframe mode. Changing the cullmode to cw from none mitigates the crash. As
+ does emitting only one line segment (2 indices) per 3D_PRIMITIVE command in
+ the batch.
- Y-tiling is even more fun. i915c doesn't use it, maybe there's a reason?
Texture sampling from Y-tiled buffers seems to work, though (save above
problems).
+ RESOLVED: Y-tiling works with the render engine, but not with the blitter.
+ Use u_blitter and hw clears (PRIM3D_CLEAR_RECT).
-- Need to validate buffers before usage. Currently do_exec on the batchbuffer
- can fail with -ENOSPC.
+- src/xvmc/i915_structs.h in xf86-video-intel has a few more bits of various
+ commands defined. Scavenge them and see what's useful.
Other bugs can be found here:
https://bugs.freedesktop.org/buglist.cgi?bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&component=Drivers/Gallium/i915g
diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h
index 6e93da7620..ce2691b2fd 100644
--- a/src/gallium/drivers/i915/i915_batch.h
+++ b/src/gallium/drivers/i915/i915_batch.h
@@ -31,12 +31,15 @@
#include "i915_batchbuffer.h"
-#define BEGIN_BATCH(dwords, relocs) \
- (i915_winsys_batchbuffer_check(i915->batch, dwords, relocs))
+#define BEGIN_BATCH(dwords) \
+ (i915_winsys_batchbuffer_check(i915->batch, dwords))
#define OUT_BATCH(dword) \
i915_winsys_batchbuffer_dword(i915->batch, dword)
+#define OUT_BATCH_F(f) \
+ i915_winsys_batchbuffer_float(i915->batch, f)
+
#define OUT_RELOC(buf, usage, offset) \
i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset, false)
diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h
index d92b2ccb31..7855403478 100644
--- a/src/gallium/drivers/i915/i915_batchbuffer.h
+++ b/src/gallium/drivers/i915/i915_batchbuffer.h
@@ -41,11 +41,9 @@ i915_winsys_batchbuffer_space(struct i915_winsys_batchbuffer *batch)
static INLINE boolean
i915_winsys_batchbuffer_check(struct i915_winsys_batchbuffer *batch,
- size_t dwords,
- size_t relocs)
+ size_t dwords)
{
- return dwords * 4 <= i915_winsys_batchbuffer_space(batch) &&
- relocs <= (batch->max_relocs - batch->relocs);
+ return dwords * 4 <= i915_winsys_batchbuffer_space(batch);
}
static INLINE void
@@ -57,6 +55,16 @@ i915_winsys_batchbuffer_dword_unchecked(struct i915_winsys_batchbuffer *batch,
}
static INLINE void
+i915_winsys_batchbuffer_float(struct i915_winsys_batchbuffer *batch,
+ float f)
+{
+ union { float f; unsigned int ui; } uif;
+ uif.f = f;
+ assert (i915_winsys_batchbuffer_space(batch) >= 4);
+ i915_winsys_batchbuffer_dword_unchecked(batch, uif.ui);
+}
+
+static INLINE void
i915_winsys_batchbuffer_dword(struct i915_winsys_batchbuffer *batch,
unsigned dword)
{
@@ -71,10 +79,18 @@ i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch,
{
assert (i915_winsys_batchbuffer_space(batch) >= size);
- memcpy(data, batch->ptr, size);
+ memcpy(batch->ptr, data, size);
batch->ptr += size;
}
+static INLINE boolean
+i915_winsys_validate_buffers(struct i915_winsys_batchbuffer *batch,
+ struct i915_winsys_buffer **buffers,
+ int num_of_buffers)
+{
+ return batch->iws->validate_buffers(batch, buffers, num_of_buffers);
+}
+
static INLINE int
i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch,
struct i915_winsys_buffer *buffer,
diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c
index 97c2566515..baaed3767f 100644
--- a/src/gallium/drivers/i915/i915_blit.c
+++ b/src/gallium/drivers/i915/i915_blit.c
@@ -49,6 +49,11 @@ i915_fill_blit(struct i915_context *i915,
I915_DBG(DBG_BLIT, "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
__FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h);
+ if(!i915_winsys_validate_buffers(i915->batch, &dst_buffer, 1)) {
+ FLUSH_BATCH(NULL);
+ assert(i915_winsys_validate_buffers(i915->batch, &dst_buffer, 1));
+ }
+
switch (cpp) {
case 1:
case 2:
@@ -66,9 +71,9 @@ i915_fill_blit(struct i915_context *i915,
return;
}
- if (!BEGIN_BATCH(6, 1)) {
+ if (!BEGIN_BATCH(6)) {
FLUSH_BATCH(NULL);
- assert(BEGIN_BATCH(6, 1));
+ assert(BEGIN_BATCH(6));
}
OUT_BATCH(CMD);
OUT_BATCH(BR13);
@@ -76,6 +81,8 @@ i915_fill_blit(struct i915_context *i915,
OUT_BATCH(((y + h) << 16) | (x + w));
OUT_RELOC_FENCED(dst_buffer, I915_USAGE_2D_TARGET, dst_offset);
OUT_BATCH(color);
+
+ i915_set_flush_dirty(i915, I915_FLUSH_CACHE);
}
void
@@ -94,6 +101,7 @@ i915_copy_blit(struct i915_context *i915,
unsigned CMD, BR13;
int dst_y2 = dst_y + h;
int dst_x2 = dst_x + w;
+ struct i915_winsys_buffer *buffers[2] = {src_buffer, dst_buffer};
I915_DBG(DBG_BLIT,
@@ -102,6 +110,11 @@ i915_copy_blit(struct i915_context *i915,
src_buffer, src_pitch, src_offset, src_x, src_y,
dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
+ if(!i915_winsys_validate_buffers(i915->batch, buffers, 2)) {
+ FLUSH_BATCH(NULL);
+ assert(i915_winsys_validate_buffers(i915->batch, buffers, 2));
+ }
+
switch (cpp) {
case 1:
case 2:
@@ -130,9 +143,9 @@ i915_copy_blit(struct i915_context *i915,
*/
assert (dst_pitch > 0 && src_pitch > 0);
- if (!BEGIN_BATCH(8, 2)) {
+ if (!BEGIN_BATCH(8)) {
FLUSH_BATCH(NULL);
- assert(BEGIN_BATCH(8, 2));
+ assert(BEGIN_BATCH(8));
}
OUT_BATCH(CMD);
OUT_BATCH(BR13);
@@ -142,4 +155,6 @@ i915_copy_blit(struct i915_context *i915,
OUT_BATCH((src_y << 16) | src_x);
OUT_BATCH(((int) src_pitch & 0xffff));
OUT_RELOC_FENCED(src_buffer, I915_USAGE_2D_SOURCE, src_offset);
+
+ i915_set_flush_dirty(i915, I915_FLUSH_CACHE);
}
diff --git a/src/gallium/drivers/i915/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c
index 6d824a507a..4a97746e98 100644
--- a/src/gallium/drivers/i915/i915_clear.c
+++ b/src/gallium/drivers/i915/i915_clear.c
@@ -31,17 +31,118 @@
#include "util/u_clear.h"
+#include "util/u_format.h"
+#include "util/u_pack_color.h"
#include "i915_context.h"
+#include "i915_screen.h"
+#include "i915_reg.h"
+#include "i915_batch.h"
+#include "i915_resource.h"
+#include "i915_state.h"
+void
+i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba,
+ double depth, unsigned stencil,
+ unsigned destx, unsigned desty, unsigned width, unsigned height)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ uint32_t clear_params, clear_color, clear_depth, clear_stencil,
+ clear_color8888, packed_z_stencil;
+ union util_color u_color;
+ float f_depth = depth;
+ struct i915_texture *cbuf_tex, *depth_tex;
+
+ cbuf_tex = depth_tex = NULL;
+ clear_params = 0;
+
+ if (buffers & PIPE_CLEAR_COLOR) {
+ struct pipe_surface *cbuf = i915->framebuffer.cbufs[0];
+
+ clear_params |= CLEARPARAM_WRITE_COLOR;
+ cbuf_tex = i915_texture(cbuf->texture);
+ util_pack_color(rgba, cbuf->format, &u_color);
+ if (util_format_get_blocksize(cbuf_tex->b.b.format) == 4)
+ clear_color = u_color.ui;
+ else
+ clear_color = (u_color.ui & 0xffff) | (u_color.ui << 16);
+
+ util_pack_color(rgba, PIPE_FORMAT_B8G8R8A8_UNORM, &u_color);
+ clear_color8888 = u_color.ui;
+ } else
+ clear_color = clear_color8888 = 0;
+
+ clear_depth = clear_stencil = 0;
+ if (buffers & PIPE_CLEAR_DEPTH) {
+ struct pipe_surface *zbuf = i915->framebuffer.zsbuf;
+
+ clear_params |= CLEARPARAM_WRITE_DEPTH;
+ depth_tex = i915_texture(zbuf->texture);
+ packed_z_stencil = util_pack_z_stencil(depth_tex->b.b.format, depth, stencil);
+
+ if (util_format_get_blocksize(depth_tex->b.b.format) == 4) {
+ /* Avoid read-modify-write if there's no stencil. */
+ if (buffers & PIPE_CLEAR_STENCIL
+ || depth_tex->b.b.format != PIPE_FORMAT_Z24_UNORM_S8_USCALED) {
+ clear_params |= CLEARPARAM_WRITE_STENCIL;
+ clear_stencil = packed_z_stencil & 0xff;
+ clear_depth = packed_z_stencil;
+ } else
+ clear_depth = packed_z_stencil & 0xffffff00;
+ } else {
+ clear_depth = (clear_depth & 0xffff) | (clear_depth << 16);
+ }
+ }
+
+ if (i915->hardware_dirty)
+ i915_emit_hardware_state(i915);
+
+ if (!BEGIN_BATCH(7 + 7)) {
+ FLUSH_BATCH(NULL);
+
+ i915_emit_hardware_state(i915);
+ i915->vbo_flushed = 1;
+
+ assert(BEGIN_BATCH(7 + 7));
+ }
+
+ OUT_BATCH(_3DSTATE_CLEAR_PARAMETERS);
+ OUT_BATCH(clear_params | CLEARPARAM_CLEAR_RECT);
+ OUT_BATCH(clear_color);
+ OUT_BATCH(clear_depth);
+ OUT_BATCH(clear_color8888);
+ OUT_BATCH_F(f_depth);
+ OUT_BATCH(clear_stencil);
+
+ OUT_BATCH(_3DPRIMITIVE | PRIM3D_CLEAR_RECT | 5);
+ OUT_BATCH_F(destx + width);
+ OUT_BATCH_F(desty + height);
+ OUT_BATCH_F(destx);
+ OUT_BATCH_F(desty + height);
+ OUT_BATCH_F(destx);
+ OUT_BATCH_F(desty);
+}
/**
* Clear the given buffers to the specified values.
* No masking, no scissor (clear entire buffer).
*/
void
-i915_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
- double depth, unsigned stencil)
+i915_clear_blitter(struct pipe_context *pipe, unsigned buffers, const float *rgba,
+ double depth, unsigned stencil)
{
util_clear(pipe, &i915_context(pipe)->framebuffer, buffers, rgba, depth,
stencil);
}
+
+void
+i915_clear_render(struct pipe_context *pipe, unsigned buffers, const float *rgba,
+ double depth, unsigned stencil)
+{
+ struct i915_context *i915 = i915_context(pipe);
+
+ if (i915->dirty)
+ i915_update_derived(i915);
+
+ i915_clear_emit(pipe, buffers, rgba, depth, stencil,
+ 0, 0, i915->framebuffer.width, i915->framebuffer.height);
+}
diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c
index 847dd6dd47..7a98ef73c1 100644
--- a/src/gallium/drivers/i915/i915_context.c
+++ b/src/gallium/drivers/i915/i915_context.c
@@ -39,6 +39,9 @@
#include "pipe/p_screen.h"
+DEBUG_GET_ONCE_BOOL_OPTION(i915_no_vbuf, "I915_NO_VBUF", FALSE)
+
+
/*
* Draw functions
*/
@@ -50,18 +53,17 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
struct i915_context *i915 = i915_context(pipe);
struct draw_context *draw = i915->draw;
void *mapped_indices = NULL;
- unsigned i;
+ unsigned cbuf_dirty;
- if (i915->dirty)
- i915_update_derived(i915);
/*
- * Map vertex buffers
+ * Ack vs contants here, helps ipers a lot.
*/
- for (i = 0; i < i915->num_vertex_buffers; i++) {
- void *buf = i915_buffer(i915->vertex_buffer[i].buffer)->data;
- draw_set_mapped_vertex_buffer(draw, i, buf);
- }
+ cbuf_dirty = i915->dirty & I915_NEW_VS_CONSTANTS;
+ i915->dirty &= ~I915_NEW_VS_CONSTANTS;
+
+ if (i915->dirty)
+ i915_update_derived(i915);
/*
* Map index buffer, if present
@@ -70,23 +72,21 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
mapped_indices = i915_buffer(i915->index_buffer.buffer)->data;
draw_set_mapped_index_buffer(draw, mapped_indices);
- draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0,
- i915->current.constants[PIPE_SHADER_VERTEX],
- (i915->current.num_user_constants[PIPE_SHADER_VERTEX] *
- 4 * sizeof(float)));
+ if (cbuf_dirty) {
+ if (i915->constants[PIPE_SHADER_VERTEX])
+ draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0,
+ i915_buffer(i915->constants[PIPE_SHADER_VERTEX])->data,
+ (i915->current.num_user_constants[PIPE_SHADER_VERTEX] *
+ 4 * sizeof(float)));
+ else
+ draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, NULL, 0);
+ }
/*
* Do the drawing
*/
draw_vbo(i915->draw, info);
- /*
- * unmap vertex/index buffers
- */
- for (i = 0; i < i915->num_vertex_buffers; i++) {
- draw_set_mapped_vertex_buffer(draw, i, NULL);
- }
-
if (mapped_indices)
draw_set_mapped_index_buffer(draw, NULL);
}
@@ -103,6 +103,9 @@ static void i915_destroy(struct pipe_context *pipe)
int i;
draw_destroy(i915->draw);
+
+ if (i915->blitter)
+ util_blitter_destroy(i915->blitter);
if(i915->batch)
i915->iws->batchbuffer_destroy(i915->batch);
@@ -113,6 +116,11 @@ static void i915_destroy(struct pipe_context *pipe)
}
pipe_surface_reference(&i915->framebuffer.zsbuf, NULL);
+ /* unbind constant buffers */
+ for (i = 0; i < PIPE_SHADER_TYPES; i++) {
+ pipe_resource_reference(&i915->constants[i], NULL);
+ }
+
FREE(i915);
}
@@ -132,16 +140,27 @@ i915_create_context(struct pipe_screen *screen, void *priv)
i915->base.destroy = i915_destroy;
- i915->base.clear = i915_clear;
+ if (i915_screen(screen)->debug.use_blitter)
+ i915->base.clear = i915_clear_blitter;
+ else
+ i915->base.clear = i915_clear_render;
i915->base.draw_vbo = i915_draw_vbo;
+ /* init this before draw */
+ util_slab_create(&i915->transfer_pool, sizeof(struct pipe_transfer),
+ 16, UTIL_SLAB_SINGLETHREADED);
+
+ /* Batch stream debugging is a bit hacked up at the moment:
+ */
+ i915->batch = i915->iws->batchbuffer_create(i915->iws);
+
/*
* Create drawing context and plug our rendering stage into it.
*/
i915->draw = draw_create(&i915->base);
assert(i915->draw);
- if (!debug_get_bool_option("I915_NO_VBUF", FALSE)) {
+ if (!debug_get_option_i915_no_vbuf()) {
draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915));
} else {
draw_set_rasterize_stage(i915->draw, i915_draw_render_stage(i915));
@@ -155,12 +174,19 @@ i915_create_context(struct pipe_screen *screen, void *priv)
draw_install_aaline_stage(i915->draw, &i915->base);
draw_install_aapoint_stage(i915->draw, &i915->base);
+ /* augmented draw pipeline clobbers state functions */
+ i915_init_fixup_state_functions(i915);
+
+ /* Create blitter last - calls state creation functions. */
+ i915->blitter = util_blitter_create(&i915->base);
+ assert(i915->blitter);
+
i915->dirty = ~0;
i915->hardware_dirty = ~0;
-
- /* Batch stream debugging is a bit hacked up at the moment:
- */
- i915->batch = i915->iws->batchbuffer_create(i915->iws);
+ i915->immediate_dirty = ~0;
+ i915->dynamic_dirty = ~0;
+ i915->static_dirty = ~0;
+ i915->flush_dirty = 0;
return &i915->base;
}
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index 7103a1b8c1..dacf50e870 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -37,6 +37,9 @@
#include "tgsi/tgsi_scan.h"
+#include "util/u_slab.h"
+#include "util/u_blitter.h"
+
struct i915_winsys;
struct i915_winsys_buffer;
@@ -134,7 +137,6 @@ struct i915_state
unsigned immediate[I915_MAX_IMMEDIATE];
unsigned dynamic[I915_MAX_DYNAMIC];
- float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4];
/** number of constants passed in through a constant buffer */
uint num_user_constants[PIPE_SHADER_TYPES];
@@ -149,6 +151,15 @@ struct i915_state
/** Describes the current hardware vertex layout */
struct vertex_info vertex_info;
+ /* static state (dst/depth buffer state) */
+ struct i915_winsys_buffer *cbuf_bo;
+ unsigned cbuf_flags;
+ struct i915_winsys_buffer *depth_bo;
+ unsigned depth_flags;
+ unsigned dst_buf_vars;
+ uint32_t draw_offset;
+ uint32_t draw_size;
+
unsigned id; /* track lost context events */
};
@@ -175,7 +186,7 @@ struct i915_rasterizer_state {
unsigned LIS7;
unsigned sc[1];
- const struct pipe_rasterizer_state *templ;
+ struct pipe_rasterizer_state templ;
union { float f; unsigned u; } ds[2];
};
@@ -212,21 +223,18 @@ struct i915_context {
struct pipe_blend_color blend_color;
struct pipe_stencil_ref stencil_ref;
struct pipe_clip_state clip;
- /* XXX unneded */
struct pipe_resource *constants[PIPE_SHADER_TYPES];
struct pipe_framebuffer_state framebuffer;
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
struct pipe_viewport_state viewport;
- struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
struct pipe_index_buffer index_buffer;
unsigned dirty;
unsigned num_samplers;
unsigned num_fragment_sampler_views;
- unsigned num_vertex_buffers;
struct i915_winsys_batchbuffer *batch;
@@ -237,6 +245,35 @@ struct i915_context {
struct i915_state current;
unsigned hardware_dirty;
+ unsigned immediate_dirty : I915_MAX_IMMEDIATE;
+ unsigned dynamic_dirty : I915_MAX_DYNAMIC;
+ unsigned static_dirty : 4;
+ unsigned flush_dirty : 2;
+
+ struct i915_winsys_buffer *validation_buffers[2 + 1 + I915_TEX_UNITS];
+ int num_validation_buffers;
+
+ struct util_slab_mempool transfer_pool;
+
+ /** blitter/hw-clear */
+ struct blitter_context* blitter;
+
+ /** State tracking needed by u_blitter for save/restore. */
+ void *saved_fs;
+ void (*saved_bind_fs_state)(struct pipe_context *pipe, void *shader);
+ void *saved_vs;
+ struct pipe_clip_state saved_clip;
+ struct i915_velems_state *saved_velems;
+ unsigned saved_nr_vertex_buffers;
+ struct pipe_vertex_buffer saved_vertex_buffers[PIPE_MAX_ATTRIBS];
+ unsigned saved_nr_samplers;
+ void *saved_samplers[PIPE_MAX_SAMPLERS];
+ void (*saved_bind_sampler_states)(struct pipe_context *pipe,
+ unsigned num, void **sampler);
+ unsigned saved_nr_sampler_views;
+ struct pipe_sampler_view *saved_sampler_views[PIPE_MAX_SAMPLERS];
+ void (*saved_set_sampler_views)(struct pipe_context *pipe,
+ unsigned num, struct pipe_sampler_view **views);
};
/* A flag for each state_tracker state object:
@@ -253,9 +290,11 @@ struct i915_context {
#define I915_NEW_DEPTH_STENCIL 0x200
#define I915_NEW_SAMPLER 0x400
#define I915_NEW_SAMPLER_VIEW 0x800
-#define I915_NEW_CONSTANTS 0x1000
-#define I915_NEW_VBO 0x2000
-#define I915_NEW_VS 0x4000
+#define I915_NEW_VS_CONSTANTS 0x1000
+#define I915_NEW_FS_CONSTANTS 0x2000
+#define I915_NEW_GS_CONSTANTS 0x4000
+#define I915_NEW_VBO 0x8000
+#define I915_NEW_VS 0x10000
/* Driver's internally generated state flags:
@@ -272,7 +311,25 @@ struct i915_context {
#define I915_HW_PROGRAM (1<<I915_CACHE_PROGRAM)
#define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS)
#define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0))
-#define I915_HW_INVARIENT (1<<(I915_MAX_CACHE+1))
+#define I915_HW_INVARIANT (1<<(I915_MAX_CACHE+1))
+#define I915_HW_FLUSH (1<<(I915_MAX_CACHE+1))
+
+/* hw flush handling */
+#define I915_FLUSH_CACHE 1
+#define I915_PIPELINE_FLUSH 2
+
+/* split up static state */
+#define I915_DST_BUF_COLOR 1
+#define I915_DST_BUF_DEPTH 2
+#define I915_DST_VARS 4
+#define I915_DST_RECT 8
+
+static INLINE
+void i915_set_flush_dirty(struct i915_context *i915, unsigned flush)
+{
+ i915->hardware_dirty |= I915_HW_FLUSH;
+ i915->flush_dirty |= flush;
+}
/***********************************************************************
@@ -297,14 +354,20 @@ void i915_emit_hardware_state(struct i915_context *i915 );
/***********************************************************************
* i915_clear.c:
*/
-void i915_clear( struct pipe_context *pipe, unsigned buffers, const float *rgba,
- double depth, unsigned stencil);
+void i915_clear_blitter(struct pipe_context *pipe, unsigned buffers, const float *rgba,
+ double depth, unsigned stencil);
+void i915_clear_render(struct pipe_context *pipe, unsigned buffers, const float *rgba,
+ double depth, unsigned stencil);
+void i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba,
+ double depth, unsigned stencil,
+ unsigned destx, unsigned desty, unsigned width, unsigned height);
/***********************************************************************
*
*/
void i915_init_state_functions( struct i915_context *i915 );
+void i915_init_fixup_state_functions( struct i915_context *i915 );
void i915_init_flush_functions( struct i915_context *i915 );
void i915_init_string_functions( struct i915_context *i915 );
diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c
index d7150c99c4..c4eed473e9 100644
--- a/src/gallium/drivers/i915/i915_debug.c
+++ b/src/gallium/drivers/i915/i915_debug.c
@@ -46,12 +46,18 @@ static const struct debug_named_value debug_options[] = {
};
unsigned i915_debug = 0;
-boolean i915_tiling = TRUE;
-void i915_debug_init(struct i915_screen *screen)
+DEBUG_GET_ONCE_FLAGS_OPTION(i915_debug, "I915_DEBUG", debug_options, 0)
+DEBUG_GET_ONCE_BOOL_OPTION(i915_no_tiling, "I915_NO_TILING", FALSE)
+DEBUG_GET_ONCE_BOOL_OPTION(i915_lie, "I915_LIE", FALSE)
+DEBUG_GET_ONCE_BOOL_OPTION(i915_use_blitter, "I915_USE_BLITTER", FALSE)
+
+void i915_debug_init(struct i915_screen *is)
{
- i915_debug = debug_get_flags_option("I915_DEBUG", debug_options, 0);
- i915_tiling = !debug_get_bool_option("I915_NO_TILING", FALSE);
+ i915_debug = debug_get_option_i915_debug();
+ is->debug.tiling = !debug_get_option_i915_no_tiling();
+ is->debug.lie = debug_get_option_i915_lie();
+ is->debug.use_blitter = debug_get_option_i915_use_blitter();
}
@@ -948,7 +954,8 @@ i915_dump_dirty(struct i915_context *i915, const char *func)
{I915_NEW_DEPTH_STENCIL, "depth_stencil"},
{I915_NEW_SAMPLER, "sampler"},
{I915_NEW_SAMPLER_VIEW, "sampler_view"},
- {I915_NEW_CONSTANTS, "constants"},
+ {I915_NEW_VS_CONSTANTS, "vs_const"},
+ {I915_NEW_FS_CONSTANTS, "fs_const"},
{I915_NEW_VBO, "vbo"},
{I915_NEW_VS, "vs"},
{0, NULL},
@@ -976,7 +983,7 @@ i915_dump_hardware_dirty(struct i915_context *i915, const char *func)
{I915_HW_PROGRAM, "program"},
{I915_HW_CONSTANTS, "constants"},
{I915_HW_IMMEDIATE, "immediate"},
- {I915_HW_INVARIENT, "invarient"},
+ {I915_HW_INVARIANT, "invariant"},
{0, NULL},
};
int i;
diff --git a/src/gallium/drivers/i915/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h
index 11af7662f0..fa60799d0c 100644
--- a/src/gallium/drivers/i915/i915_debug.h
+++ b/src/gallium/drivers/i915/i915_debug.h
@@ -46,7 +46,6 @@ struct i915_winsys_batchbuffer;
#define DBG_CONSTANTS 0x20
extern unsigned i915_debug;
-extern boolean i915_tiling;
#ifdef DEBUG
static INLINE boolean
diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c
index a2c70b1199..b4e81147c4 100644
--- a/src/gallium/drivers/i915/i915_flush.c
+++ b/src/gallium/drivers/i915/i915_flush.c
@@ -39,34 +39,12 @@
static void i915_flush_pipe( struct pipe_context *pipe,
- unsigned flags,
struct pipe_fence_handle **fence )
{
struct i915_context *i915 = i915_context(pipe);
draw_flush(i915->draw);
-#if 0
- /* Do we need to emit an MI_FLUSH command to flush the hardware
- * caches?
- */
- if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) {
- unsigned flush = MI_FLUSH;
-
- if (!(flags & PIPE_FLUSH_RENDER_CACHE))
- flush |= INHIBIT_FLUSH_RENDER_CACHE;
-
- if (flags & PIPE_FLUSH_TEXTURE_CACHE)
- flush |= FLUSH_MAP_CACHE;
-
- if (!BEGIN_BATCH(1, 0)) {
- FLUSH_BATCH(NULL);
- assert(BEGIN_BATCH(1, 0));
- }
- OUT_BATCH( flush );
- }
-#endif
-
if (i915->batch->map == i915->batch->ptr) {
return;
}
@@ -74,7 +52,6 @@ static void i915_flush_pipe( struct pipe_context *pipe,
/* If there are no flags, just flush pending commands to hardware:
*/
FLUSH_BATCH(fence);
- i915->vbo_flushed = 1;
I915_DBG(DBG_FLUSH, "%s: #####\n", __FUNCTION__);
}
@@ -93,5 +70,11 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence)
struct i915_winsys_batchbuffer *batch = i915->batch;
batch->iws->batchbuffer_flush(batch, fence);
+ i915->vbo_flushed = 1;
i915->hardware_dirty = ~0;
+ i915->immediate_dirty = ~0;
+ i915->dynamic_dirty = ~0;
+ i915->static_dirty = ~0;
+ /* kernel emits flushes in between batchbuffers */
+ i915->flush_dirty = 0;
}
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 25c53210be..b145b58be3 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -924,6 +924,14 @@ i915_translate_instructions(struct i915_fp_compile *p,
tgsi_parse_token( &parse );
switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ /*
+ * We only support one cbuf, but we still need to ignore the property
+ * correctly so we don't hit the assert at the end of the switch case.
+ */
+ assert(parse.FullToken.FullProperty.Property.PropertyName ==
+ TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS);
+ break;
case TGSI_TOKEN_TYPE_DECLARATION:
if (parse.FullToken.FullDeclaration.Declaration.File
== TGSI_FILE_CONSTANT) {
@@ -1166,15 +1174,24 @@ void
i915_translate_fragment_program( struct i915_context *i915,
struct i915_fragment_shader *fs)
{
- struct i915_fp_compile *p = i915_init_compile(i915, fs);
+ struct i915_fp_compile *p;
const struct tgsi_token *tokens = fs->state.tokens;
- i915_find_wpos_space(p);
-
#if 0
tgsi_dump(tokens, 0);
#endif
+ /* hw doesn't seem to like empty frag programs, even when the depth write
+ * fixup gets emitted below - may that one is fishy, too? */
+ if (fs->info.num_instructions == 1) {
+ i915_use_passthrough_shader(fs);
+
+ return;
+ }
+
+ p = i915_init_compile(i915, fs);
+ i915_find_wpos_space(p);
+
i915_translate_instructions(p, tokens);
i915_fixup_depth_write(p);
diff --git a/src/gallium/drivers/i915/i915_prim_emit.c b/src/gallium/drivers/i915/i915_prim_emit.c
index dd997e2cf4..85656cd784 100644
--- a/src/gallium/drivers/i915/i915_prim_emit.c
+++ b/src/gallium/drivers/i915/i915_prim_emit.c
@@ -144,15 +144,14 @@ emit_prim( struct draw_stage *stage,
vertex_size = i915->current.vertex_info.size * 4; /* in bytes */
assert(vertex_size >= 12); /* never smaller than 12 bytes */
- if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) {
+ if (!BEGIN_BATCH( 1 + nr * vertex_size / 4)) {
FLUSH_BATCH(NULL);
/* Make sure state is re-emitted after a flush:
*/
- i915_update_derived( i915 );
i915_emit_hardware_state( i915 );
- if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) {
+ if (!BEGIN_BATCH( 1 + nr * vertex_size / 4)) {
assert(0);
return;
}
diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c
index baebbc7bae..79db3b650e 100644
--- a/src/gallium/drivers/i915/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915/i915_prim_vbuf.c
@@ -181,6 +181,7 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size)
struct i915_winsys *iws = i915->iws;
if (i915_render->vbo) {
+ iws->buffer_unmap(iws, i915_render->vbo);
iws->buffer_destroy(iws, i915_render->vbo);
/*
* XXX If buffers where referenced then this should be done in
@@ -208,6 +209,7 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size)
i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size,
I915_NEW_VERTEX);
+ i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE);
}
/**
@@ -262,16 +264,13 @@ i915_vbuf_render_map_vertices(struct vbuf_render *render)
{
struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
struct i915_context *i915 = i915_render->i915;
- struct i915_winsys *iws = i915->iws;
if (i915->vbo_flushed)
debug_printf("%s bad vbo flush occured stalling on hw\n", __FUNCTION__);
#ifdef VBUF_MAP_BUFFER
- i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE);
return (unsigned char *)i915_render->vbo_ptr + i915_render->vbo_sw_offset;
#else
- (void)iws;
return (unsigned char *)i915_render->vbo_ptr;
#endif
}
@@ -288,7 +287,7 @@ i915_vbuf_render_unmap_vertices(struct vbuf_render *render,
i915_render->vbo_max_index = max_index;
i915_render->vbo_max_used = MAX2(i915_render->vbo_max_used, i915_render->vertex_size * (max_index + 1));
#ifdef VBUF_MAP_BUFFER
- iws->buffer_unmap(iws, i915_render->vbo);
+ (void)iws;
#else
i915_render->map_used_start = i915_render->vertex_size * min_index;
i915_render->map_used_end = i915_render->vertex_size * (max_index + 1);
@@ -466,16 +465,15 @@ draw_arrays_fallback(struct vbuf_render *render,
if (i915->hardware_dirty)
i915_emit_hardware_state(i915);
- if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) {
+ if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) {
FLUSH_BATCH(NULL);
/* Make sure state is re-emitted after a flush:
*/
- i915_update_derived(i915);
i915_emit_hardware_state(i915);
i915->vbo_flushed = 1;
- if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) {
+ if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) {
assert(0);
goto out;
}
@@ -515,16 +513,15 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render,
if (i915->hardware_dirty)
i915_emit_hardware_state(i915);
- if (!BEGIN_BATCH(2, 0)) {
+ if (!BEGIN_BATCH(2)) {
FLUSH_BATCH(NULL);
/* Make sure state is re-emitted after a flush:
*/
- i915_update_derived(i915);
i915_emit_hardware_state(i915);
i915->vbo_flushed = 1;
- if (!BEGIN_BATCH(2, 0)) {
+ if (!BEGIN_BATCH(2)) {
assert(0);
goto out;
}
@@ -636,16 +633,15 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render,
if (i915->hardware_dirty)
i915_emit_hardware_state(i915);
- if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) {
+ if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) {
FLUSH_BATCH(NULL);
/* Make sure state is re-emitted after a flush:
*/
- i915_update_derived(i915);
i915_emit_hardware_state(i915);
i915->vbo_flushed = 1;
- if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) {
+ if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) {
assert(0);
goto out;
}
@@ -684,6 +680,15 @@ static void
i915_vbuf_render_destroy(struct vbuf_render *render)
{
struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+ struct i915_context *i915 = i915_render->i915;
+ struct i915_winsys *iws = i915->iws;
+
+ if (i915_render->vbo) {
+ i915->vbo = NULL;
+ iws->buffer_unmap(iws, i915_render->vbo);
+ iws->buffer_destroy(iws, i915_render->vbo);
+ }
+
FREE(i915_render);
}
diff --git a/src/gallium/drivers/i915/i915_reg.h b/src/gallium/drivers/i915/i915_reg.h
index 5e4e80ddf6..6fe032cdb6 100644
--- a/src/gallium/drivers/i915/i915_reg.h
+++ b/src/gallium/drivers/i915/i915_reg.h
@@ -148,6 +148,7 @@
/* p161 */
#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16))
/* Dword 1 */
+#define CLASSIC_EARLY_DEPTH (1<<31)
#define TEX_DEFAULT_COLOR_OGL (0<<30)
#define TEX_DEFAULT_COLOR_D3D (1<<30)
#define ZR_EARLY_DEPTH (1<<29)
diff --git a/src/gallium/drivers/i915/i915_resource.c b/src/gallium/drivers/i915/i915_resource.c
index 499233ceb9..7f52ba11d6 100644
--- a/src/gallium/drivers/i915/i915_resource.c
+++ b/src/gallium/drivers/i915/i915_resource.c
@@ -31,7 +31,6 @@ i915_resource_from_handle(struct pipe_screen * screen,
void
i915_init_resource_functions(struct i915_context *i915 )
{
- i915->base.is_resource_referenced = u_default_is_resource_referenced;
i915->base.get_transfer = u_get_transfer_vtbl;
i915->base.transfer_map = u_transfer_map_vtbl;
i915->base.transfer_flush_region = u_transfer_flush_region_vtbl;
diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c b/src/gallium/drivers/i915/i915_resource_buffer.c
index d3d6a6752a..d02c768703 100644
--- a/src/gallium/drivers/i915/i915_resource_buffer.c
+++ b/src/gallium/drivers/i915/i915_resource_buffer.c
@@ -60,6 +60,38 @@ i915_buffer_destroy(struct pipe_screen *screen,
}
+static struct pipe_transfer *
+i915_get_transfer(struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool);
+
+ if (transfer == NULL)
+ return NULL;
+
+ transfer->resource = resource;
+ transfer->level = level;
+ transfer->usage = usage;
+ transfer->box = *box;
+
+ /* Note strides are zero, this is ok for buffers, but not for
+ * textures 2d & higher at least.
+ */
+ return transfer;
+}
+
+static void
+i915_transfer_destroy(struct pipe_context *pipe,
+ struct pipe_transfer *transfer)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ util_slab_free(&i915->transfer_pool, transfer);
+}
+
static void *
i915_buffer_transfer_map( struct pipe_context *pipe,
struct pipe_transfer *transfer )
@@ -91,9 +123,8 @@ struct u_resource_vtbl i915_buffer_vtbl =
{
i915_buffer_get_handle, /* get_handle */
i915_buffer_destroy, /* resource_destroy */
- NULL, /* is_resource_referenced */
- u_default_get_transfer, /* get_transfer */
- u_default_transfer_destroy, /* transfer_destroy */
+ i915_get_transfer, /* get_transfer */
+ i915_transfer_destroy, /* transfer_destroy */
i915_buffer_transfer_map, /* transfer_map */
u_default_transfer_flush_region, /* transfer_flush_region */
u_default_transfer_unmap, /* transfer_unmap */
@@ -115,8 +146,7 @@ i915_buffer_create(struct pipe_screen *screen,
buf->b.vtbl = &i915_buffer_vtbl;
pipe_reference_init(&buf->b.b.reference, 1);
buf->b.b.screen = screen;
-
- buf->data = MALLOC(template->width0);
+ buf->data = align_malloc(template->width0, 16);
buf->free_on_destroy = TRUE;
if (!buf->data)
diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c
index f19106f341..7816925d23 100644
--- a/src/gallium/drivers/i915/i915_resource_texture.c
+++ b/src/gallium/drivers/i915/i915_resource_texture.c
@@ -172,19 +172,22 @@ i915_texture_set_image_offset(struct i915_texture *tex,
}
static enum i915_winsys_buffer_tile
-i915_texture_tiling(struct pipe_resource *pt)
+i915_texture_tiling(struct i915_screen *is, struct i915_texture *tex)
{
- if (!i915_tiling)
+ if (!is->debug.tiling)
return I915_TILE_NONE;
- if (pt->target == PIPE_TEXTURE_1D)
+ if (tex->b.b.target == PIPE_TEXTURE_1D)
return I915_TILE_NONE;
- if (util_format_is_s3tc(pt->format))
+ if (util_format_is_s3tc(tex->b.b.format))
/* XXX X-tiling might make sense */
return I915_TILE_NONE;
- return I915_TILE_X;
+ if (is->debug.use_blitter)
+ return I915_TILE_X;
+ else
+ return I915_TILE_Y;
}
@@ -401,11 +404,7 @@ i915_texture_layout_3d(struct i915_texture *tex)
static boolean
i915_texture_layout(struct i915_texture * tex)
{
- struct pipe_resource *pt = &tex->b.b;
-
- tex->tiling = i915_texture_tiling(pt);
-
- switch (pt->target) {
+ switch (tex->b.b.target) {
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
@@ -649,11 +648,7 @@ i945_texture_layout_cube(struct i915_texture *tex)
static boolean
i945_texture_layout(struct i915_texture * tex)
{
- struct pipe_resource *pt = &tex->b.b;
-
- tex->tiling = i915_texture_tiling(pt);
-
- switch (pt->target) {
+ switch (tex->b.b.target) {
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
@@ -664,7 +659,7 @@ i945_texture_layout(struct i915_texture * tex)
i945_texture_layout_3d(tex);
break;
case PIPE_TEXTURE_CUBE:
- if (!util_format_is_s3tc(pt->format))
+ if (!util_format_is_s3tc(tex->b.b.format))
i9x5_texture_layout_cube(tex);
else
i945_texture_layout_cube(tex);
@@ -716,14 +711,16 @@ i915_texture_destroy(struct pipe_screen *screen,
}
static struct pipe_transfer *
-i915_texture_get_transfer(struct pipe_context *context,
+i915_texture_get_transfer(struct pipe_context *pipe,
struct pipe_resource *resource,
unsigned level,
unsigned usage,
const struct pipe_box *box)
{
+ struct i915_context *i915 = i915_context(pipe);
struct i915_texture *tex = i915_texture(resource);
- struct pipe_transfer *transfer = CALLOC_STRUCT(pipe_transfer);
+ struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool);
+
if (transfer == NULL)
return NULL;
@@ -737,6 +734,14 @@ i915_texture_get_transfer(struct pipe_context *context,
return transfer;
}
+static void
+i915_transfer_destroy(struct pipe_context *pipe,
+ struct pipe_transfer *transfer)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ util_slab_free(&i915->transfer_pool, transfer);
+}
+
static void *
i915_texture_transfer_map(struct pipe_context *pipe,
struct pipe_transfer *transfer)
@@ -754,6 +759,9 @@ i915_texture_transfer_map(struct pipe_context *pipe,
assert(box->z == 0);
offset = i915_texture_offset(tex, transfer->level, box->z);
+ /* TODO this is a sledgehammer */
+ pipe->flush(pipe, NULL);
+
map = iws->buffer_map(iws, tex->buffer,
(transfer->usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE);
if (map == NULL)
@@ -779,9 +787,8 @@ struct u_resource_vtbl i915_texture_vtbl =
{
i915_texture_get_handle, /* get_handle */
i915_texture_destroy, /* resource_destroy */
- NULL, /* is_resource_referenced */
i915_texture_get_transfer, /* get_transfer */
- u_default_transfer_destroy, /* transfer_destroy */
+ i915_transfer_destroy, /* transfer_destroy */
i915_texture_transfer_map, /* transfer_map */
u_default_transfer_flush_region, /* transfer_flush_region */
i915_texture_transfer_unmap, /* transfer_unmap */
@@ -808,6 +815,8 @@ i915_texture_create(struct pipe_screen *screen,
pipe_reference_init(&tex->b.b.reference, 1);
tex->b.b.screen = screen;
+ tex->tiling = i915_texture_tiling(is, tex);
+
if (is->is_i945) {
if (!i945_texture_layout(tex))
goto fail;
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index f66478e729..e62b609eb5 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -35,7 +35,6 @@
#include "i915_debug.h"
#include "i915_context.h"
#include "i915_screen.h"
-#include "i915_surface.h"
#include "i915_resource.h"
#include "i915_winsys.h"
#include "i915_public.h"
@@ -99,59 +98,84 @@ i915_get_name(struct pipe_screen *screen)
}
static int
-i915_get_param(struct pipe_screen *screen, enum pipe_cap param)
+i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
{
- switch (param) {
- case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
- return 8;
- case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- return 0;
- case PIPE_CAP_MAX_COMBINED_SAMPLERS:
- return 8;
+ struct i915_screen *is = i915_screen(screen);
+
+ switch (cap) {
+ /* Supported features (boolean caps). */
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
case PIPE_CAP_NPOT_TEXTURES:
- return 1;
+ case PIPE_CAP_PRIMITIVE_RESTART: /* draw module */
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
case PIPE_CAP_TWO_SIDED_STENCIL:
return 1;
- case PIPE_CAP_GLSL:
- return 0;
- case PIPE_CAP_ANISOTROPIC_FILTER:
- return 0;
+
+ /* Features that should be supported (boolean caps). */
+ /* XXX: Just test the code */
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ /* XXX: No code but hw supports it */
case PIPE_CAP_POINT_SPRITE:
+ /* Also lie about these when asked to (needed for GLSL / GL 2.0) */
+ return is->debug.lie ? 1 : 0;
+
+ /* Unsupported features (boolean caps). */
+ case PIPE_CAP_ARRAY_TEXTURES:
+ case PIPE_CAP_DEPTH_CLAMP:
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ case PIPE_CAP_TIMER_QUERY:
return 0;
- case PIPE_CAP_MAX_RENDER_TARGETS:
- return 1;
+
+ /* Features we can lie about (boolean caps). */
+ case PIPE_CAP_GLSL:
case PIPE_CAP_OCCLUSION_QUERY:
+ return is->debug.lie ? 1 : 0;
+
+ /* Texturing. */
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return 8;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
return 0;
- case PIPE_CAP_TIMER_QUERY:
- return 0;
- case PIPE_CAP_TEXTURE_SHADOW_MAP:
- return 1;
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
return I915_MAX_TEXTURE_2D_LEVELS;
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
return I915_MAX_TEXTURE_3D_LEVELS;
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
return I915_MAX_TEXTURE_2D_LEVELS;
+
+ /* Render targets. */
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 1;
+
+ /* Fragment coordinate conventions. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
return 1;
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
return 0;
- case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
- /* disable for now */
- return 0;
+
default:
+ debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap);
return 0;
}
}
static int
-i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param)
+i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap cap)
{
switch(shader) {
case PIPE_SHADER_VERTEX:
- return draw_get_shader_param(shader, param);
+ return draw_get_shader_param(shader, cap);
case PIPE_SHADER_FRAGMENT:
break;
default:
@@ -159,7 +183,7 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha
}
/* XXX: these are just shader model 2.0 values, fix this! */
- switch(param) {
+ switch(cap) {
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
return 96;
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
@@ -192,15 +216,15 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha
case PIPE_SHADER_CAP_SUBROUTINES:
return 0;
default:
- assert(0);
+ debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap);
return 0;
}
}
static float
-i915_get_paramf(struct pipe_screen *screen, enum pipe_cap param)
+i915_get_paramf(struct pipe_screen *screen, enum pipe_cap cap)
{
- switch (param) {
+ switch(cap) {
case PIPE_CAP_MAX_LINE_WIDTH:
/* fall-through */
case PIPE_CAP_MAX_LINE_WIDTH_AA:
@@ -218,6 +242,7 @@ i915_get_paramf(struct pipe_screen *screen, enum pipe_cap param)
return 16.0;
default:
+ debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap);
return 0;
}
}
@@ -227,8 +252,7 @@ i915_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned tex_usage,
- unsigned geom_flags)
+ unsigned tex_usage)
{
static const enum pipe_format tex_supported[] = {
PIPE_FORMAT_B8G8R8A8_UNORM,
@@ -295,24 +319,23 @@ i915_fence_reference(struct pipe_screen *screen,
is->iws->fence_reference(is->iws, ptr, fence);
}
-static int
+static boolean
i915_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence,
- unsigned flags)
+ struct pipe_fence_handle *fence)
{
struct i915_screen *is = i915_screen(screen);
- return is->iws->fence_signalled(is->iws, fence);
+ return is->iws->fence_signalled(is->iws, fence) == 0;
}
-static int
+static boolean
i915_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
- unsigned flags)
+ uint64_t timeout)
{
struct i915_screen *is = i915_screen(screen);
- return is->iws->fence_finish(is->iws, fence);
+ return is->iws->fence_finish(is->iws, fence) == 0;
}
@@ -322,6 +345,20 @@ i915_fence_finish(struct pipe_screen *screen,
static void
+i915_flush_frontbuffer(struct pipe_screen *screen,
+ struct pipe_resource *resource,
+ unsigned level, unsigned layer,
+ void *winsys_drawable_handle)
+{
+ /* XXX: Dummy right now. */
+ (void)screen;
+ (void)resource;
+ (void)level;
+ (void)layer;
+ (void)winsys_drawable_handle;
+}
+
+static void
i915_destroy_screen(struct pipe_screen *screen)
{
struct i915_screen *is = i915_screen(screen);
@@ -372,6 +409,7 @@ i915_screen_create(struct i915_winsys *iws)
is->base.winsys = NULL;
is->base.destroy = i915_destroy_screen;
+ is->base.flush_frontbuffer = i915_flush_frontbuffer;
is->base.get_name = i915_get_name;
is->base.get_vendor = i915_get_vendor;
diff --git a/src/gallium/drivers/i915/i915_screen.h b/src/gallium/drivers/i915/i915_screen.h
index 0c4186c68e..cfc585b535 100644
--- a/src/gallium/drivers/i915/i915_screen.h
+++ b/src/gallium/drivers/i915/i915_screen.h
@@ -45,16 +45,12 @@ struct i915_screen
struct i915_winsys *iws;
boolean is_i945;
-};
-
-/**
- * Subclass of pipe_transfer
- */
-struct i915_transfer
-{
- struct pipe_transfer base;
- unsigned offset;
+ struct {
+ boolean tiling;
+ boolean lie;
+ boolean use_blitter;
+ } debug;
};
@@ -69,11 +65,5 @@ i915_screen(struct pipe_screen *pscreen)
return (struct i915_screen *) pscreen;
}
-static INLINE struct i915_transfer *
-i915_transfer(struct pipe_transfer *transfer)
-{
- return (struct i915_transfer *)transfer;
-}
-
#endif /* I915_SCREEN_H */
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index bbfcff6bc4..1b57c5776f 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -33,6 +33,7 @@
#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_transfer.h"
#include "tgsi/tgsi_parse.h"
#include "i915_context.h"
@@ -57,10 +58,8 @@ translate_wrap_mode(unsigned wrap)
return TEXCOORDMODE_CLAMP_EDGE;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
return TEXCOORDMODE_CLAMP_BORDER;
- /*
- case PIPE_TEX_WRAP_MIRRORED_REPEAT:
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
return TEXCOORDMODE_MIRROR;
- */
default:
return TEXCOORDMODE_WRAP;
}
@@ -288,6 +287,17 @@ i915_create_sampler_state(struct pipe_context *pipe,
return cso;
}
+static void i915_fixup_bind_sampler_states(struct pipe_context *pipe,
+ unsigned num, void **sampler)
+{
+ struct i915_context *i915 = i915_context(pipe);
+
+ i915->saved_nr_samplers = num;
+ memcpy(&i915->saved_samplers, sampler, sizeof(void *) * num);
+
+ i915->saved_bind_sampler_states(pipe, num, sampler);
+}
+
static void i915_bind_sampler_states(struct pipe_context *pipe,
unsigned num, void **sampler)
{
@@ -467,6 +477,17 @@ i915_create_fs_state(struct pipe_context *pipe,
}
static void
+i915_fixup_bind_fs_state(struct pipe_context *pipe, void *shader)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ draw_flush(i915->draw);
+
+ i915->saved_fs = shader;
+
+ i915->saved_bind_fs_state(pipe, shader);
+}
+
+static void
i915_bind_fs_state(struct pipe_context *pipe, void *shader)
{
struct i915_context *i915 = i915_context(pipe);
@@ -506,6 +527,8 @@ static void i915_bind_vs_state(struct pipe_context *pipe, void *shader)
{
struct i915_context *i915 = i915_context(pipe);
+ i915->saved_vs = shader;
+
/* just pass-through to draw module */
draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader);
@@ -525,32 +548,74 @@ static void i915_set_constant_buffer(struct pipe_context *pipe,
struct pipe_resource *buf)
{
struct i915_context *i915 = i915_context(pipe);
- draw_flush(i915->draw);
+ unsigned new_num = 0;
+ boolean diff = TRUE;
- /* Make a copy of shader constants.
- * During fragment program translation we may add additional
- * constants to the array.
- *
- * We want to consider the situation where some user constants
- * (ex: a material color) may change frequently but the shader program
- * stays the same. In that case we should only be updating the first
- * N constants, leaving any extras from shader translation alone.
- */
+
+ /* XXX don't support geom shaders now */
+ if (shader == PIPE_SHADER_GEOMETRY)
+ return;
+
+ /* if we have a new buffer compare it with the old one */
if (buf) {
- struct i915_buffer *ir = i915_buffer(buf);
- memcpy(i915->current.constants[shader], ir->data, ir->b.b.width0);
- i915->current.num_user_constants[shader] = (ir->b.b.width0 /
- 4 * sizeof(float));
- }
- else {
- i915->current.num_user_constants[shader] = 0;
+ struct i915_buffer *ibuf = i915_buffer(buf);
+ struct pipe_resource *old_buf = i915->constants[shader];
+ struct i915_buffer *old = old_buf ? i915_buffer(old_buf) : NULL;
+ unsigned old_num = i915->current.num_user_constants[shader];
+
+ new_num = ibuf->b.b.width0 / 4 * sizeof(float);
+
+ if (old_num == new_num) {
+ if (old_num == 0)
+ diff = FALSE;
+#if 0
+ /* XXX no point in running this code since st/mesa only uses user buffers */
+ /* Can't compare the buffer data since they are userbuffers */
+ else if (old && old->free_on_destroy)
+ diff = memcmp(old->data, ibuf->data, ibuf->b.b.width0);
+#else
+ (void)old;
+#endif
+ }
+ } else {
+ diff = i915->current.num_user_constants[shader] != 0;
}
+ /*
+ * flush before updateing the state.
+ */
+ if (diff && shader == PIPE_SHADER_FRAGMENT)
+ draw_flush(i915->draw);
+
+ pipe_resource_reference(&i915->constants[shader], buf);
+ i915->current.num_user_constants[shader] = new_num;
- i915->dirty |= I915_NEW_CONSTANTS;
+ if (diff)
+ i915->dirty |= shader == PIPE_SHADER_VERTEX ? I915_NEW_VS_CONSTANTS : I915_NEW_FS_CONSTANTS;
}
+static void
+i915_fixup_set_fragment_sampler_views(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_sampler_view **views)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ int i;
+
+ for (i = 0; i < num; i++)
+ pipe_sampler_view_reference(&i915->saved_sampler_views[i],
+ views[i]);
+
+ for (i = num; i < i915->saved_nr_sampler_views; i++)
+ pipe_sampler_view_reference(&i915->saved_sampler_views[i],
+ NULL);
+
+ i915->saved_nr_sampler_views = num;
+
+ i915->saved_set_sampler_views(pipe, num, views);
+}
+
static void i915_set_fragment_sampler_views(struct pipe_context *pipe,
unsigned num,
struct pipe_sampler_view **views)
@@ -622,7 +687,8 @@ static void i915_set_framebuffer_state(struct pipe_context *pipe,
i915->framebuffer.height = fb->height;
i915->framebuffer.nr_cbufs = fb->nr_cbufs;
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
- pipe_surface_reference(&i915->framebuffer.cbufs[i], fb->cbufs[i]);
+ pipe_surface_reference(&i915->framebuffer.cbufs[i],
+ i < fb->nr_cbufs ? fb->cbufs[i] : NULL);
}
pipe_surface_reference(&i915->framebuffer.zsbuf, fb->zsbuf);
@@ -637,6 +703,8 @@ static void i915_set_clip_state( struct pipe_context *pipe,
struct i915_context *i915 = i915_context(pipe);
draw_flush(i915->draw);
+ i915->saved_clip = *clip;
+
draw_set_clip_state(i915->draw, clip);
i915->dirty |= I915_NEW_CLIP;
@@ -667,7 +735,7 @@ i915_create_rasterizer_state(struct pipe_context *pipe,
{
struct i915_rasterizer_state *cso = CALLOC_STRUCT( i915_rasterizer_state );
- cso->templ = rasterizer;
+ cso->templ = *rasterizer;
cso->color_interp = rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
cso->light_twoside = rasterizer->light_twoside;
cso->ds[0].u = _3DSTATE_DEPTH_OFFSET_SCALE;
@@ -738,7 +806,7 @@ static void i915_bind_rasterizer_state( struct pipe_context *pipe,
/* pass-through to draw module */
draw_set_rasterizer_state(i915->draw,
- (i915->rasterizer ? i915->rasterizer->templ : NULL),
+ (i915->rasterizer ? &(i915->rasterizer->templ) : NULL),
raster);
i915->dirty |= I915_NEW_RASTERIZER;
@@ -755,16 +823,28 @@ static void i915_set_vertex_buffers(struct pipe_context *pipe,
const struct pipe_vertex_buffer *buffers)
{
struct i915_context *i915 = i915_context(pipe);
- /* Because we change state before the draw_set_vertex_buffers call
- * we need a flush here, just to be sure.
- */
- draw_flush(i915->draw);
+ struct draw_context *draw = i915->draw;
+ int i;
- memcpy(i915->vertex_buffer, buffers, count * sizeof(buffers[0]));
- i915->num_vertex_buffers = count;
+ util_copy_vertex_buffers(i915->saved_vertex_buffers,
+ &i915->saved_nr_vertex_buffers,
+ buffers, count);
+#if 0
+ /* XXX doesn't look like this is needed */
+ /* unmap old */
+ for (i = 0; i < i915->num_vertex_buffers; i++) {
+ draw_set_mapped_vertex_buffer(draw, i, NULL);
+ }
+#endif
/* pass-through to draw module */
- draw_set_vertex_buffers(i915->draw, count, buffers);
+ draw_set_vertex_buffers(draw, count, buffers);
+
+ /* map new */
+ for (i = 0; i < count; i++) {
+ void *buf = i915_buffer(buffers[i].buffer)->data;
+ draw_set_mapped_vertex_buffer(draw, i, buf);
+ }
}
static void *
@@ -789,10 +869,7 @@ i915_bind_vertex_elements_state(struct pipe_context *pipe,
struct i915_context *i915 = i915_context(pipe);
struct i915_velems_state *i915_velems = (struct i915_velems_state *) velems;
- /* Because we change state before the draw_set_vertex_buffers call
- * we need a flush here, just to be sure.
- */
- draw_flush(i915->draw);
+ i915->saved_velems = velems;
/* pass-through to draw module */
if (i915_velems) {
@@ -870,4 +947,16 @@ i915_init_state_functions( struct i915_context *i915 )
i915->base.set_viewport_state = i915_set_viewport_state;
i915->base.set_vertex_buffers = i915_set_vertex_buffers;
i915->base.set_index_buffer = i915_set_index_buffer;
+ i915->base.redefine_user_buffer = u_default_redefine_user_buffer;
+}
+
+void
+i915_init_fixup_state_functions( struct i915_context *i915 )
+{
+ i915->saved_bind_fs_state = i915->base.bind_fs_state;
+ i915->base.bind_fs_state = i915_fixup_bind_fs_state;
+ i915->saved_bind_sampler_states = i915->base.bind_fragment_sampler_states;
+ i915->base.bind_fragment_sampler_states = i915_fixup_bind_sampler_states;
+ i915->saved_set_sampler_views = i915->base.set_fragment_sampler_views;
+ i915->base.set_fragment_sampler_views = i915_fixup_set_fragment_sampler_views;
}
diff --git a/src/gallium/drivers/i915/i915_state.h b/src/gallium/drivers/i915/i915_state.h
index b4074dc35b..3f4e40294e 100644
--- a/src/gallium/drivers/i915/i915_state.h
+++ b/src/gallium/drivers/i915/i915_state.h
@@ -48,6 +48,7 @@ extern struct i915_tracked_state i915_hw_immediate;
extern struct i915_tracked_state i915_hw_dynamic;
extern struct i915_tracked_state i915_hw_fs;
extern struct i915_tracked_state i915_hw_framebuffer;
+extern struct i915_tracked_state i915_hw_dst_buf_vars;
extern struct i915_tracked_state i915_hw_constants;
void i915_update_derived(struct i915_context *i915);
diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c
index 1d4026a214..59ac2f7292 100644
--- a/src/gallium/drivers/i915/i915_state_derived.c
+++ b/src/gallium/drivers/i915/i915_state_derived.c
@@ -165,6 +165,7 @@ static struct i915_tracked_state *atoms[] = {
&i915_hw_dynamic,
&i915_hw_fs,
&i915_hw_framebuffer,
+ &i915_hw_dst_buf_vars,
&i915_hw_constants,
NULL,
};
diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c
index d61a8c3407..204cee6fe9 100644
--- a/src/gallium/drivers/i915/i915_state_dynamic.c
+++ b/src/gallium/drivers/i915/i915_state_dynamic.c
@@ -46,18 +46,34 @@
* (active) state every time a 4kb boundary is crossed.
*/
-static INLINE void set_dynamic_indirect(struct i915_context *i915,
- unsigned offset,
- const unsigned *src,
- unsigned dwords)
+static INLINE void set_dynamic(struct i915_context *i915,
+ unsigned offset,
+ const unsigned state)
+{
+ if (i915->current.dynamic[offset] == state)
+ return;
+
+ i915->current.dynamic[offset] = state;
+ i915->dynamic_dirty |= 1 << offset;
+ i915->hardware_dirty |= I915_HW_DYNAMIC;
+}
+
+
+
+static INLINE void set_dynamic_array(struct i915_context *i915,
+ unsigned offset,
+ const unsigned *src,
+ unsigned dwords)
{
unsigned i;
if (!memcmp(src, &i915->current.dynamic[offset], dwords * 4))
return;
- for (i = 0; i < dwords; i++)
+ for (i = 0; i < dwords; i++) {
i915->current.dynamic[offset + i] = src[i];
+ i915->dynamic_dirty |= 1 << (offset + i);
+ }
i915->hardware_dirty |= I915_HW_DYNAMIC;
}
@@ -79,12 +95,7 @@ static void upload_MODES4(struct i915_context *i915)
*/
modes4 |= i915->blend->modes4;
- /* Always, so that we know when state is in-active:
- */
- set_dynamic_indirect(i915,
- I915_DYNAMIC_MODES4,
- &modes4,
- 1);
+ set_dynamic(i915, I915_DYNAMIC_MODES4, modes4);
}
const struct i915_tracked_state i915_upload_MODES4 = {
@@ -107,10 +118,7 @@ static void upload_BFO(struct i915_context *i915)
bfo[0] |= i915->stencil_ref.ref_value[1] << BFO_STENCIL_REF_SHIFT;
}
- set_dynamic_indirect(i915,
- I915_DYNAMIC_BFO_0,
- &(bfo[0]),
- 2);
+ set_dynamic_array(i915, I915_DYNAMIC_BFO_0, bfo, 2);
}
const struct i915_tracked_state i915_upload_BFO = {
@@ -141,10 +149,7 @@ static void upload_BLENDCOLOR(struct i915_context *i915)
color[3]);
}
- set_dynamic_indirect(i915,
- I915_DYNAMIC_BC_0,
- bc,
- 2);
+ set_dynamic_array(i915, I915_DYNAMIC_BC_0, bc, 2);
}
const struct i915_tracked_state i915_upload_BLENDCOLOR = {
@@ -161,10 +166,7 @@ static void upload_IAB(struct i915_context *i915)
{
unsigned iab = i915->blend->iab;
- set_dynamic_indirect(i915,
- I915_DYNAMIC_IAB,
- &iab,
- 1);
+ set_dynamic(i915, I915_DYNAMIC_IAB, iab);
}
const struct i915_tracked_state i915_upload_IAB = {
@@ -179,10 +181,8 @@ const struct i915_tracked_state i915_upload_IAB = {
*/
static void upload_DEPTHSCALE(struct i915_context *i915)
{
- set_dynamic_indirect(i915,
- I915_DYNAMIC_DEPTHSCALE_0,
- &(i915->rasterizer->ds[0].u),
- 2);
+ set_dynamic_array(i915, I915_DYNAMIC_DEPTHSCALE_0,
+ &i915->rasterizer->ds[0].u, 2);
}
const struct i915_tracked_state i915_upload_DEPTHSCALE = {
@@ -234,10 +234,7 @@ static void upload_STIPPLE(struct i915_context *i915)
(p[3] << 12));
}
- set_dynamic_indirect(i915,
- I915_DYNAMIC_STP_0,
- &st[0],
- 2);
+ set_dynamic_array(i915, I915_DYNAMIC_STP_0, st, 2);
}
const struct i915_tracked_state i915_upload_STIPPLE = {
@@ -253,10 +250,7 @@ const struct i915_tracked_state i915_upload_STIPPLE = {
*/
static void upload_SCISSOR_ENABLE( struct i915_context *i915 )
{
- set_dynamic_indirect(i915,
- I915_DYNAMIC_SC_ENA_0,
- &(i915->rasterizer->sc[0]),
- 1);
+ set_dynamic(i915, I915_DYNAMIC_SC_ENA_0, i915->rasterizer->sc[0]);
}
const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = {
@@ -282,10 +276,7 @@ static void upload_SCISSOR_RECT(struct i915_context *i915)
sc[1] = (y1 << 16) | (x1 & 0xffff);
sc[2] = (y2 << 16) | (x2 & 0xffff);
- set_dynamic_indirect(i915,
- I915_DYNAMIC_SC_RECT_0,
- &sc[0],
- 3);
+ set_dynamic_array(i915, I915_DYNAMIC_SC_RECT_0, sc, 3);
}
const struct i915_tracked_state i915_upload_SCISSOR_RECT = {
diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index c48d53ffbb..0155cd8351 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -35,411 +35,425 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
-static unsigned translate_format( enum pipe_format format )
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+struct i915_tracked_hw_state {
+ const char *name;
+ void (*validate)(struct i915_context *, unsigned *batch_space);
+ void (*emit)(struct i915_context *);
+ unsigned dirty, batch_space;
+};
+
+
+static void
+validate_flush(struct i915_context *i915, unsigned *batch_space)
{
- switch (format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- return COLOR_BUF_ARGB8888;
- case PIPE_FORMAT_B5G6R5_UNORM:
- return COLOR_BUF_RGB565;
- default:
- assert(0);
- return 0;
- }
+ *batch_space = i915->flush_dirty ? 1 : 0;
}
-static unsigned translate_depth_format( enum pipe_format zformat )
+static void
+emit_flush(struct i915_context *i915)
{
- switch (zformat) {
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- return DEPTH_FRMT_24_FIXED_8_OTHER;
- case PIPE_FORMAT_Z16_UNORM:
- return DEPTH_FRMT_16_FIXED;
- default:
- assert(0);
- return 0;
- }
+ /* Cache handling is very cheap atm. State handling can request to flushes:
+ * - I915_FLUSH_CACHE which is a flush everything request and
+ * - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush.
+ * Because the cache handling is so dumb, no explicit "invalidate map cache".
+ * Also, the first is a strict superset of the latter, so the following logic
+ * works. */
+ if (i915->flush_dirty & I915_FLUSH_CACHE)
+ OUT_BATCH(MI_FLUSH | FLUSH_MAP_CACHE);
+ else if (i915->flush_dirty & I915_PIPELINE_FLUSH)
+ OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
}
+uint32_t invariant_state[] = {
+ _3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | AA_LINE_ECAAR_WIDTH_1_0 |
+ AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0,
-/**
- * Examine framebuffer state to determine width, height.
- */
-static boolean
-framebuffer_size(const struct pipe_framebuffer_state *fb,
- uint *width, uint *height)
+ _3DSTATE_DFLT_DIFFUSE_CMD, 0,
+
+ _3DSTATE_DFLT_SPEC_CMD, 0,
+
+ _3DSTATE_DFLT_Z_CMD, 0,
+
+ _3DSTATE_COORD_SET_BINDINGS |
+ CSB_TCB(0, 0) |
+ CSB_TCB(1, 1) |
+ CSB_TCB(2, 2) |
+ CSB_TCB(3, 3) |
+ CSB_TCB(4, 4) |
+ CSB_TCB(5, 5) |
+ CSB_TCB(6, 6) |
+ CSB_TCB(7, 7),
+
+ _3DSTATE_RASTER_RULES_CMD |
+ ENABLE_POINT_RASTER_RULE |
+ OGL_POINT_RASTER_RULE |
+ ENABLE_LINE_STRIP_PROVOKE_VRTX |
+ ENABLE_TRI_FAN_PROVOKE_VRTX |
+ LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) |
+ ENABLE_TEXKILL_3D_4D |
+ TEXKILL_4D,
+
+ _3DSTATE_DEPTH_SUBRECT_DISABLE,
+
+ /* disable indirect state for now
+ */
+ _3DSTATE_LOAD_INDIRECT | 0, 0};
+
+static void
+emit_invariant(struct i915_context *i915)
{
- if (fb->cbufs[0]) {
- *width = fb->cbufs[0]->width;
- *height = fb->cbufs[0]->height;
- return TRUE;
- }
- else if (fb->zsbuf) {
- *width = fb->zsbuf->width;
- *height = fb->zsbuf->height;
- return TRUE;
- }
- else {
- *width = *height = 0;
- return FALSE;
- }
+ i915_winsys_batchbuffer_write(i915->batch, invariant_state,
+ Elements(invariant_state)*sizeof(uint32_t));
}
-static inline uint32_t
-buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling)
+static void
+validate_immediate(struct i915_context *i915, unsigned *batch_space)
{
- uint32_t tiling_bits = 0;
-
- switch (tiling) {
- case I915_TILE_Y:
- tiling_bits |= BUF_3D_TILE_WALK_Y;
- case I915_TILE_X:
- tiling_bits |= BUF_3D_TILED_SURFACE;
- case I915_TILE_NONE:
- break;
- }
+ unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
+ 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
+ 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
+ 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
+ i915->immediate_dirty;
- return tiling_bits;
+ if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0) && i915->vbo)
+ i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo;
+
+ *batch_space = 1 + util_bitcount(dirty);
}
-/* Push the state into the sarea and/or texture memory.
- */
-void
-i915_emit_hardware_state(struct i915_context *i915 )
+static void
+emit_immediate(struct i915_context *i915)
{
- /* XXX: there must be an easier way */
- const unsigned dwords = ( 14 +
- 7 +
- I915_MAX_DYNAMIC +
- 8 +
- 2 + I915_TEX_UNITS*3 +
- 2 + I915_TEX_UNITS*3 +
- 2 + I915_MAX_CONSTANT*4 +
-#if 0
- i915->current.program_len +
-#else
- i915->fs->program_len +
-#endif
- 6
- ) * 3/2; /* plus 50% margin */
- const unsigned relocs = ( I915_TEX_UNITS +
- 3
- ) * 3/2; /* plus 50% margin */
+ /* remove unwatned bits and S7 */
+ unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
+ 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
+ 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
+ 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
+ i915->immediate_dirty;
+ int i, num = util_bitcount(dirty);
+ assert(num && num <= I915_MAX_IMMEDIATE);
+
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+ dirty << 4 | (num - 1));
+
+ if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) {
+ if (i915->vbo)
+ OUT_RELOC(i915->vbo, I915_USAGE_VERTEX,
+ i915->current.immediate[I915_IMMEDIATE_S0]);
+ else
+ OUT_BATCH(0);
+ }
- uintptr_t save_ptr;
- size_t save_relocs;
+ for (i = 1; i < I915_MAX_IMMEDIATE; i++) {
+ if (dirty & (1 << i))
+ OUT_BATCH(i915->current.immediate[i]);
+ }
+}
- if (I915_DBG_ON(DBG_ATOMS))
- i915_dump_hardware_dirty(i915, __FUNCTION__);
+static void
+validate_dynamic(struct i915_context *i915, unsigned *batch_space)
+{
+ *batch_space = util_bitcount(i915->dynamic_dirty & ((1 << I915_MAX_DYNAMIC) - 1));
+}
- if(!BEGIN_BATCH(dwords, relocs)) {
- FLUSH_BATCH(NULL);
- assert(BEGIN_BATCH(dwords, relocs));
+static void
+emit_dynamic(struct i915_context *i915)
+{
+ int i;
+ for (i = 0; i < I915_MAX_DYNAMIC; i++) {
+ if (i915->dynamic_dirty & (1 << i))
+ OUT_BATCH(i915->current.dynamic[i]);
}
+}
- save_ptr = (uintptr_t)i915->batch->ptr;
- save_relocs = i915->batch->relocs;
-
- /* 14 dwords, 0 relocs */
- if (i915->hardware_dirty & I915_HW_INVARIENT)
- {
- OUT_BATCH(_3DSTATE_AA_CMD |
- AA_LINE_ECAAR_WIDTH_ENABLE |
- AA_LINE_ECAAR_WIDTH_1_0 |
- AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
-
- OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
- OUT_BATCH(0);
-
- OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
- OUT_BATCH(0);
-
- OUT_BATCH(_3DSTATE_DFLT_Z_CMD);
- OUT_BATCH(0);
-
- OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
- CSB_TCB(0, 0) |
- CSB_TCB(1, 1) |
- CSB_TCB(2, 2) |
- CSB_TCB(3, 3) |
- CSB_TCB(4, 4) |
- CSB_TCB(5, 5) |
- CSB_TCB(6, 6) |
- CSB_TCB(7, 7));
-
- OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
- ENABLE_POINT_RASTER_RULE |
- OGL_POINT_RASTER_RULE |
- ENABLE_LINE_STRIP_PROVOKE_VRTX |
- ENABLE_TRI_FAN_PROVOKE_VRTX |
- LINE_STRIP_PROVOKE_VRTX(1) |
- TRI_FAN_PROVOKE_VRTX(2) |
- ENABLE_TEXKILL_3D_4D |
- TEXKILL_4D);
-
- /* Need to initialize this to zero.
- */
- OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0));
- OUT_BATCH(0);
-
- OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
-
- /* disable indirect state for now
- */
- OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0);
- OUT_BATCH(0);
+static void
+validate_static(struct i915_context *i915, unsigned *batch_space)
+{
+ *batch_space = 0;
+
+ if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) {
+ i915->validation_buffers[i915->num_validation_buffers++]
+ = i915->current.cbuf_bo;
+ *batch_space += 3;
}
- /* 7 dwords, 1 relocs */
- if (i915->hardware_dirty & I915_HW_IMMEDIATE)
- {
- OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
- I1_LOAD_S(0) |
- I1_LOAD_S(1) |
- I1_LOAD_S(2) |
- I1_LOAD_S(4) |
- I1_LOAD_S(5) |
- I1_LOAD_S(6) |
- (5));
-
- if(i915->vbo)
- OUT_RELOC(i915->vbo,
- I915_USAGE_VERTEX,
- i915->current.immediate[I915_IMMEDIATE_S0]);
- else
- /* FIXME: we should not do this */
- OUT_BATCH(0);
- OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S1]);
- OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S2]);
- OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S4]);
- OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]);
- OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]);
- }
-
-#if 01
- /* I915_MAX_DYNAMIC dwords, 0 relocs */
- if (i915->hardware_dirty & I915_HW_DYNAMIC)
- {
- int i;
- for (i = 0; i < I915_MAX_DYNAMIC; i++) {
- OUT_BATCH(i915->current.dynamic[i]);
- }
+ if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) {
+ i915->validation_buffers[i915->num_validation_buffers++]
+ = i915->current.depth_bo;
+ *batch_space += 3;
}
-#endif
-#if 01
- /* 8 dwords, 2 relocs */
- if (i915->hardware_dirty & I915_HW_STATIC)
- {
- struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
- struct pipe_surface *depth_surface = i915->framebuffer.zsbuf;
+ if (i915->static_dirty & I915_DST_VARS)
+ *batch_space += 2;
+
+ if (i915->static_dirty & I915_DST_RECT)
+ *batch_space += 5;
+}
- if (cbuf_surface) {
- struct i915_texture *tex = i915_texture(cbuf_surface->texture);
- assert(tex);
+static void
+emit_static(struct i915_context *i915)
+{
+ if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) {
+ OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
+ OUT_BATCH(i915->current.cbuf_flags);
+ OUT_RELOC(i915->current.cbuf_bo,
+ I915_USAGE_RENDER,
+ 0);
+ }
- OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
+ /* What happens if no zbuf??
+ */
+ if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) {
+ OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
+ OUT_BATCH(i915->current.depth_flags);
+ OUT_RELOC(i915->current.depth_bo,
+ I915_USAGE_RENDER,
+ 0);
+ }
- OUT_BATCH(BUF_3D_ID_COLOR_BACK |
- BUF_3D_PITCH(tex->stride) | /* pitch in bytes */
- buf_3d_tiling_bits(tex->tiling));
+ if (i915->static_dirty & I915_DST_VARS) {
+ OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
+ OUT_BATCH(i915->current.dst_buf_vars);
+ }
+}
- OUT_RELOC(tex->buffer,
- I915_USAGE_RENDER,
- 0);
- }
+static void
+validate_map(struct i915_context *i915, unsigned *batch_space)
+{
+ const uint enabled = i915->current.sampler_enable_flags;
+ uint unit;
+ struct i915_texture *tex;
- /* What happens if no zbuf??
- */
- if (depth_surface) {
- struct i915_texture *tex = i915_texture(depth_surface->texture);
- unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level,
- depth_surface->u.tex.first_layer);
- assert(tex);
- assert(offset == 0);
-
- OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
-
- assert(tex);
- OUT_BATCH(BUF_3D_ID_DEPTH |
- BUF_3D_PITCH(tex->stride) | /* pitch in bytes */
- buf_3d_tiling_bits(tex->tiling));
-
- OUT_RELOC(tex->buffer,
- I915_USAGE_RENDER,
- 0);
- }
+ *batch_space = i915->current.sampler_enable_nr ?
+ 2 + 3*i915->current.sampler_enable_nr : 0;
- {
- unsigned cformat, zformat = 0;
-
- if (cbuf_surface)
- cformat = cbuf_surface->format;
- else
- cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */
- cformat = translate_format(cformat);
-
- if (depth_surface)
- zformat = translate_depth_format( i915->framebuffer.zsbuf->format );
-
- OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
- OUT_BATCH(DSTORG_HORT_BIAS(0x8) | /* .5 */
- DSTORG_VERT_BIAS(0x8) | /* .5 */
- LOD_PRECLAMP_OGL |
- TEX_DEFAULT_COLOR_OGL |
- cformat |
- zformat );
+ for (unit = 0; unit < I915_TEX_UNITS; unit++) {
+ if (enabled & (1 << unit)) {
+ tex = i915_texture(i915->fragment_sampler_views[unit]->texture);
+ i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer;
}
}
-#endif
+}
-#if 01
- /* texture images */
- /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */
- if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER))
- {
- const uint nr = i915->current.sampler_enable_nr;
- if (nr) {
- const uint enabled = i915->current.sampler_enable_flags;
- uint unit;
- uint count = 0;
- OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
- OUT_BATCH(enabled);
- for (unit = 0; unit < I915_TEX_UNITS; unit++) {
- if (enabled & (1 << unit)) {
- struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture);
- struct i915_winsys_buffer *buf = texture->buffer;
- assert(buf);
-
- count++;
-
- OUT_RELOC(buf, I915_USAGE_SAMPLER, 0);
- OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */
- OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */
- }
- }
- assert(count == nr);
+static void
+emit_map(struct i915_context *i915)
+{
+ const uint nr = i915->current.sampler_enable_nr;
+ if (nr) {
+ const uint enabled = i915->current.sampler_enable_flags;
+ uint unit;
+ uint count = 0;
+ OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
+ OUT_BATCH(enabled);
+ for (unit = 0; unit < I915_TEX_UNITS; unit++) {
+ if (enabled & (1 << unit)) {
+ struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture);
+ struct i915_winsys_buffer *buf = texture->buffer;
+ assert(buf);
+
+ count++;
+
+ OUT_RELOC(buf, I915_USAGE_SAMPLER, 0);
+ OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */
+ OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */
}
}
-#endif
+ assert(count == nr);
+ }
+}
+
+static void
+validate_sampler(struct i915_context *i915, unsigned *batch_space)
+{
+ *batch_space = i915->current.sampler_enable_nr ?
+ 2 + 3*i915->current.sampler_enable_nr : 0;
+}
-#if 01
- /* samplers */
- /* 2 + I915_TEX_UNITS*3 dwords, 0 relocs */
- if (i915->hardware_dirty & I915_HW_SAMPLER)
- {
- if (i915->current.sampler_enable_nr) {
- int i;
-
- OUT_BATCH( _3DSTATE_SAMPLER_STATE |
- (3 * i915->current.sampler_enable_nr) );
-
- OUT_BATCH( i915->current.sampler_enable_flags );
-
- for (i = 0; i < I915_TEX_UNITS; i++) {
- if (i915->current.sampler_enable_flags & (1<<i)) {
- OUT_BATCH( i915->current.sampler[i][0] );
- OUT_BATCH( i915->current.sampler[i][1] );
- OUT_BATCH( i915->current.sampler[i][2] );
- }
+static void
+emit_sampler(struct i915_context *i915)
+{
+ if (i915->current.sampler_enable_nr) {
+ int i;
+
+ OUT_BATCH( _3DSTATE_SAMPLER_STATE |
+ (3 * i915->current.sampler_enable_nr) );
+
+ OUT_BATCH( i915->current.sampler_enable_flags );
+
+ for (i = 0; i < I915_TEX_UNITS; i++) {
+ if (i915->current.sampler_enable_flags & (1<<i)) {
+ OUT_BATCH( i915->current.sampler[i][0] );
+ OUT_BATCH( i915->current.sampler[i][1] );
+ OUT_BATCH( i915->current.sampler[i][2] );
}
}
}
-#endif
+}
+
+static void
+validate_constants(struct i915_context *i915, unsigned *batch_space)
+{
+ *batch_space = i915->fs->num_constants ?
+ 2 + 4*i915->fs->num_constants : 0;
+}
+
+static void
+emit_constants(struct i915_context *i915)
+{
+ /* Collate the user-defined constants with the fragment shader's
+ * immediates according to the constant_flags[] array.
+ */
+ const uint nr = i915->fs->num_constants;
+ if (nr) {
+ uint i;
+
+ OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) );
+ OUT_BATCH((1 << nr) - 1);
-#if 01
- /* constants */
- /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */
- if (i915->hardware_dirty & I915_HW_CONSTANTS)
- {
- /* Collate the user-defined constants with the fragment shader's
- * immediates according to the constant_flags[] array.
- */
- const uint nr = i915->fs->num_constants;
- if (nr) {
- uint i;
-
- OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) );
- OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) );
-
- for (i = 0; i < nr; i++) {
- const uint *c;
- if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
- /* grab user-defined constant */
- c = (uint *) i915->current.constants[PIPE_SHADER_FRAGMENT][i];
- }
- else {
- /* emit program constant */
- c = (uint *) i915->fs->constants[i];
- }
+ for (i = 0; i < nr; i++) {
+ const uint *c;
+ if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
+ /* grab user-defined constant */
+ c = (uint *) i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])->data;
+ c += 4 * i;
+ }
+ else {
+ /* emit program constant */
+ c = (uint *) i915->fs->constants[i];
+ }
#if 0 /* debug */
- {
- float *f = (float *) c;
- printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
- (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
- ? "user" : "immediate"));
- }
-#endif
- OUT_BATCH(*c++);
- OUT_BATCH(*c++);
- OUT_BATCH(*c++);
- OUT_BATCH(*c++);
+ {
+ float *f = (float *) c;
+ printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
+ (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
+ ? "user" : "immediate"));
}
+#endif
+ OUT_BATCH(*c++);
+ OUT_BATCH(*c++);
+ OUT_BATCH(*c++);
+ OUT_BATCH(*c++);
}
}
-#endif
+}
-#if 01
- /* Fragment program */
- /* i915->current.program_len dwords, 0 relocs */
- if (i915->hardware_dirty & I915_HW_PROGRAM)
- {
+static void
+validate_program(struct i915_context *i915, unsigned *batch_space)
+{
+ *batch_space = i915->fs->program_len;
+}
+
+static void
+emit_program(struct i915_context *i915)
+{
uint i;
/* we should always have, at least, a pass-through program */
assert(i915->fs->program_len > 0);
for (i = 0; i < i915->fs->program_len; i++) {
OUT_BATCH(i915->fs->program[i]);
}
+}
+
+static void
+emit_draw_rect(struct i915_context *i915)
+{
+ if (i915->static_dirty & I915_DST_RECT) {
+ OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
+ OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS);
+ OUT_BATCH(i915->current.draw_offset);
+ OUT_BATCH(i915->current.draw_size);
+ OUT_BATCH(i915->current.draw_offset);
}
-#endif
+}
-#if 01
- /* drawing surface size */
- /* 6 dwords, 0 relocs */
- if (i915->hardware_dirty & I915_HW_STATIC)
- {
- uint w, h;
- struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
- struct i915_texture *tex = i915_texture(cbuf_surface->texture);
- unsigned x, y;
- int layer;
- uint32_t draw_offset;
- boolean ret;
+static boolean
+i915_validate_state(struct i915_context *i915, unsigned *batch_space)
+{
+ unsigned tmp;
+
+ i915->num_validation_buffers = 0;
+ if (i915->hardware_dirty & I915_HW_INVARIANT)
+ *batch_space = Elements(invariant_state);
+ else
+ *batch_space = 0;
+
+#define VALIDATE_ATOM(atom, hw_dirty) \
+ if (i915->hardware_dirty & hw_dirty) { \
+ validate_##atom(i915, &tmp); \
+ *batch_space += tmp; }
+ VALIDATE_ATOM(flush, I915_HW_FLUSH);
+ VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE);
+ VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC);
+ VALIDATE_ATOM(static, I915_HW_STATIC);
+ VALIDATE_ATOM(map, I915_HW_MAP);
+ VALIDATE_ATOM(sampler, I915_HW_SAMPLER);
+ VALIDATE_ATOM(constants, I915_HW_CONSTANTS);
+ VALIDATE_ATOM(program, I915_HW_PROGRAM);
+#undef VALIDATE_ATOM
+
+ if (i915->num_validation_buffers == 0)
+ return TRUE;
- ret = framebuffer_size(&i915->framebuffer, &w, &h);
- assert(ret);
+ if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers,
+ i915->num_validation_buffers))
+ return FALSE;
- layer = cbuf_surface->u.tex.first_layer;
+ return TRUE;
+}
- x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx;
- y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy;
+/* Push the state into the sarea and/or texture memory.
+ */
+void
+i915_emit_hardware_state(struct i915_context *i915 )
+{
+ unsigned batch_space;
+ uintptr_t save_ptr;
- draw_offset = x | (y << 16);
+ assert(i915->dirty == 0);
- /* XXX flush only required when the draw_offset changes! */
- OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
- OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
- OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS);
- OUT_BATCH(draw_offset);
- OUT_BATCH((w - 1 + x) | ((h - 1 + y) << 16));
- OUT_BATCH(draw_offset);
+ if (I915_DBG_ON(DBG_ATOMS))
+ i915_dump_hardware_dirty(i915, __FUNCTION__);
+
+ if (!i915_validate_state(i915, &batch_space)) {
+ FLUSH_BATCH(NULL);
+ assert(i915_validate_state(i915, &batch_space));
}
-#endif
- I915_DBG(DBG_EMIT, "%s: used %d dwords, %d relocs\n", __FUNCTION__,
+ if(!BEGIN_BATCH(batch_space)) {
+ FLUSH_BATCH(NULL);
+ assert(i915_validate_state(i915, &batch_space));
+ assert(BEGIN_BATCH(batch_space));
+ }
+
+ save_ptr = (uintptr_t)i915->batch->ptr;
+
+#define EMIT_ATOM(atom, hw_dirty) \
+ if (i915->hardware_dirty & hw_dirty) \
+ emit_##atom(i915);
+ EMIT_ATOM(flush, I915_HW_FLUSH);
+ EMIT_ATOM(invariant, I915_HW_INVARIANT);
+ EMIT_ATOM(immediate, I915_HW_IMMEDIATE);
+ EMIT_ATOM(dynamic, I915_HW_DYNAMIC);
+ EMIT_ATOM(static, I915_HW_STATIC);
+ EMIT_ATOM(map, I915_HW_MAP);
+ EMIT_ATOM(sampler, I915_HW_SAMPLER);
+ EMIT_ATOM(constants, I915_HW_CONSTANTS);
+ EMIT_ATOM(program, I915_HW_PROGRAM);
+ EMIT_ATOM(draw_rect, I915_HW_STATIC);
+#undef EMIT_ATOM
+
+ I915_DBG(DBG_EMIT, "%s: used %d dwords, %d dwords reserved\n", __FUNCTION__,
((uintptr_t)i915->batch->ptr - save_ptr) / 4,
- i915->batch->relocs - save_relocs);
+ batch_space);
+ assert(((uintptr_t)i915->batch->ptr - save_ptr) / 4 == batch_space);
i915->hardware_dirty = 0;
+ i915->immediate_dirty = 0;
+ i915->dynamic_dirty = 0;
+ i915->static_dirty = 0;
+ i915->flush_dirty = 0;
}
diff --git a/src/gallium/drivers/i915/i915_state_fpc.c b/src/gallium/drivers/i915/i915_state_fpc.c
index ec7cec0e47..1959a24691 100644
--- a/src/gallium/drivers/i915/i915_state_fpc.c
+++ b/src/gallium/drivers/i915/i915_state_fpc.c
@@ -40,7 +40,7 @@ static void update_hw_constants(struct i915_context *i915)
struct i915_tracked_state i915_hw_constants = {
"hw_constants",
update_hw_constants,
- I915_NEW_CONSTANTS | I915_NEW_FS
+ I915_NEW_FS_CONSTANTS | I915_NEW_FS
};
diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c
index f9ade7077f..8134864739 100644
--- a/src/gallium/drivers/i915/i915_state_immediate.c
+++ b/src/gallium/drivers/i915/i915_state_immediate.c
@@ -36,12 +36,20 @@
#include "util/u_memory.h"
-/* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet.
- * Would like to opportunistically recombine all these fragments into
- * a single packet containing only what has changed, but for now emit
- * as multiple packets.
+/* Convinience function to check immediate state.
*/
+static INLINE void set_immediate(struct i915_context *i915,
+ unsigned offset,
+ const unsigned state)
+{
+ if (i915->current.immediate[offset] == state)
+ return;
+
+ i915->current.immediate[offset] = state;
+ i915->immediate_dirty |= 1 << offset;
+ i915->hardware_dirty |= I915_HW_IMMEDIATE;
+}
@@ -56,9 +64,14 @@ static void upload_S0S1(struct i915_context *i915)
*/
LIS0 = i915->vbo_offset;
+ /* Need to force this */
+ if (i915->dirty & I915_NEW_VBO) {
+ i915->immediate_dirty |= 1 << I915_IMMEDIATE_S0;
+ i915->hardware_dirty |= I915_HW_IMMEDIATE;
+ }
+
/* I915_NEW_VERTEX_SIZE
*/
- /* XXX do this where the vertex size is calculated! */
{
unsigned vertex_size = i915->current.vertex_info.size;
@@ -66,16 +79,8 @@ static void upload_S0S1(struct i915_context *i915)
(vertex_size << 16));
}
- /* I915_NEW_VBO
- */
- if (1 ||
- i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 ||
- i915->current.immediate[I915_IMMEDIATE_S1] != LIS1)
- {
- i915->current.immediate[I915_IMMEDIATE_S0] = LIS0;
- i915->current.immediate[I915_IMMEDIATE_S1] = LIS1;
- i915->hardware_dirty |= I915_HW_IMMEDIATE;
- }
+ set_immediate(i915, I915_IMMEDIATE_S0, LIS0);
+ set_immediate(i915, I915_IMMEDIATE_S1, LIS1);
}
const struct i915_tracked_state i915_upload_S0S1 = {
@@ -98,21 +103,13 @@ static void upload_S2S4(struct i915_context *i915)
{
LIS2 = i915->current.vertex_info.hwfmt[1];
LIS4 = i915->current.vertex_info.hwfmt[0];
- /*
- debug_printf("LIS2: 0x%x LIS4: 0x%x\n", LIS2, LIS4);
- */
assert(LIS4); /* should never be zero? */
}
LIS4 |= i915->rasterizer->LIS4;
- if (LIS2 != i915->current.immediate[I915_IMMEDIATE_S2] ||
- LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) {
-
- i915->current.immediate[I915_IMMEDIATE_S2] = LIS2;
- i915->current.immediate[I915_IMMEDIATE_S4] = LIS4;
- i915->hardware_dirty |= I915_HW_IMMEDIATE;
- }
+ set_immediate(i915, I915_IMMEDIATE_S2, LIS2);
+ set_immediate(i915, I915_IMMEDIATE_S4, LIS4);
}
const struct i915_tracked_state i915_upload_S2S4 = {
@@ -142,15 +139,12 @@ static void upload_S5(struct i915_context *i915)
#if 0
/* I915_NEW_RASTERIZER
*/
- if (i915->state.Polygon->OffsetFill) {
+ if (i915->rasterizer->LIS7) {
LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE;
}
#endif
- if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) {
- i915->current.immediate[I915_IMMEDIATE_S5] = LIS5;
- i915->hardware_dirty |= I915_HW_IMMEDIATE;
- }
+ set_immediate(i915, I915_IMMEDIATE_S5, LIS5);
}
const struct i915_tracked_state i915_upload_S5 = {
@@ -180,14 +174,11 @@ static void upload_S6(struct i915_context *i915)
*/
LIS6 |= i915->depth_stencil->depth_LIS6;
- if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) {
- i915->current.immediate[I915_IMMEDIATE_S6] = LIS6;
- i915->hardware_dirty |= I915_HW_IMMEDIATE;
- }
+ set_immediate(i915, I915_IMMEDIATE_S6, LIS6);
}
const struct i915_tracked_state i915_upload_S6 = {
- "imm s6",
+ "imm S6",
upload_S6,
I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER
};
@@ -204,10 +195,9 @@ static void upload_S7(struct i915_context *i915)
*/
LIS7 = i915->rasterizer->LIS7;
- if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) {
- i915->current.immediate[I915_IMMEDIATE_S7] = LIS7;
- i915->hardware_dirty |= I915_HW_IMMEDIATE;
- }
+#if 0
+ set_immediate(i915, I915_IMMEDIATE_S7, LIS7);
+#endif
}
const struct i915_tracked_state i915_upload_S7 = {
diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c
index dc9a4c1e2f..2865298318 100644
--- a/src/gallium/drivers/i915/i915_state_static.c
+++ b/src/gallium/drivers/i915/i915_state_static.c
@@ -27,17 +27,120 @@
#include "i915_reg.h"
#include "i915_context.h"
#include "i915_state.h"
+#include "i915_resource.h"
+#include "i915_screen.h"
/***********************************************************************
* Update framebuffer state
*/
+static unsigned translate_format(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ return COLOR_BUF_ARGB8888;
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ return COLOR_BUF_RGB565;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static unsigned translate_depth_format(enum pipe_format zformat)
+{
+ switch (zformat) {
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ return DEPTH_FRMT_24_FIXED_8_OTHER;
+ case PIPE_FORMAT_Z16_UNORM:
+ return DEPTH_FRMT_16_FIXED;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static inline uint32_t
+buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling)
+{
+ uint32_t tiling_bits = 0;
+
+ switch (tiling) {
+ case I915_TILE_Y:
+ tiling_bits |= BUF_3D_TILE_WALK_Y;
+ case I915_TILE_X:
+ tiling_bits |= BUF_3D_TILED_SURFACE;
+ case I915_TILE_NONE:
+ break;
+ }
+
+ return tiling_bits;
+}
+
static void update_framebuffer(struct i915_context *i915)
{
- /* HW emit currently references framebuffer state directly:
+ struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
+ struct pipe_surface *depth_surface = i915->framebuffer.zsbuf;
+ unsigned x, y;
+ int layer;
+ uint32_t draw_offset, draw_size;
+
+ if (cbuf_surface) {
+ struct i915_texture *tex = i915_texture(cbuf_surface->texture);
+ assert(tex);
+
+ i915->current.cbuf_bo = tex->buffer;
+ i915->current.cbuf_flags = BUF_3D_ID_COLOR_BACK |
+ BUF_3D_PITCH(tex->stride) | /* pitch in bytes */
+ buf_3d_tiling_bits(tex->tiling);
+
+ layer = cbuf_surface->u.tex.first_layer;
+
+ x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx;
+ y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy;
+ } else {
+ i915->current.cbuf_bo = NULL;
+ x = y = 0;
+ }
+ i915->static_dirty |= I915_DST_BUF_COLOR;
+
+ /* What happens if no zbuf??
*/
+ if (depth_surface) {
+ struct i915_texture *tex = i915_texture(depth_surface->texture);
+ unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level,
+ depth_surface->u.tex.first_layer);
+ assert(tex);
+ assert(offset == 0);
+
+ i915->current.depth_bo = tex->buffer;
+ i915->current.depth_flags = BUF_3D_ID_DEPTH |
+ BUF_3D_PITCH(tex->stride) | /* pitch in bytes */
+ buf_3d_tiling_bits(tex->tiling);
+ } else
+ i915->current.depth_bo = NULL;
+ i915->static_dirty |= I915_DST_BUF_DEPTH;
+
+ /* drawing rect calculations */
+ draw_offset = x | (y << 16);
+ draw_size = (i915->framebuffer.width - 1 + x) |
+ ((i915->framebuffer.height - 1 + y) << 16);
+ if (i915->current.draw_offset != draw_offset) {
+ i915->current.draw_offset = draw_offset;
+ i915_set_flush_dirty(i915, I915_PIPELINE_FLUSH);
+ i915->static_dirty |= I915_DST_RECT;
+ }
+ if (i915->current.draw_size != draw_size) {
+ i915->current.draw_size = draw_size;
+ i915->static_dirty |= I915_DST_RECT;
+ }
+
i915->hardware_dirty |= I915_HW_STATIC;
+
+ /* flush the cache in case we sample from the old renderbuffers */
+ i915_set_flush_dirty(i915, I915_FLUSH_CACHE);
}
struct i915_tracked_state i915_hw_framebuffer = {
@@ -45,3 +148,52 @@ struct i915_tracked_state i915_hw_framebuffer = {
update_framebuffer,
I915_NEW_FRAMEBUFFER
};
+
+static void update_dst_buf_vars(struct i915_context *i915)
+{
+ struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
+ struct pipe_surface *depth_surface = i915->framebuffer.zsbuf;
+ uint32_t dst_buf_vars, cformat, zformat;
+ uint32_t early_z = 0;
+
+ if (cbuf_surface)
+ cformat = cbuf_surface->format;
+ else
+ cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */
+ cformat = translate_format(cformat);
+
+ if (depth_surface) {
+ struct i915_texture *tex = i915_texture(depth_surface->texture);
+ struct i915_screen *is = i915_screen(i915->base.screen);
+
+ zformat = translate_depth_format(depth_surface->format);
+
+ if (is->is_i945 && tex->tiling != I915_TILE_NONE
+ && !i915->fs->info.writes_z)
+ early_z = CLASSIC_EARLY_DEPTH;
+ } else
+ zformat = 0;
+
+ dst_buf_vars = DSTORG_HORT_BIAS(0x8) | /* .5 */
+ DSTORG_VERT_BIAS(0x8) | /* .5 */
+ LOD_PRECLAMP_OGL |
+ TEX_DEFAULT_COLOR_OGL |
+ cformat |
+ zformat |
+ early_z;
+
+ if (i915->current.dst_buf_vars != dst_buf_vars) {
+ if (early_z != (i915->current.dst_buf_vars & CLASSIC_EARLY_DEPTH))
+ i915_set_flush_dirty(i915, I915_PIPELINE_FLUSH);
+
+ i915->current.dst_buf_vars = dst_buf_vars;
+ i915->static_dirty |= I915_DST_VARS;
+ i915->hardware_dirty |= I915_HW_STATIC;
+ }
+}
+
+struct i915_tracked_state i915_hw_dst_buf_vars = {
+ "dst buf vars",
+ update_dst_buf_vars,
+ I915_NEW_FRAMEBUFFER | I915_NEW_FS
+};
diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c
index becc6e93c2..d02c420f6c 100644
--- a/src/gallium/drivers/i915/i915_surface.c
+++ b/src/gallium/drivers/i915/i915_surface.c
@@ -27,6 +27,7 @@
#include "i915_surface.h"
#include "i915_resource.h"
+#include "i915_state.h"
#include "i915_blit.h"
#include "i915_reg.h"
#include "i915_screen.h"
@@ -37,16 +38,119 @@
#include "util/u_memory.h"
#include "util/u_pack_color.h"
+/*
+ * surface functions using the render engine
+ */
+
+static void
+i915_surface_copy_render(struct pipe_context *pipe,
+ struct pipe_resource *dst, unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src, unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ struct i915_context *i915 = i915_context(pipe);
+
+ util_blitter_save_blend(i915->blitter, (void *)i915->blend);
+ util_blitter_save_depth_stencil_alpha(i915->blitter, (void *)i915->depth_stencil);
+ util_blitter_save_stencil_ref(i915->blitter, &i915->stencil_ref);
+ util_blitter_save_rasterizer(i915->blitter, (void *)i915->rasterizer);
+ util_blitter_save_fragment_shader(i915->blitter, i915->saved_fs);
+ util_blitter_save_vertex_shader(i915->blitter, i915->saved_vs);
+ util_blitter_save_viewport(i915->blitter, &i915->viewport);
+ util_blitter_save_clip(i915->blitter, &i915->saved_clip);
+ util_blitter_save_vertex_elements(i915->blitter, i915->saved_velems);
+ util_blitter_save_vertex_buffers(i915->blitter, i915->saved_nr_vertex_buffers,
+ i915->saved_vertex_buffers);
+
+ util_blitter_save_framebuffer(i915->blitter, &i915->framebuffer);
+
+ util_blitter_save_fragment_sampler_states(i915->blitter,
+ i915->saved_nr_samplers,
+ i915->saved_samplers);
+ util_blitter_save_fragment_sampler_views(i915->blitter,
+ i915->saved_nr_sampler_views,
+ i915->saved_sampler_views);
+
+ util_blitter_copy_region(i915->blitter, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box, TRUE);
+}
+
+static void
+i915_clear_render_target_render(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ const float *rgba,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ struct pipe_framebuffer_state fb_state;
+
+ util_blitter_save_framebuffer(i915->blitter, &i915->framebuffer);
+
+ fb_state.width = dst->width;
+ fb_state.height = dst->height;
+ fb_state.nr_cbufs = 1;
+ fb_state.cbufs[0] = dst;
+ fb_state.zsbuf = NULL;
+ pipe->set_framebuffer_state(pipe, &fb_state);
+
+ if (i915->dirty)
+ i915_update_derived(i915);
+
+ i915_clear_emit(pipe, PIPE_CLEAR_COLOR, rgba, 0.0, 0x0,
+ dstx, dsty, width, height);
+
+ pipe->set_framebuffer_state(pipe, &i915->blitter->saved_fb_state);
+ util_unreference_framebuffer_state(&i915->blitter->saved_fb_state);
+ i915->blitter->saved_fb_state.nr_cbufs = ~0;
+}
+
+static void
+i915_clear_depth_stencil_render(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned clear_flags,
+ double depth,
+ unsigned stencil,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ struct pipe_framebuffer_state fb_state;
+
+ util_blitter_save_framebuffer(i915->blitter, &i915->framebuffer);
+
+ fb_state.width = dst->width;
+ fb_state.height = dst->height;
+ fb_state.nr_cbufs = 0;
+ fb_state.zsbuf = dst;
+ pipe->set_framebuffer_state(pipe, &fb_state);
+
+ if (i915->dirty)
+ i915_update_derived(i915);
+
+ i915_clear_emit(pipe, clear_flags & PIPE_CLEAR_DEPTHSTENCIL,
+ NULL, depth, stencil,
+ dstx, dsty, width, height);
+
+ pipe->set_framebuffer_state(pipe, &i915->blitter->saved_fb_state);
+ util_unreference_framebuffer_state(&i915->blitter->saved_fb_state);
+ i915->blitter->saved_fb_state.nr_cbufs = ~0;
+}
+
+/*
+ * surface functions using the blitter
+ */
/* Assumes all values are within bounds -- no checking at this level -
* do it higher up if required.
*/
static void
-i915_surface_copy(struct pipe_context *pipe,
- struct pipe_resource *dst, unsigned dst_level,
- unsigned dstx, unsigned dsty, unsigned dstz,
- struct pipe_resource *src, unsigned src_level,
- const struct pipe_box *src_box)
+i915_surface_copy_blitter(struct pipe_context *pipe,
+ struct pipe_resource *dst, unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src, unsigned src_level,
+ const struct pipe_box *src_box)
{
struct i915_texture *dst_tex = i915_texture(dst);
struct i915_texture *src_tex = i915_texture(src);
@@ -66,7 +170,6 @@ i915_surface_copy(struct pipe_context *pipe,
assert(src_box->z == 0);
src_offset = i915_texture_offset(src_tex, src_level, src_box->z);
- assert( dst != src );
assert( util_format_get_blocksize(dpt->format) == util_format_get_blocksize(spt->format) );
assert( util_format_get_blockwidth(dpt->format) == util_format_get_blockwidth(spt->format) );
assert( util_format_get_blockheight(dpt->format) == util_format_get_blockheight(spt->format) );
@@ -81,13 +184,12 @@ i915_surface_copy(struct pipe_context *pipe,
(short) src_box->width, (short) src_box->height );
}
-
static void
-i915_clear_render_target(struct pipe_context *pipe,
- struct pipe_surface *dst,
- const float *rgba,
- unsigned dstx, unsigned dsty,
- unsigned width, unsigned height)
+i915_clear_render_target_blitter(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ const float *rgba,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
{
struct i915_texture *tex = i915_texture(dst->texture);
struct pipe_resource *pt = &tex->b.b;
@@ -109,13 +211,13 @@ i915_clear_render_target(struct pipe_context *pipe,
}
static void
-i915_clear_depth_stencil(struct pipe_context *pipe,
- struct pipe_surface *dst,
- unsigned clear_flags,
- double depth,
- unsigned stencil,
- unsigned dstx, unsigned dsty,
- unsigned width, unsigned height)
+i915_clear_depth_stencil_blitter(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned clear_flags,
+ double depth,
+ unsigned stencil,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
{
struct i915_texture *tex = i915_texture(dst->texture);
struct pipe_resource *pt = &tex->b.b;
@@ -193,9 +295,15 @@ i915_surface_destroy(struct pipe_context *ctx,
void
i915_init_surface_functions(struct i915_context *i915)
{
- i915->base.resource_copy_region = i915_surface_copy;
- i915->base.clear_render_target = i915_clear_render_target;
- i915->base.clear_depth_stencil = i915_clear_depth_stencil;
+ if (i915_screen(i915->base.screen)->debug.use_blitter) {
+ i915->base.resource_copy_region = i915_surface_copy_blitter;
+ i915->base.clear_render_target = i915_clear_render_target_blitter;
+ i915->base.clear_depth_stencil = i915_clear_depth_stencil_blitter;
+ } else {
+ i915->base.resource_copy_region = i915_surface_copy_render;
+ i915->base.clear_render_target = i915_clear_render_target_render;
+ i915->base.clear_depth_stencil = i915_clear_depth_stencil_render;
+ }
i915->base.create_surface = i915_create_surface;
i915->base.surface_destroy = i915_surface_destroy;
}
diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h
index 24ea416f01..21cfdc9613 100644
--- a/src/gallium/drivers/i915/i915_winsys.h
+++ b/src/gallium/drivers/i915/i915_winsys.h
@@ -76,7 +76,6 @@ struct i915_winsys_batchbuffer {
size_t size;
size_t relocs;
- size_t max_relocs;
/*@}*/
};
@@ -95,6 +94,18 @@ struct i915_winsys {
(*batchbuffer_create)(struct i915_winsys *iws);
/**
+ * Validate buffers for usage in this batchbuffer.
+ * Does space-checking and asorted other book-keeping.
+ *
+ * @batch
+ * @buffers array to buffers to validate
+ * @num_of_buffers size of the passed array
+ */
+ boolean (*validate_buffers)(struct i915_winsys_batchbuffer *batch,
+ struct i915_winsys_buffer **buffers,
+ int num_of_buffers);
+
+ /**
* Emit a relocation to a buffer.
* Target position in batchbuffer is the same as ptr.
*
@@ -103,11 +114,12 @@ struct i915_winsys {
* @usage how is the hardware going to use the buffer.
* @offset add this to the reloc buffers address
* @target buffer where to write the address, null for batchbuffer.
+ * @fenced relocation needs a fence.
*/
int (*batchbuffer_reloc)(struct i915_winsys_batchbuffer *batch,
struct i915_winsys_buffer *reloc,
enum i915_winsys_buffer_usage usage,
- unsigned offset, bool fenced);
+ unsigned offset, boolean fenced);
/**
* Flush a bufferbatch.
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index e80067f3b1..3c93579246 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -64,13 +64,11 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch)
}
struct brw_batchbuffer *
-brw_batchbuffer_alloc(struct brw_winsys_screen *sws,
- struct brw_chipset chipset)
+brw_batchbuffer_alloc(struct brw_winsys_screen *sws)
{
struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer);
batch->sws = sws;
- batch->chipset = chipset;
brw_batchbuffer_reset(batch);
return batch;
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
index 6ca9f617f5..6ecb91857d 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.h
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -26,7 +26,6 @@ struct brw_batchbuffer {
struct brw_winsys_screen *sws;
struct brw_winsys_buffer *buf;
- struct brw_chipset chipset;
/**
* Values exported to speed up the writing the batchbuffer,
@@ -47,8 +46,8 @@ struct brw_batchbuffer {
/*@}*/
};
-struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws,
- struct brw_chipset chipset );
+struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws );
+
void brw_batchbuffer_free(struct brw_batchbuffer *batch);
diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
index ccba205e8c..66b13ea58e 100644
--- a/src/gallium/drivers/i965/brw_clip.c
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -66,16 +66,14 @@ compile_clip_prog( struct brw_context *brw,
c.func.single_program_flow = 1;
- c.chipset = brw->chipset;
c.key = *key;
- c.need_ff_sync = c.chipset.is_igdng;
/* Need to locate the two positions present in vertex + header.
* These are currently hardcoded:
*/
c.header_position_offset = ATTR_SIZE;
- if (c.chipset.is_igdng)
+ if (brw->gen == 5)
delta = 3 * REG_SIZE;
else
delta = REG_SIZE;
@@ -97,7 +95,7 @@ compile_clip_prog( struct brw_context *brw,
if (c.key.output_edgeflag != BRW_OUTPUT_NOT_PRESENT)
c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
c.nr_regs = (c.key.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
else
c.nr_regs = (c.key.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h
index 80e3a11a37..f123b73c06 100644
--- a/src/gallium/drivers/i965/brw_clip.h
+++ b/src/gallium/drivers/i965/brw_clip.h
@@ -125,12 +125,10 @@ struct brw_clip_compile {
GLuint last_tmp;
GLboolean need_direction;
- struct brw_chipset chipset;
GLuint last_mrf;
GLuint header_position_offset;
- GLboolean need_ff_sync;
GLuint nr_color_attrs;
GLuint offset_color0;
diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c
index 66caadc4d5..4ed7362171 100644
--- a/src/gallium/drivers/i965/brw_clip_line.c
+++ b/src/gallium/drivers/i965/brw_clip_line.c
@@ -32,6 +32,7 @@
#include "util/u_debug.h"
#include "brw_defines.h"
+#include "brw_context.h"
#include "brw_eu.h"
#include "brw_clip.h"
@@ -41,7 +42,7 @@
static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
{
GLuint i = 0,j;
-
+ struct brw_context *brw = c->func.brw;
/* Register usage is static, precompute here:
*/
c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
@@ -79,7 +80,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
i++;
}
- if (c->need_ff_sync) {
+ if (brw->needs_ff_sync) {
c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
i++;
}
@@ -120,6 +121,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
static void clip_and_emit_line( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
struct brw_indirect vtx0 = brw_indirect(0, 0);
struct brw_indirect vtx1 = brw_indirect(1, 0);
struct brw_indirect newvtx0 = brw_indirect(2, 0);
@@ -146,7 +148,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_clip_init_clipmask(c);
/* -ve rhw workaround */
- if (c->chipset.is_965) {
+ if (brw->has_negative_rhw_bug) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
@@ -183,7 +185,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
* Both can be negative on GM965/G965 due to RHW workaround
* if so, this object should be rejected.
*/
- if (c->chipset.is_965) {
+ if (brw->has_negative_rhw_bug) {
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
is_neg2 = brw_IF(p, BRW_EXECUTE_1);
{
@@ -208,7 +210,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
/* If both are positive, do nothing */
/* Only on GM965/G965 */
- if (c->chipset.is_965) {
+ if (brw->has_negative_rhw_bug) {
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
is_neg2 = brw_IF(p, BRW_EXECUTE_1);
}
@@ -223,7 +225,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
- if (c->chipset.is_965) {
+ if (brw->has_negative_rhw_bug) {
brw_ENDIF(p, is_neg2);
}
}
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
index 5c3ccfd8d0..f56edf3177 100644
--- a/src/gallium/drivers/i965/brw_clip_state.c
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -109,7 +109,7 @@ clip_unit_create_from_key(struct brw_context *brw,
/* Although up to 16 concurrent Clip threads are allowed on IGDNG,
* only 2 threads can output VUEs at a time.
*/
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
clip.thread4.max_threads = 16 - 1;
else
clip.thread4.max_threads = 2 - 1;
@@ -134,7 +134,7 @@ clip_unit_create_from_key(struct brw_context *brw,
clip.clip5.api_mode = BRW_CLIP_API_OGL;
clip.clip5.clip_mode = key->clip_mode;
- if (BRW_IS_G4X(brw))
+ if (brw->is_g4x)
clip.clip5.negative_w_clip_test = 1;
clip.clip6.clipper_viewport_state_ptr = 0;
diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c
index 069524bc14..7d400e6028 100644
--- a/src/gallium/drivers/i965/brw_clip_tri.c
+++ b/src/gallium/drivers/i965/brw_clip_tri.c
@@ -30,6 +30,7 @@
*/
#include "brw_defines.h"
+#include "brw_context.h"
#include "brw_eu.h"
#include "brw_clip.h"
@@ -43,6 +44,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
GLuint nr_verts )
{
GLuint i = 0,j;
+ struct brw_context *brw = c->func.brw;
/* Register usage is static, precompute here:
*/
@@ -69,7 +71,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
for (j = 0; j < 3; j++) {
GLuint delta = c->key.nr_attrs*16 + 32;
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
delta = c->key.nr_attrs * 16 + 32 * 3;
brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
@@ -110,7 +112,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
i++;
}
- if (c->need_ff_sync) {
+ if (brw->needs_ff_sync) {
c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
i++;
}
@@ -563,7 +565,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
/* if -ve rhw workaround bit is set,
do cliptest */
- if (c->chipset.is_965) {
+ if (p->brw->has_negative_rhw_bug) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c
index 23e51ee9bc..5713f25da7 100644
--- a/src/gallium/drivers/i965/brw_clip_util.c
+++ b/src/gallium/drivers/i965/brw_clip_util.c
@@ -31,6 +31,7 @@
#include "brw_defines.h"
+#include "brw_context.h"
#include "brw_eu.h"
#include "brw_clip.h"
@@ -126,6 +127,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
GLboolean force_edgeflag)
{
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
struct brw_reg tmp = get_tmp(c);
GLuint i;
@@ -142,7 +144,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
for (i = 0; i < c->key.nr_attrs; i++) {
GLuint delta = i*16 + 32;
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
delta = i * 16 + 32 * 3;
if (delta == c->offset_edgeflag) {
@@ -176,7 +178,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
if (i & 1) {
GLuint delta = i*16 + 32;
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
delta = i * 16 + 32 * 3;
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
@@ -350,7 +352,8 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
- if (c->need_ff_sync) {
+ struct brw_context *brw = c->func.brw;
+ if (brw->needs_ff_sync) {
struct brw_compile *p = &c->func;
struct brw_instruction *need_ff_sync;
@@ -379,7 +382,8 @@ void brw_clip_ff_sync(struct brw_clip_compile *c)
void brw_clip_init_ff_sync(struct brw_clip_compile *c)
{
- if (c->need_ff_sync) {
+ struct brw_context *brw = c->func.brw;
+ if (brw->needs_ff_sync) {
struct brw_compile *p = &c->func;
brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index a2736f783d..41a468a32f 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -107,7 +107,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen,
void *priv)
{
struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context);
-
+ struct brw_screen *brs = brw_screen(screen);
if (!brw) {
debug_printf("%s: failed to alloc context\n", __FUNCTION__);
return NULL;
@@ -117,7 +117,10 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen,
brw->base.priv = priv;
brw->base.destroy = brw_destroy_context;
brw->sws = brw_screen(screen)->sws;
- brw->chipset = brw_screen(screen)->chipset;
+ brw->is_g4x = brs->is_g4x;
+ brw->needs_ff_sync = brs->needs_ff_sync;
+ brw->has_negative_rhw_bug = brs->has_negative_rhw_bug;
+ brw->gen = brs->gen;
brw_init_resource_functions( brw );
brw_pipe_blend_init( brw );
@@ -145,7 +148,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen,
make_empty_list(&brw->query.active_head);
- brw->batch = brw_batchbuffer_alloc( brw->sws, brw->chipset );
+ brw->batch = brw_batchbuffer_alloc( brw->sws );
if (brw->batch == NULL)
goto fail;
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index d927f382d5..45fc26dd7d 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -529,7 +529,14 @@ struct brw_query_object {
struct brw_context
{
struct pipe_context base;
- struct brw_chipset chipset;
+ int gen;
+ boolean has_negative_rhw_bug;
+ boolean needs_ff_sync;
+ boolean is_g4x;
+
+ int urb_size;
+ int vs_max_threads;
+ int wm_max_threads;
struct brw_winsys_screen *sws;
@@ -854,11 +861,5 @@ brw_context( struct pipe_context *ctx )
return (struct brw_context *)ctx;
}
-
-#define BRW_IS_965(brw) ((brw)->chipset.is_965)
-#define BRW_IS_IGDNG(brw) ((brw)->chipset.is_igdng)
-#define BRW_IS_G4X(brw) ((brw)->chipset.is_g4x)
-
-
#endif
diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h
index e201ce4d7c..7547eae97c 100644
--- a/src/gallium/drivers/i965/brw_defines.h
+++ b/src/gallium/drivers/i965/brw_defines.h
@@ -463,6 +463,13 @@
#define BRW_COMPRESSION_2NDHALF 1
#define BRW_COMPRESSION_COMPRESSED 2
+#define GEN6_COMPRESSION_1Q 0
+#define GEN6_COMPRESSION_2Q 1
+#define GEN6_COMPRESSION_3Q 2
+#define GEN6_COMPRESSION_4Q 3
+#define GEN6_COMPRESSION_1H 0
+#define GEN6_COMPRESSION_2H 2
+
#define BRW_CONDITIONAL_NONE 0
#define BRW_CONDITIONAL_Z 1
#define BRW_CONDITIONAL_NZ 2
@@ -502,6 +509,27 @@
#define BRW_MASK_ENABLE 0
#define BRW_MASK_DISABLE 1
+/** @{
+ *
+ * Gen6 has replaced "mask enable/disable" with WECtrl, which is
+ * effectively the same but much simpler to think about. Now, there
+ * are two contributors ANDed together to whether channels are
+ * executed: The predication on the instruction, and the channel write
+ * enable.
+ */
+/**
+ * This is the default value. It means that a channel's write enable is set
+ * if the per-channel IP is pointing at this instruction.
+ */
+#define BRW_WE_NORMAL 0
+/**
+ * This is used like BRW_MASK_DISABLE, and causes all channels to have
+ * their write enable set. Note that predication still contributes to
+ * whether the channel actually gets written.
+ */
+#define BRW_WE_ALL 1
+/** @} */
+
#define BRW_OPCODE_MOV 1
#define BRW_OPCODE_SEL 2
#define BRW_OPCODE_NOT 4
@@ -531,6 +559,8 @@
#define BRW_OPCODE_POP 47
#define BRW_OPCODE_WAIT 48
#define BRW_OPCODE_SEND 49
+#define BRW_OPCODE_SENDC 50
+#define BRW_OPCODE_MATH 56
#define BRW_OPCODE_ADD 64
#define BRW_OPCODE_MUL 65
#define BRW_OPCODE_AVG 66
@@ -550,6 +580,7 @@
#define BRW_OPCODE_DP2 87
#define BRW_OPCODE_DPA2 88
#define BRW_OPCODE_LINE 89
+#define BRW_OPCODE_PLN 90
#define BRW_OPCODE_NOP 126
#define BRW_PREDICATE_NONE 0
@@ -599,6 +630,8 @@
#define BRW_ARF_NOTIFICATION_COUNT 0x90
#define BRW_ARF_IP 0xA0
+#define BRW_MRF_COMPR4 (1 << 7)
+
#define BRW_AMASK 0
#define BRW_IMASK 1
#define BRW_LMASK 2
@@ -645,13 +678,14 @@
#define BRW_POLYGON_FACING_BACK 1
#define BRW_MESSAGE_TARGET_NULL 0
-#define BRW_MESSAGE_TARGET_MATH 1
+#define BRW_MESSAGE_TARGET_MATH 1 /* reserved on GEN6 */
#define BRW_MESSAGE_TARGET_SAMPLER 2
#define BRW_MESSAGE_TARGET_GATEWAY 3
-#define BRW_MESSAGE_TARGET_DATAPORT_READ 4
-#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5
+#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 /* sampler cache on GEN6 */
+#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 /* render cache on Gen6 */
#define BRW_MESSAGE_TARGET_URB 6
#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7
+#define BRW_MESSAGE_TARGET_CONST_CACHE 9 /* GEN6 */
#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
@@ -674,20 +708,15 @@
#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3
-
-/* for IGDNG only */
+#define BRW_SAMPLER_MESSAGE_SAMPLE_GEN5 0
+#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5 1
+#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5 2
+#define BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5 3
+#define BRW_SAMPLER_MESSAGE_SAMPLE_DERIVS_GEN5 4
+#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5 5
+#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE_GEN5 6
+
+/* for GEN5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
@@ -705,10 +734,24 @@
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+/* This one stays the same across generations. */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+/* GEN4 */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
-#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
+#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+/* G45, GEN5 */
+#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
+#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
+#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3
+#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
+#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
+/* GEN6 */
+#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
+#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5
+#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
@@ -728,6 +771,16 @@
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+/* GEN6 */
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE_GEN6 7
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE_GEN6 8
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE_GEN6 9
+#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE_GEN6 10
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORLD_SCATTERED_WRITE_GEN6 11
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6 12
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE_GEN6 13
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE_GEN6 14
+
#define BRW_MATH_FUNCTION_INV 1
#define BRW_MATH_FUNCTION_LOG 2
#define BRW_MATH_FUNCTION_EXP 3
@@ -736,7 +789,8 @@
#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */
#define BRW_MATH_FUNCTION_COS 7 /* was 8 */
#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */
-#define BRW_MATH_FUNCTION_TAN 9
+#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */
+#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */
#define BRW_MATH_FUNCTION_POW 10
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
@@ -787,17 +841,33 @@
#define CMD_PIPELINED_STATE_POINTERS 0x7800
#define CMD_BINDING_TABLE_PTRS 0x7801
+# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8)
+# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9)
+# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12)
+
+#define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */
+# define PS_SAMPLER_STATE_CHANGE (1 << 12)
+# define GS_SAMPLER_STATE_CHANGE (1 << 9)
+# define VS_SAMPLER_STATE_CHANGE (1 << 8)
+/* DW1: VS */
+/* DW2: GS */
+/* DW3: PS */
#define CMD_VERTEX_BUFFER 0x7808
# define BRW_VB0_INDEX_SHIFT 27
+# define GEN6_VB0_INDEX_SHIFT 26
# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26)
# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26)
+# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20)
+# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20)
# define BRW_VB0_PITCH_SHIFT 0
#define CMD_VERTEX_ELEMENT 0x7809
# define BRW_VE0_INDEX_SHIFT 27
+# define GEN6_VE0_INDEX_SHIFT 26
# define BRW_VE0_FORMAT_SHIFT 16
# define BRW_VE0_VALID (1 << 26)
+# define GEN6_VE0_VALID (1 << 25)
# define BRW_VE0_SRC_OFFSET_SHIFT 0
# define BRW_VE1_COMPONENT_NOSTORE 0
# define BRW_VE1_COMPONENT_STORE_SRC 1
@@ -816,6 +886,236 @@
#define CMD_INDEX_BUFFER 0x780a
#define CMD_VF_STATISTICS_965 0x780b
#define CMD_VF_STATISTICS_GM45 0x680b
+#define CMD_3D_CC_STATE_POINTERS 0x780e /* GEN6+ */
+
+#define CMD_URB 0x7805 /* GEN6+ */
+# define GEN6_URB_VS_SIZE_SHIFT 16
+# define GEN6_URB_VS_ENTRIES_SHIFT 0
+# define GEN6_URB_GS_ENTRIES_SHIFT 8
+# define GEN6_URB_GS_SIZE_SHIFT 0
+
+#define CMD_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */
+# define GEN6_CC_VIEWPORT_MODIFY (1 << 12)
+# define GEN6_SF_VIEWPORT_MODIFY (1 << 11)
+# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10)
+
+#define CMD_3D_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */
+
+#define CMD_3D_VS_STATE 0x7810 /* GEN6+ */
+/* DW2 */
+# define GEN6_VS_SPF_MODE (1 << 31)
+# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_VS_SAMPLER_COUNT_SHIFT 27
+# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16)
+/* DW4 */
+# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20
+# define GEN6_VS_URB_READ_LENGTH_SHIFT 11
+# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW5 */
+# define GEN6_VS_MAX_THREADS_SHIFT 25
+# define GEN6_VS_STATISTICS_ENABLE (1 << 10)
+# define GEN6_VS_CACHE_DISABLE (1 << 1)
+# define GEN6_VS_ENABLE (1 << 0)
+
+#define CMD_3D_GS_STATE 0x7811 /* GEN6+ */
+/* DW2 */
+# define GEN6_GS_SPF_MODE (1 << 31)
+# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_GS_SAMPLER_COUNT_SHIFT 27
+# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+/* DW4 */
+# define GEN6_GS_URB_READ_LENGTH_SHIFT 11
+# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4
+# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0
+/* DW5 */
+# define GEN6_GS_MAX_THREADS_SHIFT 25
+# define GEN6_GS_STATISTICS_ENABLE (1 << 10)
+# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9)
+# define GEN6_GS_RENDERING_ENABLE (1 << 8)
+/* DW6 */
+# define GEN6_GS_ENABLE (1 << 15)
+
+#define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */
+/* DW1 */
+# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10)
+/**
+ * Just does cheap culling based on the clip distance. Bits must be
+ * disjoint with USER_CLIP_CLIP_DISTANCE bits.
+ */
+# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0
+/* DW2 */
+# define GEN6_CLIP_ENABLE (1 << 31)
+# define GEN6_CLIP_API_OGL (0 << 30)
+# define GEN6_CLIP_API_D3D (1 << 30)
+# define GEN6_CLIP_XY_TEST (1 << 28)
+# define GEN6_CLIP_Z_TEST (1 << 27)
+# define GEN6_CLIP_GB_TEST (1 << 26)
+/** 8-bit field of which user clip distances to clip aganist. */
+# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16
+# define GEN6_CLIP_MODE_NORMAL (0 << 13)
+# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13)
+# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13)
+# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9)
+# define GEN6_CLIP_BARYCENTRIC_ENABLE (1 << 8)
+# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4
+# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2
+# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0
+/* DW3 */
+# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17
+# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6
+# define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5)
+
+#define CMD_3D_SF_STATE 0x7813 /* GEN6+ */
+/* DW1 */
+# define GEN6_SF_NUM_OUTPUTS_SHIFT 22
+# define GEN6_SF_SWIZZLE_ENABLE (1 << 21)
+# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20)
+# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
+# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW2 */
+# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11)
+# define GEN6_SF_STATISTICS_ENABLE (1 << 10)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7)
+# define GEN6_SF_FRONT_SOLID (0 << 5)
+# define GEN6_SF_FRONT_WIREFRAME (1 << 5)
+# define GEN6_SF_FRONT_POINT (2 << 5)
+# define GEN6_SF_BACK_SOLID (0 << 3)
+# define GEN6_SF_BACK_WIREFRAME (1 << 3)
+# define GEN6_SF_BACK_POINT (2 << 3)
+# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1)
+# define GEN6_SF_WINDING_CCW (1 << 0)
+/* DW3 */
+# define GEN6_SF_LINE_AA_ENABLE (1 << 31)
+# define GEN6_SF_CULL_BOTH (0 << 29)
+# define GEN6_SF_CULL_NONE (1 << 29)
+# define GEN6_SF_CULL_FRONT (2 << 29)
+# define GEN6_SF_CULL_BACK (3 << 29)
+# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */
+# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16)
+# define GEN6_SF_SCISSOR_ENABLE (1 << 11)
+# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8)
+# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8)
+# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8)
+# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8)
+/* DW4 */
+# define GEN6_SF_TRI_PROVOKE_SHIFT 29
+# define GEN6_SF_LINE_PROVOKE_SHIFT 27
+# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25
+# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14)
+# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14)
+# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12)
+# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12)
+# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11)
+# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */
+/* DW5: depth offset constant */
+/* DW6: depth offset scale */
+/* DW7: depth offset clamp */
+/* DW8 */
+# define ATTRIBUTE_1_OVERRIDE_W (1 << 31)
+# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30)
+# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29)
+# define ATTRIBUTE_1_OVERRIDE_X (1 << 28)
+# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25
+# define ATTRIBUTE_1_SWIZZLE_SHIFT 22
+# define ATTRIBUTE_1_SOURCE_SHIFT 16
+# define ATTRIBUTE_0_OVERRIDE_W (1 << 15)
+# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14)
+# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13)
+# define ATTRIBUTE_0_OVERRIDE_X (1 << 12)
+# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9
+# define ATTRIBUTE_0_SWIZZLE_SHIFT 6
+# define ATTRIBUTE_0_SOURCE_SHIFT 0
+
+# define ATTRIBUTE_SWIZZLE_INPUTATTR 0
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3
+# define ATTRIBUTE_SWIZZLE_SHIFT 6
+
+/* DW16: Point sprite texture coordinate enables */
+/* DW17: Constant interpolation enables */
+/* DW18: attr 0-7 wrap shortest enables */
+/* DW19: attr 8-16 wrap shortest enables */
+
+#define CMD_3D_WM_STATE 0x7814 /* GEN6+ */
+/* DW1: kernel pointer */
+/* DW2 */
+# define GEN6_WM_SPF_MODE (1 << 31)
+# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_WM_SAMPLER_COUNT_SHIFT 27
+# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16)
+/* DW3: scratch space */
+/* DW4 */
+# define GEN6_WM_STATISTICS_ENABLE (1 << 31)
+# define GEN6_WM_DEPTH_CLEAR (1 << 30)
+# define GEN6_WM_DEPTH_RESOLVE (1 << 28)
+# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0
+/* DW5 */
+# define GEN6_WM_MAX_THREADS_SHIFT 25
+# define GEN6_WM_KILL_ENABLE (1 << 22)
+# define GEN6_WM_COMPUTED_DEPTH (1 << 21)
+# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20)
+# define GEN6_WM_DISPATCH_ENABLE (1 << 19)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16)
+# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14)
+# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13)
+# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 11)
+# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9)
+# define GEN6_WM_USES_SOURCE_W (1 << 8)
+# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
+# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2)
+# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1)
+# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0)
+/* DW6 */
+# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20
+# define GEN6_WM_POSOFFSET_NONE (0 << 18)
+# define GEN6_WM_POSOFFSET_CENTROID (2 << 18)
+# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18)
+# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16)
+# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16)
+# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16)
+# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
+# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
+# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
+# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
+# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
+# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
+# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9)
+# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1)
+# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1)
+# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1)
+# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1)
+# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0)
+/* DW7: kernel 1 pointer */
+/* DW8: kernel 2 pointer */
+
+#define CMD_3D_CONSTANT_VS_STATE 0x7815 /* GEN6+ */
+#define CMD_3D_CONSTANT_GS_STATE 0x7816 /* GEN6+ */
+#define CMD_3D_CONSTANT_PS_STATE 0x7817 /* GEN6+ */
+# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15)
+# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14)
+# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13)
+# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12)
+
+#define CMD_3D_SAMPLE_MASK 0x7818 /* GEN6+ */
#define CMD_DRAW_RECT 0x7900
#define CMD_BLEND_CONSTANT_COLOR 0x7901
@@ -827,6 +1127,25 @@
#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
#define CMD_AA_LINE_PARAMETERS 0x790a
+#define CMD_GS_SVB_INDEX 0x790b /* CTG+ */
+/* DW1 */
+# define SVB_INDEX_SHIFT 29
+# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */
+/* DW2: SVB index */
+/* DW3: SVB maximum index */
+
+#define CMD_3D_MULTISAMPLE 0x790d /* SNB+ */
+/* DW1 */
+# define MS_PIXEL_LOCATION_CENTER (0 << 4)
+# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
+# define MS_NUMSAMPLES_1 (0 << 1)
+# define MS_NUMSAMPLES_4 (2 << 1)
+# define MS_NUMSAMPLES_8 (3 << 1)
+
+#define CMD_3D_CLEAR_PARAMS 0x7910 /* ILK+ */
+# define DEPTH_CLEAR_VALID (1 << 15)
+/* DW1: depth clear value */
+
#define CMD_PIPE_CONTROL 0x7a00
#define CMD_3D_PRIM 0x7b00
@@ -839,8 +1158,8 @@
#define R02_PRIM_END 0x1
#define R02_PRIM_START 0x2
-#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \
- (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */
+#define URB_SIZES(brw) (brw->gen == 5 ? 1024 : \
+ (brw->is_g4x ? 384 : 256)) /* 512 bit units */
diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c
index 28c83515ba..b093569f0c 100644
--- a/src/gallium/drivers/i965/brw_disasm.c
+++ b/src/gallium/drivers/i965/brw_disasm.c
@@ -49,12 +49,14 @@ struct {
[BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
@@ -69,13 +71,14 @@ struct {
[BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
[BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 },
- [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
@@ -144,7 +147,6 @@ char *chan_sel[4] = {
};
char *dest_condmod[16] = {
- [0] = NULL
};
char *debug_ctrl[2] = {
@@ -157,6 +159,16 @@ char *saturate[2] = {
[1] = ".sat"
};
+char *accwr[2] = {
+ [0] = "",
+ [1] = "AccWrEnable"
+};
+
+char *wectrl[2] = {
+ [0] = "WE_normal",
+ [1] = "WE_all"
+};
+
char *exec_size[8] = {
[0] = "1",
[1] = "2",
@@ -204,6 +216,7 @@ char *compr_ctrl[4] = {
[0] = "",
[1] = "sechalf",
[2] = "compr",
+ [3] = "compr4",
};
char *dep_ctrl[4] = {
@@ -233,6 +246,16 @@ char *reg_encoding[8] = {
[7] = "F"
};
+int reg_type_size[8] = {
+ [0] = 4,
+ [1] = 4,
+ [2] = 2,
+ [3] = 2,
+ [4] = 1,
+ [5] = 1,
+ [7] = 4
+};
+
char *imm_encoding[8] = {
[0] = "UD",
[1] = "D",
@@ -321,6 +344,11 @@ char *math_precision[2] = {
[1] = "partial_precision"
};
+char *urb_opcode[2] = {
+ [0] = "urb_write",
+ [1] = "ff_sync",
+};
+
char *urb_swizzle[4] = {
[BRW_URB_SWIZZLE_NONE] = "",
[BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
@@ -416,6 +444,11 @@ static int print_opcode (FILE *file, int id)
static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
{
int err = 0;
+
+ /* Clear the Compr4 instruction compression bit. */
+ if (_reg_file == BRW_MESSAGE_REGISTER_FILE)
+ _reg_nr &= ~(1 << 7);
+
if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
switch (_reg_nr & 0xf0) {
case BRW_ARF_NULL:
@@ -427,6 +460,9 @@ static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
case BRW_ARF_ACCUMULATOR:
format (file, "acc%d", _reg_nr & 0x0f);
break;
+ case BRW_ARF_FLAG:
+ format (file, "f%d", _reg_nr & 0x0f);
+ break;
case BRW_ARF_MASK:
format (file, "mask%d", _reg_nr & 0x0f);
break;
@@ -457,7 +493,7 @@ static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
return err;
}
-static int dest (FILE *file, const struct brw_instruction *inst)
+static int dest (FILE *file, struct brw_instruction *inst)
{
int err = 0;
@@ -469,7 +505,8 @@ static int dest (FILE *file, const struct brw_instruction *inst)
if (err == -1)
return 0;
if (inst->bits1.da1.dest_subreg_nr)
- format (file, ".%d", inst->bits1.da1.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.da1.dest_subreg_nr /
+ reg_type_size[inst->bits1.da1.dest_reg_type]);
format (file, "<%d>", inst->bits1.da1.dest_horiz_stride);
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
}
@@ -477,7 +514,8 @@ static int dest (FILE *file, const struct brw_instruction *inst)
{
string (file, "g[a0");
if (inst->bits1.ia1.dest_subreg_nr)
- format (file, ".%d", inst->bits1.ia1.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.ia1.dest_subreg_nr /
+ reg_type_size[inst->bits1.ia1.dest_reg_type]);
if (inst->bits1.ia1.dest_indirect_offset)
format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
string (file, "]");
@@ -493,7 +531,8 @@ static int dest (FILE *file, const struct brw_instruction *inst)
if (err == -1)
return 0;
if (inst->bits1.da16.dest_subreg_nr)
- format (file, ".%d", inst->bits1.da16.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.da16.dest_subreg_nr /
+ reg_type_size[inst->bits1.da16.dest_reg_type]);
string (file, "<1>");
err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
@@ -534,7 +573,7 @@ static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
if (err == -1)
return 0;
if (sub_reg_num)
- format (file, ".%d", sub_reg_num);
+ format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */
src_align1_region (file, _vert_stride, _width, _horiz_stride);
err |= control (file, "src reg encoding", reg_encoding, type, NULL);
return err;
@@ -588,11 +627,12 @@ static int src_da16 (FILE *file,
if (err == -1)
return 0;
if (_subreg_nr)
- format (file, ".%d", _subreg_nr);
+ /* bit4 for subreg number byte addressing. Make this same meaning as
+ in da1 case, so output looks consistent. */
+ format (file, ".%d", 16 / reg_type_size[_reg_type]);
string (file, "<");
err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
- string (file, ",1,1>");
- err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
+ string (file, ",4,1>");
/*
* Three kinds of swizzle display:
* identity - nothing printed
@@ -619,11 +659,12 @@ static int src_da16 (FILE *file,
err |= control (file, "channel select", chan_sel, swz_z, NULL);
err |= control (file, "channel select", chan_sel, swz_w, NULL);
}
+ err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
return err;
}
-static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) {
+static int imm (FILE *file, GLuint type, struct brw_instruction *inst) {
switch (type) {
case BRW_REGISTER_TYPE_UD:
format (file, "0x%08xUD", inst->bits3.ud);
@@ -652,7 +693,7 @@ static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) {
return 0;
}
-static int src0 (FILE *file, const struct brw_instruction *inst)
+static int src0 (FILE *file, struct brw_instruction *inst)
{
if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
return imm (file, inst->bits1.da1.src0_reg_type,
@@ -712,7 +753,7 @@ static int src0 (FILE *file, const struct brw_instruction *inst)
}
}
-static int src1 (FILE *file, const struct brw_instruction *inst)
+static int src1 (FILE *file, struct brw_instruction *inst)
{
if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
return imm (file, inst->bits1.da1.src1_reg_type,
@@ -772,7 +813,7 @@ static int src1 (FILE *file, const struct brw_instruction *inst)
}
}
-int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
+int brw_disasm_insn (FILE *file, struct brw_instruction *inst, int gen)
{
int err = 0;
int space = 0;
@@ -822,7 +863,8 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
err |= src1 (file, inst);
}
- if (inst->header.opcode == BRW_OPCODE_SEND) {
+ if (inst->header.opcode == BRW_OPCODE_SEND ||
+ inst->header.opcode == BRW_OPCODE_SENDC) {
newline (file);
pad (file, 16);
space = 0;
@@ -842,24 +884,70 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
inst->bits3.math.precision, &space);
break;
case BRW_MESSAGE_TARGET_SAMPLER:
- format (file, " (%d, %d, ",
- inst->bits3.sampler.binding_table_index,
- inst->bits3.sampler.sampler);
- err |= control (file, "sampler target format", sampler_target_format,
- inst->bits3.sampler.return_format, NULL);
- string (file, ")");
+ if (gen >= 5) {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.sampler_gen5.binding_table_index,
+ inst->bits3.sampler_gen5.sampler,
+ inst->bits3.sampler_gen5.msg_type,
+ inst->bits3.sampler_gen5.simd_mode);
+ } else if (0 /* FINISHME: is_g4x */) {
+ format (file, " (%d, %d)",
+ inst->bits3.sampler_g4x.binding_table_index,
+ inst->bits3.sampler_g4x.sampler);
+ } else {
+ format (file, " (%d, %d, ",
+ inst->bits3.sampler.binding_table_index,
+ inst->bits3.sampler.sampler);
+ err |= control (file, "sampler target format", sampler_target_format,
+ inst->bits3.sampler.return_format, NULL);
+ string (file, ")");
+ }
+ break;
+ case BRW_MESSAGE_TARGET_DATAPORT_READ:
+ if (gen >= 6) {
+ format (file, " (%d, %d, %d, %d, %d, %d)",
+ inst->bits3.dp_render_cache.binding_table_index,
+ inst->bits3.dp_render_cache.msg_control,
+ inst->bits3.dp_render_cache.msg_type,
+ inst->bits3.dp_render_cache.send_commit_msg,
+ inst->bits3.dp_render_cache.msg_length,
+ inst->bits3.dp_render_cache.response_length);
+ } else if (gen >= 5 /* FINISHME: || is_g4x */) {
+ format (file, " (%d, %d, %d)",
+ inst->bits3.dp_read_gen5.binding_table_index,
+ inst->bits3.dp_read_gen5.msg_control,
+ inst->bits3.dp_read_gen5.msg_type);
+ } else {
+ format (file, " (%d, %d, %d)",
+ inst->bits3.dp_read.binding_table_index,
+ inst->bits3.dp_read.msg_control,
+ inst->bits3.dp_read.msg_type);
+ }
break;
case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
- format (file, " (%d, %d, %d, %d)",
- inst->bits3.dp_write.binding_table_index,
- (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
- inst->bits3.dp_write.msg_control,
- inst->bits3.dp_write.msg_type,
- inst->bits3.dp_write.send_commit_msg);
+ if (gen >= 6) {
+ format (file, " (%d, %d, %d, %d, %d, %d)",
+ inst->bits3.dp_render_cache.binding_table_index,
+ inst->bits3.dp_render_cache.msg_control,
+ inst->bits3.dp_render_cache.msg_type,
+ inst->bits3.dp_render_cache.send_commit_msg,
+ inst->bits3.dp_render_cache.msg_length,
+ inst->bits3.dp_render_cache.response_length);
+ } else {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.dp_write.binding_table_index,
+ (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
+ inst->bits3.dp_write.msg_control,
+ inst->bits3.dp_write.msg_type,
+ inst->bits3.dp_write.send_commit_msg);
+ }
break;
case BRW_MESSAGE_TARGET_URB:
- format (file, " %d", inst->bits3.urb.offset);
- space = 1;
+ if (gen >= 5) {
+ format (file, " %d", inst->bits3.urb_gen5.offset);
+ } else {
+ format (file, " %d", inst->bits3.urb.offset);
+ }
err |= control (file, "urb swizzle", urb_swizzle,
inst->bits3.urb.swizzle_control, &space);
err |= control (file, "urb allocate", urb_allocate,
@@ -868,6 +956,11 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
inst->bits3.urb.used, &space);
err |= control (file, "urb complete", urb_complete,
inst->bits3.urb.complete, &space);
+ if (gen >= 5) {
+ format (file, " mlen %d, rlen %d\n",
+ inst->bits3.urb_gen5.msg_length,
+ inst->bits3.urb_gen5.response_length);
+ }
break;
case BRW_MESSAGE_TARGET_THREAD_SPAWNER:
break;
@@ -877,10 +970,17 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
}
if (space)
string (file, " ");
- format (file, "mlen %d",
- inst->bits3.generic.msg_length);
- format (file, " rlen %d",
- inst->bits3.generic.response_length);
+ if (gen >= 5) {
+ format (file, "mlen %d",
+ inst->bits3.generic_gen5.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic_gen5.response_length);
+ } else {
+ format (file, "mlen %d",
+ inst->bits3.generic.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic.response_length);
+ }
}
pad (file, 64);
if (inst->header.opcode != BRW_OPCODE_NOP) {
@@ -891,7 +991,8 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space);
err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
- if (inst->header.opcode == BRW_OPCODE_SEND)
+ if (inst->header.opcode == BRW_OPCODE_SEND ||
+ inst->header.opcode == BRW_OPCODE_SENDC)
err |= control (file, "end of thread", end_of_thread,
inst->bits3.generic.end_of_thread, &space);
if (space)
@@ -905,13 +1006,13 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
int brw_disasm (FILE *file,
- const struct brw_instruction *inst,
- unsigned count)
+ struct brw_instruction *inst,
+ unsigned count, int gen)
{
int i, err;
for (i = 0; i < count; i++) {
- err = brw_disasm_insn(stderr, &inst[i]);
+ err = brw_disasm_insn(stderr, &inst[i], gen);
if (err)
return err;
}
diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h
index ba5b109c48..ce451ed5a0 100644
--- a/src/gallium/drivers/i965/brw_disasm.h
+++ b/src/gallium/drivers/i965/brw_disasm.h
@@ -27,10 +27,10 @@
struct brw_instruction;
-int brw_disasm_insn (FILE *file, const struct brw_instruction *inst);
+int brw_disasm_insn (FILE *file, struct brw_instruction *inst, int gen);
int brw_disasm (FILE *file,
- const struct brw_instruction *inst,
- unsigned count);
+ struct brw_instruction *inst,
+ unsigned count, int gen);
#endif
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index ebeb1e146a..04ec5c81a6 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -89,13 +89,16 @@ static int brw_prepare_vertices(struct brw_context *brw)
vb->buffer->width0 - vb->buffer_offset :
MAX2(vb->buffer->width0 - vb->buffer_offset,
vb->stride * (max_index + 1 - min_index)));
+ boolean flushed;
- ret = u_upload_buffer( brw->vb.upload_vertex,
+ ret = u_upload_buffer( brw->vb.upload_vertex,
+ 0,
vb->buffer_offset + min_index * vb->stride,
size,
vb->buffer,
&offset,
- &upload_buf );
+ &upload_buf,
+ &flushed );
if (ret)
return ret;
@@ -167,7 +170,7 @@ static int brw_emit_vertex_buffers( struct brw_context *brw )
OUT_RELOC(brw->vb.vb[i].bo,
BRW_USAGE_VERTEX,
brw->vb.vb[i].offset);
- if (BRW_IS_IGDNG(brw)) {
+ if (brw->gen == 5) {
OUT_RELOC(brw->vb.vb[i].bo,
BRW_USAGE_VERTEX,
brw->vb.vb[i].bo->size - 1);
@@ -251,13 +254,16 @@ static int brw_prepare_indices(struct brw_context *brw)
/* Turn userbuffer into a proper hardware buffer?
*/
if (brw_buffer_is_user_buffer(index_buffer)) {
+ boolean flushed;
ret = u_upload_buffer( brw->vb.upload_index,
+ 0,
index_offset,
ib_size,
index_buffer,
&offset,
- &upload_buf );
+ &upload_buf,
+ &flushed );
if (ret)
return ret;
diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c
index 00d8eaccbc..ba1159e4c3 100644
--- a/src/gallium/drivers/i965/brw_eu_emit.c
+++ b/src/gallium/drivers/i965/brw_eu_emit.c
@@ -255,19 +255,19 @@ static void brw_set_math_message( struct brw_context *brw,
{
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
- insn->bits3.math_igdng.function = function;
- insn->bits3.math_igdng.int_type = integer_type;
- insn->bits3.math_igdng.precision = low_precision;
- insn->bits3.math_igdng.saturate = saturate;
- insn->bits3.math_igdng.data_type = dataType;
- insn->bits3.math_igdng.snapshot = 0;
- insn->bits3.math_igdng.header_present = 0;
- insn->bits3.math_igdng.response_length = response_length;
- insn->bits3.math_igdng.msg_length = msg_length;
- insn->bits3.math_igdng.end_of_thread = 0;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
- insn->bits2.send_igdng.end_of_thread = 0;
+ if (brw->gen == 5) {
+ insn->bits3.math_gen5.function = function;
+ insn->bits3.math_gen5.int_type = integer_type;
+ insn->bits3.math_gen5.precision = low_precision;
+ insn->bits3.math_gen5.saturate = saturate;
+ insn->bits3.math_gen5.data_type = dataType;
+ insn->bits3.math_gen5.snapshot = 0;
+ insn->bits3.math_gen5.header_present = 0;
+ insn->bits3.math_gen5.response_length = response_length;
+ insn->bits3.math_gen5.msg_length = msg_length;
+ insn->bits3.math_gen5.end_of_thread = 0;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
+ insn->bits2.send_gen5.end_of_thread = 0;
} else {
insn->bits3.math.function = function;
insn->bits3.math.int_type = integer_type;
@@ -295,18 +295,18 @@ static void brw_set_ff_sync_message( struct brw_context *brw,
{
brw_set_src1(insn, brw_imm_d(0));
- insn->bits3.urb_igdng.opcode = 1;
- insn->bits3.urb_igdng.offset = offset;
- insn->bits3.urb_igdng.swizzle_control = swizzle_control;
- insn->bits3.urb_igdng.allocate = allocate;
- insn->bits3.urb_igdng.used = used;
- insn->bits3.urb_igdng.complete = complete;
- insn->bits3.urb_igdng.header_present = 1;
- insn->bits3.urb_igdng.response_length = response_length;
- insn->bits3.urb_igdng.msg_length = msg_length;
- insn->bits3.urb_igdng.end_of_thread = end_of_thread;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
- insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ insn->bits3.urb_gen5.opcode = 1;
+ insn->bits3.urb_gen5.offset = offset;
+ insn->bits3.urb_gen5.swizzle_control = swizzle_control;
+ insn->bits3.urb_gen5.allocate = allocate;
+ insn->bits3.urb_gen5.used = used;
+ insn->bits3.urb_gen5.complete = complete;
+ insn->bits3.urb_gen5.header_present = 1;
+ insn->bits3.urb_gen5.response_length = response_length;
+ insn->bits3.urb_gen5.msg_length = msg_length;
+ insn->bits3.urb_gen5.end_of_thread = end_of_thread;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_gen5.end_of_thread = end_of_thread;
}
static void brw_set_urb_message( struct brw_context *brw,
@@ -322,19 +322,19 @@ static void brw_set_urb_message( struct brw_context *brw,
{
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
- insn->bits3.urb_igdng.opcode = 0; /* ? */
- insn->bits3.urb_igdng.offset = offset;
- insn->bits3.urb_igdng.swizzle_control = swizzle_control;
- insn->bits3.urb_igdng.allocate = allocate;
- insn->bits3.urb_igdng.used = used; /* ? */
- insn->bits3.urb_igdng.complete = complete;
- insn->bits3.urb_igdng.header_present = 1;
- insn->bits3.urb_igdng.response_length = response_length;
- insn->bits3.urb_igdng.msg_length = msg_length;
- insn->bits3.urb_igdng.end_of_thread = end_of_thread;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
- insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ if (brw->gen == 5) {
+ insn->bits3.urb_gen5.opcode = 0; /* ? */
+ insn->bits3.urb_gen5.offset = offset;
+ insn->bits3.urb_gen5.swizzle_control = swizzle_control;
+ insn->bits3.urb_gen5.allocate = allocate;
+ insn->bits3.urb_gen5.used = used; /* ? */
+ insn->bits3.urb_gen5.complete = complete;
+ insn->bits3.urb_gen5.header_present = 1;
+ insn->bits3.urb_gen5.response_length = response_length;
+ insn->bits3.urb_gen5.msg_length = msg_length;
+ insn->bits3.urb_gen5.end_of_thread = end_of_thread;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_gen5.end_of_thread = end_of_thread;
} else {
insn->bits3.urb.opcode = 0; /* ? */
insn->bits3.urb.offset = offset;
@@ -361,18 +361,18 @@ static void brw_set_dp_write_message( struct brw_context *brw,
{
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
- insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
- insn->bits3.dp_write_igdng.msg_control = msg_control;
- insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
- insn->bits3.dp_write_igdng.msg_type = msg_type;
- insn->bits3.dp_write_igdng.send_commit_msg = 0;
- insn->bits3.dp_write_igdng.header_present = 1;
- insn->bits3.dp_write_igdng.response_length = response_length;
- insn->bits3.dp_write_igdng.msg_length = msg_length;
- insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
- insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ if (brw->gen == 5) {
+ insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
+ insn->bits3.dp_write_gen5.msg_control = msg_control;
+ insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.dp_write_gen5.msg_type = msg_type;
+ insn->bits3.dp_write_gen5.send_commit_msg = 0;
+ insn->bits3.dp_write_gen5.header_present = 1;
+ insn->bits3.dp_write_gen5.response_length = response_length;
+ insn->bits3.dp_write_gen5.msg_length = msg_length;
+ insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ insn->bits2.send_gen5.end_of_thread = end_of_thread;
} else {
insn->bits3.dp_write.binding_table_index = binding_table_index;
insn->bits3.dp_write.msg_control = msg_control;
@@ -398,18 +398,18 @@ static void brw_set_dp_read_message( struct brw_context *brw,
{
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
- insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
- insn->bits3.dp_read_igdng.msg_control = msg_control;
- insn->bits3.dp_read_igdng.msg_type = msg_type;
- insn->bits3.dp_read_igdng.target_cache = target_cache;
- insn->bits3.dp_read_igdng.header_present = 1;
- insn->bits3.dp_read_igdng.response_length = response_length;
- insn->bits3.dp_read_igdng.msg_length = msg_length;
- insn->bits3.dp_read_igdng.pad1 = 0;
- insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
- insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ if (brw->gen == 5) {
+ insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
+ insn->bits3.dp_read_gen5.msg_control = msg_control;
+ insn->bits3.dp_read_gen5.msg_type = msg_type;
+ insn->bits3.dp_read_gen5.target_cache = target_cache;
+ insn->bits3.dp_read_gen5.header_present = 1;
+ insn->bits3.dp_read_gen5.response_length = response_length;
+ insn->bits3.dp_read_gen5.msg_length = msg_length;
+ insn->bits3.dp_read_gen5.pad1 = 0;
+ insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
+ insn->bits2.send_gen5.end_of_thread = end_of_thread;
} else {
insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
@@ -437,18 +437,18 @@ static void brw_set_sampler_message(struct brw_context *brw,
assert(eot == 0);
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
- insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
- insn->bits3.sampler_igdng.sampler = sampler;
- insn->bits3.sampler_igdng.msg_type = msg_type;
- insn->bits3.sampler_igdng.simd_mode = simd_mode;
- insn->bits3.sampler_igdng.header_present = header_present;
- insn->bits3.sampler_igdng.response_length = response_length;
- insn->bits3.sampler_igdng.msg_length = msg_length;
- insn->bits3.sampler_igdng.end_of_thread = eot;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
- insn->bits2.send_igdng.end_of_thread = eot;
- } else if (BRW_IS_G4X(brw)) {
+ if (brw->gen == 5) {
+ insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
+ insn->bits3.sampler_gen5.sampler = sampler;
+ insn->bits3.sampler_gen5.msg_type = msg_type;
+ insn->bits3.sampler_gen5.simd_mode = simd_mode;
+ insn->bits3.sampler_gen5.header_present = header_present;
+ insn->bits3.sampler_gen5.response_length = response_length;
+ insn->bits3.sampler_gen5.msg_length = msg_length;
+ insn->bits3.sampler_gen5.end_of_thread = eot;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
+ insn->bits2.send_gen5.end_of_thread = eot;
+ } else if (brw->is_g4x) {
insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
insn->bits3.sampler_g4x.sampler = sampler;
insn->bits3.sampler_g4x.msg_type = msg_type;
@@ -478,7 +478,7 @@ static struct brw_instruction *next_insn( struct brw_compile *p,
if (0 && (BRW_DEBUG & DEBUG_DISASSEM))
{
if (p->nr_insn)
- brw_disasm_insn(stderr, &p->store[p->nr_insn-1]);
+ brw_disasm_insn(stderr, &p->store[p->nr_insn-1], p->brw->gen);
}
assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
@@ -658,7 +658,7 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p,
struct brw_instruction *insn;
GLuint br = 1;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
br = 2;
if (p->single_program_flow) {
@@ -699,7 +699,7 @@ void brw_ENDIF(struct brw_compile *p,
{
GLuint br = 1;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
br = 2;
if (p->single_program_flow) {
@@ -813,7 +813,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *insn;
GLuint br = 1;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
br = 2;
if (p->single_program_flow)
@@ -856,7 +856,7 @@ void brw_land_fwd_jump(struct brw_compile *p,
struct brw_instruction *landing = &p->store[p->nr_insn];
GLuint jmpi = 1;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
jmpi = 2;
assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c
index 06826635a8..2a8165b83e 100644
--- a/src/gallium/drivers/i965/brw_gs.c
+++ b/src/gallium/drivers/i965/brw_gs.c
@@ -51,13 +51,13 @@ static enum pipe_error compile_gs_prog( struct brw_context *brw,
memset(&c, 0, sizeof(c));
c.key = *key;
- c.need_ff_sync = BRW_IS_IGDNG(brw);
+ c.need_ff_sync = brw->gen == 5;
/* Need to locate the two positions present in vertex + header.
* These are currently hardcoded:
*/
c.nr_attrs = c.key.nr_attrs;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
else
c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c
index b64ec286ce..6e070f6d75 100644
--- a/src/gallium/drivers/i965/brw_gs_state.c
+++ b/src/gallium/drivers/i965/brw_gs_state.c
@@ -103,7 +103,7 @@ gs_unit_create_from_key(struct brw_context *brw,
else
gs.thread4.max_threads = 0;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
gs.thread4.rendering_enable = 1;
if (BRW_DEBUG & DEBUG_STATS)
diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index 6d89b5d2ba..d53ce6ccfd 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -239,7 +239,7 @@ static int prepare_depthbuffer(struct brw_context *brw)
static int emit_depthbuffer(struct brw_context *brw)
{
struct pipe_surface *surface = brw->curr.fb.zsbuf;
- unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5;
+ unsigned int len = (brw->is_g4x || brw->gen == 5) ? 6 : 5;
if (surface == NULL) {
BEGIN_BATCH(len, IGNORE_CLIPRECTS);
@@ -250,7 +250,7 @@ static int emit_depthbuffer(struct brw_context *brw)
OUT_BATCH(0);
OUT_BATCH(0);
- if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ if (brw->is_g4x || brw->gen == 5)
OUT_BATCH(0);
ADVANCE_BATCH();
@@ -298,7 +298,7 @@ static int emit_depthbuffer(struct brw_context *brw)
((surface->height - 1) << 19));
OUT_BATCH(0);
- if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ if (brw->is_g4x || brw->gen == 5)
OUT_BATCH(0);
ADVANCE_BATCH();
@@ -363,10 +363,10 @@ const struct brw_tracked_state brw_line_stipple = {
/***********************************************************************
- * Misc invarient state packets
+ * Misc invariant state packets
*/
-static int upload_invarient_state( struct brw_context *brw )
+static int upload_invariant_state( struct brw_context *brw )
{
{
/* 0x61040000 Pipeline Select */
@@ -374,7 +374,7 @@ static int upload_invarient_state( struct brw_context *brw )
struct brw_pipeline_select ps;
memset(&ps, 0, sizeof(ps));
- if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ if (brw->is_g4x || brw->gen == 5)
ps.header.opcode = CMD_PIPELINE_SELECT_GM45;
else
ps.header.opcode = CMD_PIPELINE_SELECT_965;
@@ -413,7 +413,7 @@ static int upload_invarient_state( struct brw_context *brw )
struct brw_vf_statistics vfs;
memset(&vfs, 0, sizeof(vfs));
- if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ if (brw->is_g4x || brw->gen == 5)
vfs.opcode = CMD_VF_STATISTICS_GM45;
else
vfs.opcode = CMD_VF_STATISTICS_965;
@@ -424,7 +424,7 @@ static int upload_invarient_state( struct brw_context *brw )
BRW_BATCH_STRUCT(brw, &vfs);
}
- if (!BRW_IS_965(brw))
+ if (!(brw->gen == 4))
{
struct brw_aa_line_parameters balp;
@@ -439,7 +439,7 @@ static int upload_invarient_state( struct brw_context *brw )
{
struct brw_polygon_stipple_offset bpso;
- /* This is invarient state in gallium:
+ /* This is invariant state in gallium:
*/
memset(&bpso, 0, sizeof(bpso));
bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
@@ -453,13 +453,13 @@ static int upload_invarient_state( struct brw_context *brw )
return 0;
}
-const struct brw_tracked_state brw_invarient_state = {
+const struct brw_tracked_state brw_invariant_state = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
- .emit = upload_invarient_state
+ .emit = upload_invariant_state
};
@@ -480,7 +480,7 @@ static int upload_state_base_address( struct brw_context *brw )
/* Output the structure (brw_state_base_address) directly to the
* batchbuffer, so we can emit relocations inline.
*/
- if (BRW_IS_IGDNG(brw)) {
+ if (brw->gen == 5) {
BEGIN_BATCH(8, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
OUT_BATCH(1); /* General state base address */
diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c
index 0ae1a6be9e..3b4a99beed 100644
--- a/src/gallium/drivers/i965/brw_pipe_flush.c
+++ b/src/gallium/drivers/i965/brw_pipe_flush.c
@@ -38,7 +38,6 @@ void brw_context_flush( struct brw_context *brw )
static void
brw_flush( struct pipe_context *pipe,
- unsigned flags,
struct pipe_fence_handle **fence )
{
brw_context_flush( brw_context( pipe ) );
diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c
index 4c1a6d7dcd..c86681d149 100644
--- a/src/gallium/drivers/i965/brw_pipe_rast.c
+++ b/src/gallium/drivers/i965/brw_pipe_rast.c
@@ -35,7 +35,7 @@ calculate_clip_key_rast( const struct brw_context *brw,
{
memset(key, 0, sizeof *key);
- if (brw->chipset.is_igdng)
+ if (brw->gen == 5)
key->clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
else
key->clip_mode = BRW_CLIPMODE_NORMAL;
diff --git a/src/gallium/drivers/i965/brw_pipe_surface.c b/src/gallium/drivers/i965/brw_pipe_surface.c
index 4deead98b1..58a610089e 100644
--- a/src/gallium/drivers/i965/brw_pipe_surface.c
+++ b/src/gallium/drivers/i965/brw_pipe_surface.c
@@ -169,20 +169,15 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen,
surface->ss.ss1.base_addr = surface->offset - tile_offset;
- if (brw_screen->chipset.is_g4x) {
- if (tex->tiling == BRW_TILING_X) {
- /* Note that the low bits of these fields are missing, so
- * there's the possibility of getting in trouble.
- */
- surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4;
- surface->ss.ss5.y_offset = tile_offset / 512 / 2;
- } else {
- surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4;
+ if (tex->tiling == BRW_TILING_X) {
+ /* Note that the low bits of these fields are missing, so
+ * there's the possibility of getting in trouble.
+ */
+ surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4;
+ surface->ss.ss5.y_offset = tile_offset / 512 / 2;
+ } else {
+ surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4;
surface->ss.ss5.y_offset = tile_offset / 128 / 2;
- }
- }
- else {
- assert(tile_offset == 0);
}
}
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
index 007239efc4..570ea23ff4 100644
--- a/src/gallium/drivers/i965/brw_pipe_vertex.c
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -4,6 +4,7 @@
#include "util/u_memory.h"
#include "util/u_format.h"
+#include "util/u_transfer.h"
static unsigned brw_translate_surface_format( unsigned id )
@@ -203,7 +204,7 @@ static void brw_translate_vertex_elements(struct brw_context *brw,
brw_velems->ve[i].ve1.vfcomponent2 = comp2;
brw_velems->ve[i].ve1.vfcomponent3 = comp3;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
brw_velems->ve[i].ve1.dst_offset = 0;
else
brw_velems->ve[i].ve1.dst_offset = i * 4;
@@ -248,7 +249,6 @@ static void brw_set_vertex_buffers(struct pipe_context *pipe,
const struct pipe_vertex_buffer *buffers)
{
struct brw_context *brw = brw_context(pipe);
- unsigned i;
/* Check for no change */
if (count == brw->curr.num_vertex_buffers &&
@@ -257,18 +257,9 @@ static void brw_set_vertex_buffers(struct pipe_context *pipe,
count * sizeof buffers[0]) == 0)
return;
- /* Adjust refcounts */
- for (i = 0; i < count; i++)
- pipe_resource_reference(&brw->curr.vertex_buffer[i].buffer,
- buffers[i].buffer);
-
- for ( ; i < brw->curr.num_vertex_buffers; i++)
- pipe_resource_reference(&brw->curr.vertex_buffer[i].buffer,
- NULL);
-
- /* Copy remaining data */
- memcpy(brw->curr.vertex_buffer, buffers, count * sizeof buffers[0]);
- brw->curr.num_vertex_buffers = count;
+ util_copy_vertex_buffers(brw->curr.vertex_buffer,
+ &brw->curr.num_vertex_buffers,
+ buffers, count);
brw->state.dirty.mesa |= PIPE_NEW_VERTEX_BUFFER;
}
@@ -312,15 +303,20 @@ brw_pipe_vertex_init( struct brw_context *brw )
brw->base.create_vertex_elements_state = brw_create_vertex_elements_state;
brw->base.bind_vertex_elements_state = brw_bind_vertex_elements_state;
brw->base.delete_vertex_elements_state = brw_delete_vertex_elements_state;
+ brw->base.redefine_user_buffer = u_default_redefine_user_buffer;
}
void
brw_pipe_vertex_cleanup( struct brw_context *brw )
{
+ unsigned i;
/* Release bound pipe vertex_buffers
*/
+ for (i = 0; i < brw->curr.num_vertex_buffers; i++) {
+ pipe_resource_reference(&brw->curr.vertex_buffer[i].buffer, NULL);
+ }
/* Release some other stuff
*/
diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h
index ba10f9d5df..53c7c43571 100644
--- a/src/gallium/drivers/i965/brw_reg.h
+++ b/src/gallium/drivers/i965/brw_reg.h
@@ -93,18 +93,54 @@
#define PCI_CHIP_G45_G 0x2E22
#define PCI_CHIP_G41_G 0x2E32
#define PCI_CHIP_B43_G 0x2E42
+#define PCI_CHIP_B43_G1 0x2E92
#define PCI_CHIP_ILD_G 0x0042
#define PCI_CHIP_ILM_G 0x0046
-struct brw_chipset {
- unsigned pci_id:16;
- unsigned is_965:1;
- unsigned is_igdng:1;
- unsigned is_g4x:1;
- unsigned pad:13;
-};
-
+#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* Desktop */
+#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
+#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
+#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* Mobile */
+#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
+#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
+#define PCI_CHIP_SANDYBRIDGE_S 0x010A /* Server */
+
+#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
+ devid == PCI_CHIP_Q45_G || \
+ devid == PCI_CHIP_G45_G || \
+ devid == PCI_CHIP_G41_G || \
+ devid == PCI_CHIP_B43_G || \
+ devid == PCI_CHIP_B43_G1)
+#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
+#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
+
+#define IS_GEN4(devid) (devid == PCI_CHIP_I965_G || \
+ devid == PCI_CHIP_I965_Q || \
+ devid == PCI_CHIP_I965_G_1 || \
+ devid == PCI_CHIP_I965_GM || \
+ devid == PCI_CHIP_I965_GME || \
+ devid == PCI_CHIP_I946_GZ || \
+ IS_G4X(devid))
+
+#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G)
+#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G)
+#define IS_GEN5(devid) (IS_ILD(devid) || IS_ILM(devid))
+
+#define IS_IRONLAKE(devid) IS_GEN5(devid)
+
+#define IS_GEN6(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
+ devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
+ devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \
+ devid == PCI_CHIP_SANDYBRIDGE_S)
+
+#define IS_965(devid) (IS_GEN4(devid) || \
+ IS_G4X(devid) || \
+ IS_GEN5(devid) || \
+ IS_GEN6(devid))
/* XXX: hacks
*/
diff --git a/src/gallium/drivers/i965/brw_resource_buffer.c b/src/gallium/drivers/i965/brw_resource_buffer.c
index afb96ee3e7..32dc54f2b2 100644
--- a/src/gallium/drivers/i965/brw_resource_buffer.c
+++ b/src/gallium/drivers/i965/brw_resource_buffer.c
@@ -91,30 +91,10 @@ brw_buffer_transfer_unmap( struct pipe_context *pipe,
}
-static unsigned brw_buffer_is_referenced( struct pipe_context *pipe,
- struct pipe_resource *resource,
- unsigned level,
- int layer)
-{
- struct brw_context *brw = brw_context(pipe);
- struct brw_winsys_buffer *batch_bo = brw->batch->buf;
- struct brw_buffer *buf = brw_buffer(resource);
-
- if (buf->bo == NULL)
- return PIPE_UNREFERENCED;
-
- if (!brw_screen(pipe->screen)->sws->bo_references( batch_bo, buf->bo ))
- return PIPE_UNREFERENCED;
-
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
-
struct u_resource_vtbl brw_buffer_vtbl =
{
brw_buffer_get_handle, /* get_handle */
brw_buffer_destroy, /* resource_destroy */
- brw_buffer_is_referenced, /* is_resource_referenced */
u_default_get_transfer, /* get_transfer */
u_default_transfer_destroy, /* transfer_destroy */
brw_buffer_transfer_map, /* transfer_map */
diff --git a/src/gallium/drivers/i965/brw_resource_texture.c b/src/gallium/drivers/i965/brw_resource_texture.c
index fded2da382..71a1829024 100644
--- a/src/gallium/drivers/i965/brw_resource_texture.c
+++ b/src/gallium/drivers/i965/brw_resource_texture.c
@@ -225,48 +225,6 @@ static void brw_texture_destroy(struct pipe_screen *screen,
}
-
-
-static unsigned brw_texture_is_referenced( struct pipe_context *pipe,
- struct pipe_resource *texture,
- unsigned level,
- int layer )
-{
- struct brw_context *brw = brw_context(pipe);
- struct brw_screen *bscreen = brw_screen(pipe->screen);
- struct brw_winsys_buffer *batch_bo = brw->batch->buf;
- struct brw_texture *tex = brw_texture(texture);
- struct brw_surface *surf;
- int i;
-
- /* XXX: this is subject to false positives if the underlying
- * texture BO is referenced, we can't tell whether the sub-region
- * we care about participates in that.
- */
- if (bscreen->sws->bo_references( batch_bo, tex->bo ))
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-
- /* Find any view on this texture for this level/layer and see if it
- * is referenced:
- */
- for (i = 0; i < 2; i++) {
- foreach (surf, &tex->views[i]) {
- if (surf->bo == tex->bo)
- continue;
-
- if (!(layer == -1 || surf->id.bits.layer == layer) ||
- surf->id.bits.level != level)
- continue;
-
- if (bscreen->sws->bo_references( batch_bo, surf->bo))
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
- }
- }
-
- return PIPE_UNREFERENCED;
-}
-
-
/*
* Transfer functions
*/
@@ -347,7 +305,6 @@ struct u_resource_vtbl brw_texture_vtbl =
{
brw_texture_get_handle, /* get_handle */
brw_texture_destroy, /* resource_destroy */
- brw_texture_is_referenced, /* is_resource_referenced */
brw_texture_get_transfer, /* get_transfer */
u_default_transfer_destroy, /* transfer_destroy */
brw_texture_transfer_map, /* transfer_map */
@@ -392,7 +349,7 @@ brw_texture_create( struct pipe_screen *screen,
if (tex->compressed == 0 &&
!bscreen->no_tiling)
{
- if (bscreen->chipset.is_965 &&
+ if (bscreen->gen < 5 &&
util_format_is_depth_or_stencil(template->format))
tex->tiling = BRW_TILING_Y;
else
diff --git a/src/gallium/drivers/i965/brw_resource_texture_layout.c b/src/gallium/drivers/i965/brw_resource_texture_layout.c
index 2187bdd82c..afecc77e31 100644
--- a/src/gallium/drivers/i965/brw_resource_texture_layout.c
+++ b/src/gallium/drivers/i965/brw_resource_texture_layout.c
@@ -388,7 +388,7 @@ GLboolean brw_texture_layout(struct brw_screen *brw_screen,
{
switch (tex->b.b.target) {
case PIPE_TEXTURE_CUBE:
- if (brw_screen->chipset.is_igdng)
+ if (brw_screen->gen == 5)
brw_layout_cubemap_idgng( tex );
else
brw_layout_3d_cube( tex );
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index f5b75b17e3..25204fd088 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -97,7 +97,7 @@ brw_get_name(struct pipe_screen *screen)
static char buffer[128];
const char *chipset;
- switch (brw_screen(screen)->chipset.pci_id) {
+ switch (brw_screen(screen)->pci_id) {
case PCI_CHIP_I965_G:
chipset = "I965_G";
break;
@@ -278,8 +278,7 @@ brw_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned tex_usage,
- unsigned geom_flags)
+ unsigned tex_usage)
{
static const enum pipe_format tex_supported[] = {
PIPE_FORMAT_L8_UNORM,
@@ -365,20 +364,19 @@ brw_fence_reference(struct pipe_screen *screen,
{
}
-static int
+static boolean
brw_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence,
- unsigned flags)
+ struct pipe_fence_handle *fence)
{
- return 0; /* XXX shouldn't this be a boolean? */
+ return TRUE;
}
-static int
+static boolean
brw_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
- unsigned flags)
+ uint64_t timeout)
{
- return 0;
+ return TRUE;
}
@@ -405,8 +403,6 @@ struct pipe_screen *
brw_screen_create(struct brw_winsys_screen *sws)
{
struct brw_screen *bscreen;
- struct brw_chipset chipset;
-
#ifdef DEBUG
BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0);
BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0);
@@ -415,46 +411,30 @@ brw_screen_create(struct brw_winsys_screen *sws)
BRW_DUMP = debug_get_flags_option("BRW_DUMP", dump_names, 0);
#endif
- memset(&chipset, 0, sizeof chipset);
-
- chipset.pci_id = sws->pci_id;
-
- switch (chipset.pci_id) {
- case PCI_CHIP_I965_G:
- case PCI_CHIP_I965_Q:
- case PCI_CHIP_I965_G_1:
- case PCI_CHIP_I946_GZ:
- case PCI_CHIP_I965_GM:
- case PCI_CHIP_I965_GME:
- chipset.is_965 = TRUE;
- break;
-
- case PCI_CHIP_GM45_GM:
- case PCI_CHIP_IGD_E_G:
- case PCI_CHIP_Q45_G:
- case PCI_CHIP_G45_G:
- case PCI_CHIP_G41_G:
- case PCI_CHIP_B43_G:
- chipset.is_g4x = TRUE;
- break;
-
- case PCI_CHIP_ILD_G:
- case PCI_CHIP_ILM_G:
- chipset.is_igdng = TRUE;
- break;
+ bscreen = CALLOC_STRUCT(brw_screen);
+ if (!bscreen)
+ return NULL;
- default:
+ bscreen->pci_id = sws->pci_id;
+ if (IS_GEN6(sws->pci_id)) {
+ bscreen->gen = 6;
+ bscreen->needs_ff_sync = TRUE;
+ } else if (IS_GEN5(sws->pci_id)) {
+ bscreen->gen = 5;
+ bscreen->needs_ff_sync = TRUE;
+ } else if (IS_965(sws->pci_id)) {
+ bscreen->gen = 4;
+ if (IS_G4X(sws->pci_id)) {
+ bscreen->is_g4x = true;
+ }
+ } else {
debug_printf("%s: unknown pci id 0x%x, cannot create screen\n",
- __FUNCTION__, chipset.pci_id);
+ __FUNCTION__, sws->pci_id);
+ free(bscreen);
return NULL;
}
-
- bscreen = CALLOC_STRUCT(brw_screen);
- if (!bscreen)
- return NULL;
-
- bscreen->chipset = chipset;
+ sws->gen = bscreen->gen;
bscreen->sws = sws;
bscreen->base.winsys = NULL;
bscreen->base.destroy = brw_destroy_screen;
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
index 58e293bc76..a62e1afc40 100644
--- a/src/gallium/drivers/i965/brw_screen.h
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -43,7 +43,11 @@ struct brw_winsys_screen;
struct brw_screen
{
struct pipe_screen base;
- struct brw_chipset chipset;
+ int gen;
+ boolean has_negative_rhw_bug;
+ boolean needs_ff_sync;
+ boolean is_g4x;
+ int pci_id;
struct brw_winsys_screen *sws;
boolean no_tiling;
};
diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c
index 497634ec9e..901c334164 100644
--- a/src/gallium/drivers/i965/brw_sf_emit.c
+++ b/src/gallium/drivers/i965/brw_sf_emit.c
@@ -161,7 +161,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c )
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
jmpi = 2;
brw_push_insn_state(p);
@@ -205,7 +205,7 @@ static void do_flatshade_line( struct brw_sf_compile *c )
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
jmpi = 2;
brw_push_insn_state(p);
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
index 6c299a86b4..eec024650c 100644
--- a/src/gallium/drivers/i965/brw_sf_state.c
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -148,7 +148,7 @@ sf_unit_create_from_key(struct brw_context *brw,
sf.thread3.dispatch_grf_start_reg = 3;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
sf.thread3.urb_entry_read_offset = 3;
else
sf.thread3.urb_entry_read_offset = 1;
@@ -161,7 +161,7 @@ sf_unit_create_from_key(struct brw_context *brw,
/* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or
* 48(IGDNG) threads
*/
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
chipset_max_threads = 48;
else
chipset_max_threads = 24;
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
index d2bbd0123d..380d511f9b 100644
--- a/src/gallium/drivers/i965/brw_state.h
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -56,7 +56,7 @@ const struct brw_tracked_state brw_clip_prog;
const struct brw_tracked_state brw_clip_unit;
const struct brw_tracked_state brw_curbe_buffer;
const struct brw_tracked_state brw_curbe_offsets;
-const struct brw_tracked_state brw_invarient_state;
+const struct brw_tracked_state brw_invariant_state;
const struct brw_tracked_state brw_gs_prog;
const struct brw_tracked_state brw_gs_unit;
const struct brw_tracked_state brw_line_stipple;
diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
index f8b91eff81..cdbf270e06 100644
--- a/src/gallium/drivers/i965/brw_state_upload.c
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -69,7 +69,7 @@ const struct brw_tracked_state *atoms[] =
/* Command packets:
*/
- &brw_invarient_state,
+ &brw_invariant_state,
&brw_state_base_address,
&brw_binding_table_pointers,
diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h
index e97ddeb5e1..b0d75b4f82 100644
--- a/src/gallium/drivers/i965/brw_structs.h
+++ b/src/gallium/drivers/i965/brw_structs.h
@@ -279,7 +279,7 @@ struct brw_aa_line_parameters
struct header header;
struct {
- GLuint aa_coverage_scope:8;
+ GLuint aa_coverage_slope:8;
GLuint pad0:8;
GLuint aa_coverage_bias:8;
GLuint pad1:8;
@@ -659,7 +659,105 @@ struct brw_clip_unit_state
GLfloat viewport_ymax;
};
+struct gen6_blend_state
+{
+ struct {
+ GLuint dest_blend_factor:5;
+ GLuint source_blend_factor:5;
+ GLuint pad3:1;
+ GLuint blend_func:3;
+ GLuint pad2:1;
+ GLuint ia_dest_blend_factor:5;
+ GLuint ia_source_blend_factor:5;
+ GLuint pad1:1;
+ GLuint ia_blend_func:3;
+ GLuint pad0:1;
+ GLuint ia_blend_enable:1;
+ GLuint blend_enable:1;
+ } blend0;
+
+ struct {
+ GLuint post_blend_clamp_enable:1;
+ GLuint pre_blend_clamp_enable:1;
+ GLuint clamp_range:2;
+ GLuint pad0:4;
+ GLuint x_dither_offset:2;
+ GLuint y_dither_offset:2;
+ GLuint dither_enable:1;
+ GLuint alpha_test_func:3;
+ GLuint alpha_test_enable:1;
+ GLuint pad1:1;
+ GLuint logic_op_func:4;
+ GLuint logic_op_enable:1;
+ GLuint pad2:1;
+ GLuint write_disable_b:1;
+ GLuint write_disable_g:1;
+ GLuint write_disable_r:1;
+ GLuint write_disable_a:1;
+ GLuint pad3:1;
+ GLuint alpha_to_coverage_dither:1;
+ GLuint alpha_to_one:1;
+ GLuint alpha_to_coverage:1;
+ } blend1;
+};
+struct gen6_color_calc_state
+{
+ struct {
+ GLuint alpha_test_format:1;
+ GLuint pad0:14;
+ GLuint round_disable:1;
+ GLuint bf_stencil_ref:8;
+ GLuint stencil_ref:8;
+ } cc0;
+
+ union {
+ GLfloat alpha_ref_f;
+ struct {
+ GLuint ui:8;
+ GLuint pad0:24;
+ } alpha_ref_fi;
+ } cc1;
+
+ GLfloat constant_r;
+ GLfloat constant_g;
+ GLfloat constant_b;
+ GLfloat constant_a;
+};
+
+struct gen6_depth_stencil_state
+{
+ struct {
+ GLuint pad0:3;
+ GLuint bf_stencil_pass_depth_pass_op:3;
+ GLuint bf_stencil_pass_depth_fail_op:3;
+ GLuint bf_stencil_fail_op:3;
+ GLuint bf_stencil_func:3;
+ GLuint bf_stencil_enable:1;
+ GLuint pad1:2;
+ GLuint stencil_write_enable:1;
+ GLuint stencil_pass_depth_pass_op:3;
+ GLuint stencil_pass_depth_fail_op:3;
+ GLuint stencil_fail_op:3;
+ GLuint stencil_func:3;
+ GLuint stencil_enable:1;
+ } ds0;
+
+ struct {
+ GLuint bf_stencil_write_mask:8;
+ GLuint bf_stencil_test_mask:8;
+ GLuint stencil_write_mask:8;
+ GLuint stencil_test_mask:8;
+ } ds1;
+
+ struct {
+ GLuint pad0:26;
+ GLuint depth_write_enable:1;
+ GLuint depth_test_func:3;
+ GLuint pad1:1;
+ GLuint depth_test_enable:1;
+ } ds2;
+};
struct brw_cc_unit_state
{
@@ -814,6 +912,13 @@ struct brw_sf_unit_state
};
+struct gen6_scissor_rect
+{
+ GLuint xmin:16;
+ GLuint ymin:16;
+ GLuint xmax:16;
+ GLuint ymax:16;
+};
struct brw_gs_unit_state
{
@@ -825,7 +930,7 @@ struct brw_gs_unit_state
struct
{
GLuint pad0:8;
- GLuint rendering_enable:1; /* for IGDNG */
+ GLuint rendering_enable:1; /* for Ironlake */
GLuint pad4:1;
GLuint stats_enable:1;
GLuint nr_urb_entries:7;
@@ -935,7 +1040,7 @@ struct brw_wm_unit_state
GLfloat global_depth_offset_constant;
GLfloat global_depth_offset_scale;
- /* for IGDNG only */
+ /* for Ironlake only */
struct {
GLuint pad0:1;
GLuint grf_reg_count_1:3;
@@ -962,6 +1067,15 @@ struct brw_sampler_default_color {
GLfloat color[4];
};
+struct gen5_sampler_default_color {
+ uint8_t ub[4];
+ float f[4];
+ uint16_t hf[4];
+ uint16_t us[4];
+ int16_t s[4];
+ uint8_t b[4];
+};
+
struct brw_sampler_state
{
@@ -973,7 +1087,7 @@ struct brw_sampler_state
GLuint mag_filter:3;
GLuint mip_filter:2;
GLuint base_level:5;
- GLuint pad:1;
+ GLuint min_mag_neq:1;
GLuint lod_preclamp:1;
GLuint default_color_mode:1;
GLuint pad0:1;
@@ -985,7 +1099,8 @@ struct brw_sampler_state
GLuint r_wrap_mode:3;
GLuint t_wrap_mode:3;
GLuint s_wrap_mode:3;
- GLuint pad:3;
+ GLuint cube_control_mode:1;
+ GLuint pad:2;
GLuint max_lod:10;
GLuint min_lod:10;
} ss1;
@@ -999,7 +1114,9 @@ struct brw_sampler_state
struct brw_ss3
{
- GLuint pad:19;
+ GLuint non_normalized_coord:1;
+ GLuint pad:12;
+ GLuint address_round:6;
GLuint max_aniso:3;
GLuint chroma_key_mode:1;
GLuint chroma_key_index:2;
@@ -1044,6 +1161,15 @@ struct brw_sf_viewport
} scissor;
};
+struct gen6_sf_viewport {
+ GLfloat m00;
+ GLfloat m11;
+ GLfloat m22;
+ GLfloat m30;
+ GLfloat m31;
+ GLfloat m32;
+};
+
/* Documented in the subsystem/shared-functions/sampler chapter...
*/
struct brw_surface_state
@@ -1055,7 +1181,12 @@ struct brw_surface_state
GLuint cube_neg_y:1;
GLuint cube_pos_x:1;
GLuint cube_neg_x:1;
- GLuint pad:4;
+ GLuint pad:2;
+ /* Required on gen6 for surfaces accessed through render cache messages.
+ */
+ GLuint render_cache_read_write:1;
+ /* Ironlake and newer: instead of replicating one of the texels */
+ GLuint cube_corner_average:1;
GLuint mipmap_layout_mode:1;
GLuint vert_line_stride_ofs:1;
GLuint vert_line_stride:1;
@@ -1202,7 +1333,8 @@ struct brw_instruction
GLuint predicate_inverse:1;
GLuint execution_size:3;
GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */
- GLuint pad0:2;
+ GLuint acc_wr_control:1;
+ GLuint cmpt_control:1;
GLuint debug_control:1;
GLuint saturate:1;
} header;
@@ -1250,7 +1382,7 @@ struct brw_instruction
GLuint dest_writemask:4;
GLuint dest_subreg_nr:1;
GLuint dest_reg_nr:8;
- GLuint pad1:2;
+ GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} da16;
@@ -1264,9 +1396,21 @@ struct brw_instruction
GLuint dest_writemask:4;
GLint dest_indirect_offset:6;
GLuint dest_subreg_nr:3;
- GLuint pad1:2;
+ GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} ia16;
+
+ struct {
+ GLuint dest_reg_file:2;
+ GLuint dest_reg_type:3;
+ GLuint src0_reg_file:2;
+ GLuint src0_reg_type:3;
+ GLuint src1_reg_file:2;
+ GLuint src1_reg_type:3;
+ GLuint pad:1;
+
+ GLint jump_count:16;
+ } branch_gen6;
} bits1;
@@ -1339,7 +1483,7 @@ struct brw_instruction
GLuint end_of_thread:1;
GLuint pad1:1;
GLuint sfid:4;
- } send_igdng; /* for IGDNG only */
+ } send_gen5; /* for Ironlake only */
} bits2;
@@ -1413,6 +1557,21 @@ struct brw_instruction
GLuint pad0:12;
} if_else;
+ struct
+ {
+ /* Signed jump distance to the ip to jump to if all channels
+ * are disabled after the break or continue. It should point
+ * to the end of the innermost control flow block, as that's
+ * where some channel could get re-enabled.
+ */
+ int jip:16;
+
+ /* Signed jump distance to the location to resume execution
+ * of this channel if it's enabled for the break or continue.
+ */
+ int uip:16;
+ } break_cont;
+
struct {
GLuint function:4;
GLuint int_type:1;
@@ -1440,7 +1599,7 @@ struct brw_instruction
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } math_igdng;
+ } math_gen5;
struct {
GLuint binding_table_index:8;
@@ -1476,7 +1635,7 @@ struct brw_instruction
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } sampler_igdng;
+ } sampler_gen5;
struct brw_urb_immediate urb;
@@ -1494,7 +1653,7 @@ struct brw_instruction
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } urb_igdng;
+ } urb_gen5;
struct {
GLuint binding_table_index:8;
@@ -1510,6 +1669,18 @@ struct brw_instruction
struct {
GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint msg_type:3;
+ GLuint target_cache:2;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } dp_read_g4x;
+
+ struct {
+ GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint msg_type:3;
GLuint target_cache:2;
@@ -1519,7 +1690,7 @@ struct brw_instruction
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } dp_read_igdng;
+ } dp_read_gen5;
struct {
GLuint binding_table_index:8;
@@ -1546,10 +1717,38 @@ struct brw_instruction
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } dp_write_igdng;
+ } dp_write_gen5;
+
+ /* Sandybridge DP for sample cache, constant cache, render cache */
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:5;
+ GLuint msg_type:3;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_sampler_const_cache;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint slot_group_select:1;
+ GLuint pixel_scoreboard_clear:1;
+ GLuint msg_type:4;
+ GLuint send_commit_msg:1;
+ GLuint pad0:1;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_render_cache;
struct {
- GLuint pad:16;
+ GLuint function_control:16;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
@@ -1557,14 +1756,15 @@ struct brw_instruction
GLuint end_of_thread:1;
} generic;
+ /* Of this struct, only end_of_thread is not present for gen6. */
struct {
- GLuint pad:19;
+ GLuint function_control:19;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } generic_igdng;
+ } generic_gen5;
GLint d;
GLuint ud;
diff --git a/src/gallium/drivers/i965/brw_structs_dump.c b/src/gallium/drivers/i965/brw_structs_dump.c
index cd40fc6d61..f3de2f995b 100644
--- a/src/gallium/drivers/i965/brw_structs_dump.c
+++ b/src/gallium/drivers/i965/brw_structs_dump.c
@@ -72,7 +72,7 @@ brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr)
{
debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
- debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", (*ptr).bits0.aa_coverage_scope);
+ debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", (*ptr).bits0.aa_coverage_slope);
debug_printf("\t\t.bits0.aa_coverage_bias = 0x%x\n", (*ptr).bits0.aa_coverage_bias);
debug_printf("\t\t.bits1.aa_coverage_endcap_slope = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_slope);
debug_printf("\t\t.bits1.aa_coverage_endcap_bias = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_bias);
diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c
index 907ec56c6c..b630752809 100644
--- a/src/gallium/drivers/i965/brw_urb.c
+++ b/src/gallium/drivers/i965/brw_urb.c
@@ -147,7 +147,7 @@ static int recalculate_urb_fence( struct brw_context *brw )
brw->urb.constrained = 0;
- if (BRW_IS_IGDNG(brw)) {
+ if (brw->gen == 5) {
brw->urb.nr_vs_entries = 128;
brw->urb.nr_sf_entries = 48;
if (check_urb_layout(brw)) {
@@ -157,7 +157,7 @@ static int recalculate_urb_fence( struct brw_context *brw )
brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
}
- } else if (BRW_IS_G4X(brw)) {
+ } else if (brw->is_g4x) {
brw->urb.nr_vs_entries = 64;
if (check_urb_layout(brw)) {
goto done;
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h
index 944d88c84c..b6d1091618 100644
--- a/src/gallium/drivers/i965/brw_vs.h
+++ b/src/gallium/drivers/i965/brw_vs.h
@@ -56,7 +56,6 @@ struct brw_vs_compile {
struct brw_compile func;
struct brw_vs_prog_key key;
struct brw_vs_prog_data prog_data;
- struct brw_chipset chipset;
struct brw_vertex_shader *vp;
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 5dcbd597dd..559f0c61d8 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -116,6 +116,7 @@ static boolean find_output_slot( struct brw_vs_compile *c,
*/
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
+ struct brw_context *brw = c->func.brw;
GLuint i, reg = 0, subreg = 0, mrf;
int attributes_in_vue;
@@ -218,7 +219,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
*/
c->nr_outputs = c->prog_data.nr_outputs;
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
mrf = 8;
else
mrf = 4;
@@ -333,7 +334,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
*/
attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
else
c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
@@ -1124,6 +1125,7 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
static void emit_vertex_write( struct brw_vs_compile *c)
{
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
struct brw_reg m0 = brw_message_reg(0);
struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS];
struct brw_reg ndc;
@@ -1143,7 +1145,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
*/
if (c->prog_data.writes_psiz ||
c->key.nr_userclip ||
- c->chipset.is_965)
+ brw->has_negative_rhw_bug)
{
struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
GLuint i;
@@ -1174,7 +1176,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* Later, clipping will detect ucp[6] and ensure the primitive is
* clipped against all fixed planes.
*/
- if (c->chipset.is_965) {
+ if (brw->has_negative_rhw_bug) {
brw_CMP(p,
vec8(brw_null_reg()),
BRW_CONDITIONAL_L,
@@ -1202,7 +1204,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
brw_set_access_mode(p, BRW_ALIGN_1);
brw_MOV(p, offset(m0, 2), ndc);
- if (c->chipset.is_igdng) {
+ if (brw->gen == 5) {
/* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */
brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */
/* m4, m5 contain the distances from vertex to the user clip planeXXX.
@@ -1339,6 +1341,7 @@ static void emit_insn(struct brw_vs_compile *c,
unsigned opcode = inst->Instruction.Opcode;
unsigned label = inst->Label.Label;
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
struct brw_reg args[3], dst;
GLuint i;
@@ -1514,7 +1517,7 @@ static void emit_insn(struct brw_vs_compile *c,
c->loop_depth--;
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
br = 2;
inst0 = inst1 = brw_WHILE(p, c->loop_inst[c->loop_depth]);
@@ -1652,6 +1655,6 @@ void brw_vs_emit(struct brw_vs_compile *c)
if (BRW_DEBUG & DEBUG_VS) {
debug_printf("vs-native:\n");
- brw_disasm(stderr, p->store, p->nr_insn);
+ brw_disasm(stderr, p->store, p->nr_insn, p->brw->gen);
}
}
diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c
index dadbb622e4..6d2ccfd6d9 100644
--- a/src/gallium/drivers/i965/brw_vs_state.c
+++ b/src/gallium/drivers/i965/brw_vs_state.c
@@ -100,7 +100,7 @@ vs_unit_create_from_key(struct brw_context *brw,
*/
vs.thread1.single_program_flow = 0;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
vs.thread1.binding_table_entry_count = 0; /* hardware requirement */
else
vs.thread1.binding_table_entry_count = key->nr_surfaces;
@@ -111,16 +111,16 @@ vs_unit_create_from_key(struct brw_context *brw,
vs.thread3.urb_entry_read_offset = 0;
vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
else
vs.thread4.nr_urb_entries = key->nr_urb_entries;
vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
chipset_max_threads = 72;
- else if (BRW_IS_G4X(brw))
+ else if (brw->is_g4x)
chipset_max_threads = 32;
else
chipset_max_threads = 16;
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index a06f8bb7d6..038f6f788a 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -148,7 +148,7 @@ static INLINE void make_reloc(struct brw_winsys_reloc *reloc,
struct brw_winsys_screen {
unsigned pci_id;
-
+ int gen;
/**
* Buffer functions.
*/
@@ -282,7 +282,7 @@ void brw_dump_data( unsigned pci_id,
enum brw_buffer_data_type data_type,
unsigned offset,
const void *data,
- size_t size );
+ size_t size, int gen );
#endif
diff --git a/src/gallium/drivers/i965/brw_winsys_debug.c b/src/gallium/drivers/i965/brw_winsys_debug.c
index f8f6a539bc..b66b1cfccb 100644
--- a/src/gallium/drivers/i965/brw_winsys_debug.c
+++ b/src/gallium/drivers/i965/brw_winsys_debug.c
@@ -9,7 +9,7 @@ void brw_dump_data( unsigned pci_id,
enum brw_buffer_data_type data_type,
unsigned offset,
const void *data,
- size_t size )
+ size_t size, int gen )
{
if (BRW_DUMP & DUMP_ASM) {
switch (data_type) {
@@ -18,7 +18,7 @@ void brw_dump_data( unsigned pci_id,
case BRW_DATA_GS_VS_PROG:
case BRW_DATA_GS_GS_PROG:
case BRW_DATA_GS_CLIP_PROG:
- brw_disasm( stderr, data, size / sizeof(struct brw_instruction) );
+ brw_disasm( stderr, (struct brw_instruction *)data, size / sizeof(struct brw_instruction), gen );
break;
default:
break;
@@ -77,7 +77,7 @@ void brw_dump_data( unsigned pci_id,
if (BRW_DUMP & DUMP_BATCH) {
switch (data_type) {
case BRW_DATA_BATCH_BUFFER:
- intel_decode(data, size / 4, offset, pci_id);
+ intel_decode(data, size / 4, offset, pci_id, 0);
break;
default:
break;
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
index 8f983a60ae..6301062fd7 100644
--- a/src/gallium/drivers/i965/brw_wm_emit.c
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -848,11 +848,11 @@ static void emit_tex( struct brw_wm_compile *c,
responseLength = 8; /* always */
- if (BRW_IS_IGDNG(p->brw)) {
+ if (p->brw->gen == 5) {
if (shadow)
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
else
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
} else {
if (shadow)
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
@@ -917,8 +917,8 @@ static void emit_txb( struct brw_wm_compile *c,
brw_MOV(p, brw_message_reg(8), coord[3]);
msgLength = 9;
- if (BRW_IS_IGDNG(p->brw))
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
+ if (p->brw->gen == 5)
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
else
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
@@ -1516,6 +1516,6 @@ void brw_wm_emit( struct brw_wm_compile *c )
if (BRW_DEBUG & DEBUG_WM) {
debug_printf("wm-native:\n");
- brw_disasm(stderr, p->store, p->nr_insn);
+ brw_disasm(stderr, p->store, p->nr_insn, p->brw->gen);
}
}
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index f7ee55cc1c..a65e16edec 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -812,7 +812,7 @@ static void precalc_tex( struct brw_wm_compile *c,
}
/* XXX: add GL_EXT_texture_swizzle support to gallium -- by
- * generating shader varients in mesa state tracker.
+ * generating shader variants in mesa state tracker.
*/
/* Release this temp if we ended up allocating it:
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
index 3b3afc39d3..fb8e40d928 100644
--- a/src/gallium/drivers/i965/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -1607,7 +1607,7 @@ static void emit_txb(struct brw_wm_compile *c,
brw_MOV(p, brw_message_reg(5), src[3]); /* bias */
brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
- if (BRW_IS_IGDNG(p->brw)) {
+ if (p->brw->gen == 5) {
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG;
} else {
/* Does it work well on SIMD8? */
@@ -1688,7 +1688,7 @@ static void emit_tex(struct brw_wm_compile *c,
brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */
}
- if (BRW_IS_IGDNG(p->brw)) {
+ if (p->brw->gen == 5) {
if (shadow)
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG;
else
@@ -1970,7 +1970,7 @@ static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_
struct brw_instruction *inst0, *inst1;
GLuint br = 1;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
br = 2;
loop_depth--;
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
index efc2d96be1..a690003ecb 100644
--- a/src/gallium/drivers/i965/brw_wm_state.c
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -70,9 +70,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
key->max_threads = 1;
else {
/* WM maximum threads is number of EUs times number of threads per EU. */
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
key->max_threads = 12 * 6;
- else if (BRW_IS_G4X(brw))
+ else if (brw->is_g4x)
key->max_threads = 10 * 5;
else
key->max_threads = 8 * 4;
@@ -155,7 +155,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
wm.thread1.depth_coef_urb_read_offset = 1;
wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
else
wm.thread1.binding_table_entry_count = key->nr_surfaces;
@@ -174,7 +174,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
wm.wm4.sampler_count = 0; /* hardware requirement */
else
wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
@@ -277,7 +277,7 @@ static enum pipe_error upload_wm_unit( struct brw_context *brw )
grf_reg_count = (align(key.total_grf, 16) / 16 - 1);
per_thread_scratch_space = key.total_scratch / 1024 - 1;
stats_enable = (BRW_DEBUG & DEBUG_STATS) || key.stats_wm;
- sampler_count = BRW_IS_IGDNG(brw) ? 0 :(key.sampler_count + 1) / 4;
+ sampler_count = brw->gen == 5 ? 0 :(key.sampler_count + 1) / 4;
/* Emit WM program relocation */
make_reloc(&reloc[nr_reloc++],
diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c
index 36c04a3165..1abe869f1a 100644
--- a/src/gallium/drivers/i965/intel_decode.c
+++ b/src/gallium/drivers/i965/intel_decode.c
@@ -42,10 +42,11 @@
#include "util/u_memory.h"
#include "util/u_string.h"
+
#include "intel_decode.h"
+#include "brw_reg.h"
/*#include "intel_chipset.h"*/
-#define IS_965(x) 1 /* XXX */
#define IS_9XX(x) 1 /* XXX */
#define BUFFER_FAIL(_count, _len, _name) do { \
@@ -99,10 +100,11 @@ decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
} opcodes_mi[] = {
{ 0x08, 0, 1, 1, "MI_ARB_ON_OFF" },
{ 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" },
+ { 0x30, 0x3f, 3, 3, "MI_BATCH_BUFFER" },
{ 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" },
{ 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" },
{ 0x04, 0, 1, 1, "MI_FLUSH" },
- { 0x22, 0, 3, 3, "MI_LOAD_REGISTER_IMM" },
+ { 0x22, 0x1f, 3, 3, "MI_LOAD_REGISTER_IMM" },
{ 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" },
{ 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" },
{ 0x00, 0, 1, 1, "MI_NOOP" },
@@ -116,6 +118,11 @@ decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
{ 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT" },
};
+ switch ((data[0] & 0x1f800000) >> 23) {
+ case 0x0a:
+ instr_out(data, hw_offset, 0, "MI_BATCH_BUFFER_END\n");
+ return -1;
+ }
for (opcode = 0; opcode < Elements(opcodes_mi); opcode++) {
if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) {
@@ -308,9 +315,13 @@ decode_2d(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
static int
decode_3d_1c(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
{
- switch ((data[0] & 0x00f80000) >> 19) {
+ uint32_t opcode;
+
+ opcode = (data[0] & 0x00f80000) >> 19;
+
+ switch (opcode) {
case 0x11:
- instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISALBE\n");
+ instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n");
return 1;
case 0x10:
instr_out(data, hw_offset, 0, "3DSTATE_SCISSOR_ENABLE\n");
@@ -326,7 +337,8 @@ decode_3d_1c(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
return 1;
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n",
+ opcode);
(*failures)++;
return 1;
}
@@ -384,7 +396,7 @@ i915_get_instruction_dst(const uint32_t *data, int i, char *dstname, int do_mask
sprintf(dstname, "oD%s%s", dstmask, sat);
break;
case 6:
- if (dst_nr > 2)
+ if (dst_nr > 3)
fprintf(out, "bad destination reg U%d\n", dst_nr);
sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat);
break;
@@ -455,7 +467,7 @@ i915_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name)
break;
case 6:
sprintf(name, "U%d", src_nr);
- if (src_nr > 2)
+ if (src_nr > 3)
fprintf(out, "bad src reg %s\n", name);
break;
default:
@@ -800,10 +812,14 @@ i915_decode_instruction(const uint32_t *data, uint32_t hw_offset,
}
static int
-decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830)
+decode_3d_1d(const uint32_t *data, int count,
+ uint32_t hw_offset,
+ uint32_t devid,
+ int *failures)
{
- unsigned int len, i, c, opcode, word, map, sampler, instr;
+ unsigned int len, i, c, idx, word, map, sampler, instr;
char *format;
+ uint32_t opcode;
struct {
uint32_t opcode;
@@ -814,7 +830,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
} opcodes_3d_1d[] = {
{ 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" },
{ 0x86, 0, 4, 4, "3DSTATE_CHROMA_KEY" },
- { 0x9c, 0, 1, 1, "3DSTATE_CLEAR_PARAMETERS" },
+ { 0x9c, 0, 7, 7, "3DSTATE_CLEAR_PARAMETERS" },
{ 0x88, 0, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
{ 0x99, 0, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
{ 0x9a, 0, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
@@ -822,7 +838,6 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
{ 0x97, 0, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
{ 0x85, 0, 2, 2, "3DSTATE_DEST_BUFFER_VARIABLES" },
{ 0x80, 0, 5, 5, "3DSTATE_DRAWING_RECTANGLE" },
- { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" },
{ 0x9d, 0, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
{ 0x9e, 0, 4, 4, "3DSTATE_MONO_FILTER" },
{ 0x89, 0, 4, 4, "3DSTATE_FOG_MODE" },
@@ -834,9 +849,11 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
{ 0x8d, 1, 3, 3, "3DSTATE_W_STATE_I830" },
{ 0x01, 1, 2, 2, "3DSTATE_COLOR_FACTOR_I830" },
{ 0x02, 1, 2, 2, "3DSTATE_MAP_COORD_SETBIND_I830" },
- };
+ }, *opcode_3d_1d;
- switch ((data[0] & 0x00ff0000) >> 16) {
+ opcode = (data[0] & 0x00ff0000) >> 16;
+
+ switch (opcode) {
case 0x07:
/* This instruction is unusual. A 0 length means just 1 DWORD instead of
* 2. The 0 length is specified in one place to be unsupported, but
@@ -891,26 +908,56 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
len = (data[0] & 0x0000000f) + 2;
i = 1;
- for (word = 0; word <= 7; word++) {
+ for (word = 0; word <= 8; word++) {
if (data[0] & (1 << (4 + word))) {
if (i >= count)
BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_1");
/* save vertex state for decode */
- if (word == 2) {
- saved_s2_set = 1;
- saved_s2 = data[i];
- }
- if (word == 4) {
- saved_s4_set = 1;
- saved_s4 = data[i];
+ if (IS_9XX(devid)) {
+ if (word == 2) {
+ saved_s2_set = 1;
+ saved_s2 = data[i];
+ }
+ if (word == 4) {
+ saved_s4_set = 1;
+ saved_s4 = data[i];
+ }
}
instr_out(data, hw_offset, i++, "S%d\n", word);
}
}
if (len != i) {
- fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n");
+ fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
+ (*failures)++;
+ }
+ return len;
+ case 0x03:
+ instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_2\n");
+ len = (data[0] & 0x0000000f) + 2;
+ i = 1;
+ for (word = 6; word <= 14; word++) {
+ if (data[0] & (1 << word)) {
+ if (i >= count)
+ BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_2");
+
+ if (word == 6)
+ instr_out(data, hw_offset, i++, "TBCF\n");
+ else if (word >= 7 && word <= 10) {
+ instr_out(data, hw_offset, i++, "TB%dC\n", word - 7);
+ instr_out(data, hw_offset, i++, "TB%dA\n", word - 7);
+ } else if (word >= 11 && word <= 14) {
+ instr_out(data, hw_offset, i++, "TM%dS0\n", word - 11);
+ instr_out(data, hw_offset, i++, "TM%dS1\n", word - 11);
+ instr_out(data, hw_offset, i++, "TM%dS2\n", word - 11);
+ instr_out(data, hw_offset, i++, "TM%dS3\n", word - 11);
+ instr_out(data, hw_offset, i++, "TM%dS4\n", word - 11);
+ }
+ }
+ }
+ if (len != i) {
+ fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_2\n");
(*failures)++;
}
return len;
@@ -922,11 +969,27 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
i = 2;
for (map = 0; map <= 15; map++) {
if (data[1] & (1 << map)) {
+ int width, height, pitch, dword;
+ const char *tiling;
+
if (i + 3 >= count)
BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE");
instr_out(data, hw_offset, i++, "map %d MS2\n", map);
- instr_out(data, hw_offset, i++, "map %d MS3\n", map);
- instr_out(data, hw_offset, i++, "map %d MS4\n", map);
+
+ dword = data[i];
+ width = ((dword >> 10) & ((1 << 11) - 1))+1;
+ height = ((dword >> 21) & ((1 << 11) - 1))+1;
+
+ tiling = "none";
+ if (dword & (1 << 2))
+ tiling = "fenced";
+ else if (dword & (1 << 1))
+ tiling = dword & (1 << 0) ? "Y" : "X";
+ instr_out(data, hw_offset, i++, "map %d MS3 [width=%d, height=%d, tiling=%s]\n", map, width, height, tiling);
+
+ dword = data[i];
+ pitch = 4*(((dword >> 21) & ((1 << 11) - 1))+1);
+ instr_out(data, hw_offset, i++, "map %d MS4 [pitch=%d]\n", map, pitch);
}
}
if (len != i) {
@@ -982,8 +1045,8 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
}
return len;
case 0x01:
- if (i830)
- break;
+ if (!IS_9XX(devid))
+ break;
instr_out(data, hw_offset, 0, "3DSTATE_SAMPLER_STATE\n");
instr_out(data, hw_offset, 1, "mask\n");
len = (data[0] & 0x0000003f) + 2;
@@ -1034,30 +1097,61 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
format,
(data[1] & (1 << 31)) ? "en" : "dis");
return len;
+
+ case 0x8e:
+ {
+ const char *name, *tiling;
+
+ len = (data[0] & 0x0000000f) + 2;
+ if (len != 3)
+ fprintf(out, "Bad count in 3DSTATE_BUFFER_INFO\n");
+ if (count < 3)
+ BUFFER_FAIL(count, len, "3DSTATE_BUFFER_INFO");
+
+ switch((data[1] >> 24) & 0x7) {
+ case 0x3: name = "color"; break;
+ case 0x7: name = "depth"; break;
+ default: name = "unknown"; break;
+ }
+
+ tiling = "none";
+ if (data[1] & (1 << 23))
+ tiling = "fenced";
+ else if (data[1] & (1 << 22))
+ tiling = data[1] & (1 << 21) ? "Y" : "X";
+
+ instr_out(data, hw_offset, 0, "3DSTATE_BUFFER_INFO\n");
+ instr_out(data, hw_offset, 1, "%s, tiling = %s, pitch=%d\n", name, tiling, data[1]&0xffff);
+
+ instr_out(data, hw_offset, 2, "address\n");
+ return len;
+ }
}
- for (opcode = 0; opcode < Elements(opcodes_3d_1d); opcode++) {
- if (opcodes_3d_1d[opcode].i830_only && !i830)
+ for (idx = 0; idx < Elements(opcodes_3d_1d); idx++)
+ {
+ opcode_3d_1d = &opcodes_3d_1d[idx];
+ if (opcode_3d_1d->i830_only && IS_9XX(devid))
continue;
- if (((data[0] & 0x00ff0000) >> 16) == opcodes_3d_1d[opcode].opcode) {
+ if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) {
len = 1;
- instr_out(data, hw_offset, 0, "%s\n", opcodes_3d_1d[opcode].name);
- if (opcodes_3d_1d[opcode].max_len > 1) {
+ instr_out(data, hw_offset, 0, "%s\n", opcode_3d_1d->name);
+ if (opcode_3d_1d->max_len > 1) {
len = (data[0] & 0x0000ffff) + 2;
- if (len < opcodes_3d_1d[opcode].min_len ||
- len > opcodes_3d_1d[opcode].max_len)
+ if (len < opcode_3d_1d->min_len ||
+ len > opcode_3d_1d->max_len)
{
fprintf(out, "Bad count in %s\n",
- opcodes_3d_1d[opcode].name);
+ opcode_3d_1d->name);
(*failures)++;
}
}
for (i = 1; i < len; i++) {
if (i >= count)
- BUFFER_FAIL(count, len, opcodes_3d_1d[opcode].name);
+ BUFFER_FAIL(count, len, opcode_3d_1d->name);
instr_out(data, hw_offset, i, "dword %d\n", i);
}
@@ -1065,7 +1159,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
}
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", opcode);
(*failures)++;
return 1;
}
@@ -1075,8 +1169,10 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
int *failures)
{
char immediate = (data[0] & (1 << 23)) == 0;
- unsigned int len, i;
+ unsigned int len, i, ret;
char *primtype;
+ int original_s2 = saved_s2;
+ int original_s4 = saved_s4;
switch ((data[0] >> 18) & 0xf) {
case 0x0: primtype = "TRILIST"; break;
@@ -1089,7 +1185,7 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
case 0x7: primtype = "RECTLIST"; break;
case 0x8: primtype = "POINTLIST"; break;
case 0x9: primtype = "DIB"; break;
- case 0xa: primtype = "CLEAR_RECT"; break;
+ case 0xa: primtype = "CLEAR_RECT"; saved_s4 = 3 << 6; saved_s2 = ~0; break;
default: primtype = "unknown"; break;
}
@@ -1193,6 +1289,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
vertex++;
}
}
+
+ ret = len;
} else {
/* indirect vertices */
len = data[0] & 0x0000ffff; /* index count */
@@ -1210,13 +1308,15 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
if ((data[i] & 0xffff) == 0xffff) {
instr_out(data, hw_offset, i,
" indices: (terminator)\n");
- return i;
+ ret = i;
+ goto out;
} else if ((data[i] >> 16) == 0xffff) {
instr_out(data, hw_offset, i,
" indices: 0x%04x, "
"(terminator)\n",
data[i] & 0xffff);
- return i;
+ ret = i;
+ goto out;
} else {
instr_out(data, hw_offset, i,
" indices: 0x%04x, 0x%04x\n",
@@ -1226,7 +1326,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
fprintf(out,
"3DPRIMITIVE: no terminator found in index buffer\n");
(*failures)++;
- return count;
+ ret = count;
+ goto out;
} else {
/* fixed size vertex index buffer */
for (i = 0; i < len; i += 2) {
@@ -1241,7 +1342,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
}
}
}
- return (len + 1) / 2 + 1;
+ ret = (len + 1) / 2 + 1;
+ goto out;
} else {
/* sequential vertex access */
if (count < 2)
@@ -1250,17 +1352,22 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
"3DPRIMITIVE sequential indirect %s, %d starting from "
"%d\n", primtype, len, data[1] & 0xffff);
instr_out(data, hw_offset, 1, " start\n");
- return 2;
+ ret = 2;
+ goto out;
}
}
- return len;
+out:
+ saved_s2 = original_s2;
+ saved_s4 = original_s4;
+ return ret;
}
static int
-decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_3d(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures)
{
- unsigned int opcode;
+ uint32_t opcode;
+ unsigned int idx;
struct {
uint32_t opcode;
@@ -1277,41 +1384,44 @@ decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
{ 0x0d, 1, 1, "3DSTATE_MODES_4" },
{ 0x0c, 1, 1, "3DSTATE_MODES_5" },
{ 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
- };
+ }, *opcode_3d;
+
+ opcode = (data[0] & 0x1f000000) >> 24;
- switch ((data[0] & 0x1f000000) >> 24) {
+ switch (opcode) {
case 0x1f:
return decode_3d_primitive(data, count, hw_offset, failures);
case 0x1d:
- return decode_3d_1d(data, count, hw_offset, failures, 0);
+ return decode_3d_1d(data, count, hw_offset, devid, failures);
case 0x1c:
return decode_3d_1c(data, count, hw_offset, failures);
}
- for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) {
- if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) {
+ for (idx = 0; idx < Elements(opcodes_3d); idx++) {
+ opcode_3d = &opcodes_3d[idx];
+ if (opcode == opcode_3d->opcode) {
unsigned int len = 1, i;
- instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
- if (opcodes_3d[opcode].max_len > 1) {
+ instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name);
+ if (opcode_3d->max_len > 1) {
len = (data[0] & 0xff) + 2;
- if (len < opcodes_3d[opcode].min_len ||
- len > opcodes_3d[opcode].max_len)
+ if (len < opcode_3d->min_len ||
+ len > opcode_3d->max_len)
{
- fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+ fprintf(out, "Bad count in %s\n", opcode_3d->name);
}
}
for (i = 1; i < len; i++) {
if (i >= count)
- BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+ BUFFER_FAIL(count, len, opcode_3d->name);
instr_out(data, hw_offset, i, "dword %d\n", i);
}
return len;
}
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode);
(*failures)++;
return 1;
}
@@ -1401,12 +1511,87 @@ get_965_prim_type(uint32_t data)
default: return "fail";
}
}
+static int
+i965_decode_urb_fence(const uint32_t *data, uint32_t hw_offset, int len, int count,
+ int *failures)
+{
+ uint32_t vs_fence, clip_fence, gs_fence, sf_fence, vfe_fence, cs_fence;
+
+ if (len != 3)
+ fprintf(out, "Bad count in URB_FENCE\n");
+ if (count < 3)
+ BUFFER_FAIL(count, len, "URB_FENCE");
+
+ vs_fence = data[1] & 0x3ff;
+ gs_fence = (data[1] >> 10) & 0x3ff;
+ clip_fence = (data[1] >> 20) & 0x3ff;
+ sf_fence = data[2] & 0x3ff;
+ vfe_fence = (data[2] >> 10) & 0x3ff;
+ cs_fence = (data[2] >> 20) & 0x7ff;
+
+ instr_out(data, hw_offset, 0, "URB_FENCE: %s%s%s%s%s%s\n",
+ (data[0] >> 13) & 1 ? "cs " : "",
+ (data[0] >> 12) & 1 ? "vfe " : "",
+ (data[0] >> 11) & 1 ? "sf " : "",
+ (data[0] >> 10) & 1 ? "clip " : "",
+ (data[0] >> 9) & 1 ? "gs " : "",
+ (data[0] >> 8) & 1 ? "vs " : "");
+ instr_out(data, hw_offset, 1,
+ "vs fence: %d, clip_fence: %d, gs_fence: %d\n",
+ vs_fence, clip_fence, gs_fence);
+ instr_out(data, hw_offset, 2,
+ "sf fence: %d, vfe_fence: %d, cs_fence: %d\n",
+ sf_fence, vfe_fence, cs_fence);
+ if (gs_fence < vs_fence)
+ fprintf(out, "gs fence < vs fence!\n");
+ if (clip_fence < gs_fence)
+ fprintf(out, "clip fence < gs fence!\n");
+ if (sf_fence < clip_fence)
+ fprintf(out, "sf fence < clip fence!\n");
+ if (cs_fence < sf_fence)
+ fprintf(out, "cs fence < sf fence!\n");
+
+ return len;
+}
+
+static void
+state_base_out(const uint32_t *data, uint32_t hw_offset, unsigned int index,
+ char *name)
+{
+ if (data[index] & 1) {
+ instr_out(data, hw_offset, index, "%s state base address 0x%08x\n",
+ name, data[index] & ~1);
+ } else {
+ instr_out(data, hw_offset, index, "%s state base not updated\n",
+ name);
+ }
+}
+
+static void
+state_max_out(const uint32_t *data, uint32_t hw_offset, unsigned int index,
+ char *name)
+{
+ if (data[index] & 1) {
+ if (data[index] == 1) {
+ instr_out(data, hw_offset, index,
+ "%s state upper bound disabled\n", name);
+ } else {
+ instr_out(data, hw_offset, index, "%s state upper bound 0x%08x\n",
+ name, data[index] & ~1);
+ }
+ } else {
+ instr_out(data, hw_offset, index, "%s state upper bound not updated\n",
+ name);
+ }
+}
static int
-decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures)
{
- unsigned int opcode, len;
- int i;
+ uint32_t opcode;
+ unsigned int idx, len;
+ int i, sba_len;
+ char *desc1 = NULL;
struct {
uint32_t opcode;
@@ -1435,51 +1620,78 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
{ 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
{ 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
{ 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
+ { 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" },
{ 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
+ { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" },
+ { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" },
{ 0x7b00, 6, 6, "3DPRIMITIVE" },
- };
+ { 0x7802, 4, 4, "3DSTATE_SAMPLER_STATE_POINTERS" },
+ { 0x7805, 3, 3, "3DSTATE_URB" },
+ { 0x780e, 4, 4, "3DSTATE_CC_STATE_POINTERS" },
+ { 0x7810, 6, 6, "3DSTATE_VS_STATE" },
+ { 0x7811, 7, 7, "3DSTATE_GS_STATE" },
+ { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" },
+ { 0x7813, 20, 20, "3DSTATE_SF_STATE" },
+ { 0x7814, 9, 9, "3DSTATE_WM_STATE" },
+ { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" },
+ { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" },
+ { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" },
+ { 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" },
+ { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" },
+ }, *opcode_3d;
len = (data[0] & 0x0000ffff) + 2;
- switch ((data[0] & 0xffff0000) >> 16) {
+ opcode = (data[0] & 0xffff0000) >> 16;
+ switch (opcode) {
+ case 0x6000:
+ len = (data[0] & 0x000000ff) + 2;
+ return i965_decode_urb_fence(data, hw_offset, len, count, failures);
+ case 0x6001:
+ instr_out(data, hw_offset, 0, "CS_URB_STATE\n");
+ instr_out(data, hw_offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n",
+ (data[1] >> 4) & 0x1f,
+ (((data[1] >> 4) & 0x1f) + 1) * 64,
+ data[1] & 0x7);
+ return len;
+ case 0x6002:
+ len = (data[0] & 0x000000ff) + 2;
+ instr_out(data, hw_offset, 0, "CONSTANT_BUFFER: %s\n",
+ (data[0] >> 8) & 1 ? "valid" : "invalid");
+ instr_out(data, hw_offset, 1, "offset: 0x%08x, length: %d bytes\n",
+ data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64);
+ return len;
case 0x6101:
- if (len != 6)
+ if (IS_GEN6(devid))
+ sba_len = 10;
+ else if (IS_IRONLAKE(devid))
+ sba_len = 8;
+ else
+ sba_len = 6;
+ if (len != sba_len)
fprintf(out, "Bad count in STATE_BASE_ADDRESS\n");
- if (count < 6)
+ if (len != sba_len)
BUFFER_FAIL(count, len, "STATE_BASE_ADDRESS");
+ i = 0;
instr_out(data, hw_offset, 0,
"STATE_BASE_ADDRESS\n");
-
- if (data[1] & 1) {
- instr_out(data, hw_offset, 1, "General state at 0x%08x\n",
- data[1] & ~1);
- } else
- instr_out(data, hw_offset, 1, "General state not updated\n");
-
- if (data[2] & 1) {
- instr_out(data, hw_offset, 2, "Surface state at 0x%08x\n",
- data[2] & ~1);
- } else
- instr_out(data, hw_offset, 2, "Surface state not updated\n");
-
- if (data[3] & 1) {
- instr_out(data, hw_offset, 3, "Indirect state at 0x%08x\n",
- data[3] & ~1);
- } else
- instr_out(data, hw_offset, 3, "Indirect state not updated\n");
-
- if (data[4] & 1) {
- instr_out(data, hw_offset, 4, "General state upper bound 0x%08x\n",
- data[4] & ~1);
- } else
- instr_out(data, hw_offset, 4, "General state not updated\n");
-
- if (data[5] & 1) {
- instr_out(data, hw_offset, 5, "Indirect state upper bound 0x%08x\n",
- data[5] & ~1);
- } else
- instr_out(data, hw_offset, 5, "Indirect state not updated\n");
+ i++;
+
+ state_base_out(data, hw_offset, i++, "general");
+ state_base_out(data, hw_offset, i++, "surface");
+ if (IS_GEN6(devid))
+ state_base_out(data, hw_offset, i++, "dynamic");
+ state_base_out(data, hw_offset, i++, "indirect");
+ if (IS_IRONLAKE(devid) || IS_GEN6(devid))
+ state_base_out(data, hw_offset, i++, "instruction");
+
+ state_max_out(data, hw_offset, i++, "general");
+ if (IS_GEN6(devid))
+ state_max_out(data, hw_offset, i++, "dynamic");
+ state_max_out(data, hw_offset, i++, "indirect");
+ if (IS_IRONLAKE(devid) || IS_GEN6(devid))
+ state_max_out(data, hw_offset, i++, "instruction");
return len;
case 0x7800:
@@ -1498,18 +1710,33 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
instr_out(data, hw_offset, 6, "CC state\n");
return len;
case 0x7801:
- if (len != 6)
+ len = (data[0] & 0x000000ff) + 2;
+ if (len != 6 && len != 4)
fprintf(out, "Bad count in 3DSTATE_BINDING_TABLE_POINTERS\n");
- if (count < 6)
- BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS");
+ if (len == 6) {
+ if (count < 6)
+ BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS");
+ instr_out(data, hw_offset, 0,
+ "3DSTATE_BINDING_TABLE_POINTERS\n");
+ instr_out(data, hw_offset, 1, "VS binding table\n");
+ instr_out(data, hw_offset, 2, "GS binding table\n");
+ instr_out(data, hw_offset, 3, "Clip binding table\n");
+ instr_out(data, hw_offset, 4, "SF binding table\n");
+ instr_out(data, hw_offset, 5, "WM binding table\n");
+ } else {
+ if (count < 4)
+ BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS");
- instr_out(data, hw_offset, 0,
- "3DSTATE_BINDING_TABLE_POINTERS\n");
- instr_out(data, hw_offset, 1, "VS binding table\n");
- instr_out(data, hw_offset, 2, "GS binding table\n");
- instr_out(data, hw_offset, 3, "Clip binding table\n");
- instr_out(data, hw_offset, 4, "SF binding table\n");
- instr_out(data, hw_offset, 5, "WM binding table\n");
+ instr_out(data, hw_offset, 0,
+ "3DSTATE_BINDING_TABLE_POINTERS: VS mod %d, "
+ "GS mod %d, PS mod %d\n",
+ (data[0] & (1 << 8)) != 0,
+ (data[0] & (1 << 9)) != 0,
+ (data[0] & (1 << 10)) != 0);
+ instr_out(data, hw_offset, 1, "VS binding table\n");
+ instr_out(data, hw_offset, 2, "GS binding table\n");
+ instr_out(data, hw_offset, 3, "WM binding table\n");
+ }
return len;
@@ -1560,6 +1787,18 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
}
return len;
+ case 0x780d:
+ len = (data[0] & 0xff) + 2;
+ if (len != 4)
+ fprintf(out, "Bad count in 3DSTATE_VIEWPORT_STATE_POINTERS\n");
+ if (count < len)
+ BUFFER_FAIL(count, len, "3DSTATE_VIEWPORT_STATE_POINTERS");
+ instr_out(data, hw_offset, 0, "3DSTATE_VIEWPORT_STATE_POINTERS\n");
+ instr_out(data, hw_offset, 1, "clip\n");
+ instr_out(data, hw_offset, 2, "sf\n");
+ instr_out(data, hw_offset, 3, "cc\n");
+ return len;
+
case 0x780a:
len = (data[0] & 0xff) + 2;
if (len != 3)
@@ -1592,7 +1831,7 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
return len;
case 0x7905:
- if (len != 5 && len != 6)
+ if (len < 5 || len > 7)
fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n");
if (count < len)
BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER");
@@ -1609,9 +1848,36 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
((data[3] & 0x0007ffc0) >> 6) + 1,
((data[3] & 0xfff80000) >> 19) + 1);
instr_out(data, hw_offset, 4, "volume depth\n");
- if (len == 6)
+ if (len >= 6)
instr_out(data, hw_offset, 5, "\n");
+ if (len >= 7)
+ instr_out(data, hw_offset, 6, "render target view extent\n");
+
+ return len;
+ case 0x7a00:
+ len = (data[0] & 0xff) + 2;
+ if (len != 4)
+ fprintf(out, "Bad count in PIPE_CONTROL\n");
+ if (count < len)
+ BUFFER_FAIL(count, len, "PIPE_CONTROL");
+
+ switch ((data[0] >> 14) & 0x3) {
+ case 0: desc1 = "no write"; break;
+ case 1: desc1 = "qword write"; break;
+ case 2: desc1 = "PS_DEPTH_COUNT write"; break;
+ case 3: desc1 = "TIMESTAMP write"; break;
+ }
+ instr_out(data, hw_offset, 0,
+ "PIPE_CONTROL: %s, %sdepth stall, %sRC write flush, "
+ "%sinst flush\n",
+ desc1,
+ data[0] & (1 << 13) ? "" : "no ",
+ data[0] & (1 << 12) ? "" : "no ",
+ data[0] & (1 << 11) ? "" : "no ");
+ instr_out(data, hw_offset, 1, "destination address\n");
+ instr_out(data, hw_offset, 2, "immediate dword low\n");
+ instr_out(data, hw_offset, 3, "immediate dword high\n");
return len;
case 0x7b00:
@@ -1633,39 +1899,41 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
return len;
}
- for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) {
- if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) {
+ for (idx = 0; idx < Elements(opcodes_3d); idx++) {
+ opcode_3d = &opcodes_3d[idx];
+ if ((data[0] & 0xffff0000) >> 16 == opcode_3d->opcode) {
unsigned int i;
len = 1;
- instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
- if (opcodes_3d[opcode].max_len > 1) {
+ instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name);
+ if (opcode_3d->max_len > 1) {
len = (data[0] & 0xff) + 2;
- if (len < opcodes_3d[opcode].min_len ||
- len > opcodes_3d[opcode].max_len)
+ if (len < opcode_3d->min_len ||
+ len > opcode_3d->max_len)
{
- fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+ fprintf(out, "Bad count in %s\n", opcode_3d->name);
}
}
for (i = 1; i < len; i++) {
if (i >= count)
- BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+ BUFFER_FAIL(count, len, opcode_3d->name);
instr_out(data, hw_offset, i, "dword %d\n", i);
}
return len;
}
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_965 opcode = 0x%x\n", opcode);
(*failures)++;
return 1;
}
static int
-decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures)
{
- unsigned int opcode;
+ unsigned int idx;
+ uint32_t opcode;
struct {
uint32_t opcode;
@@ -1689,41 +1957,44 @@ decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failure
{ 0x0f, 1, 1, "3DSTATE_MODES_2" },
{ 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
{ 0x16, 1, 1, "3DSTATE_MODES_4" },
- };
+ }, *opcode_3d;
+
+ opcode = (data[0] & 0x1f000000) >> 24;
- switch ((data[0] & 0x1f000000) >> 24) {
+ switch (opcode) {
case 0x1f:
return decode_3d_primitive(data, count, hw_offset, failures);
case 0x1d:
- return decode_3d_1d(data, count, hw_offset, failures, 1);
+ return decode_3d_1d(data, count, hw_offset, devid, failures);
case 0x1c:
return decode_3d_1c(data, count, hw_offset, failures);
}
- for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) {
- if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) {
+ for (idx = 0; idx < Elements(opcodes_3d); idx++) {
+ opcode_3d = &opcodes_3d[idx];
+ if ((data[0] & 0x1f000000) >> 24 == opcode_3d->opcode) {
unsigned int len = 1, i;
- instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
- if (opcodes_3d[opcode].max_len > 1) {
+ instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name);
+ if (opcode_3d->max_len > 1) {
len = (data[0] & 0xff) + 2;
- if (len < opcodes_3d[opcode].min_len ||
- len > opcodes_3d[opcode].max_len)
+ if (len < opcode_3d->min_len ||
+ len > opcode_3d->max_len)
{
- fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+ fprintf(out, "Bad count in %s\n", opcode_3d->name);
}
}
for (i = 1; i < len; i++) {
if (i >= count)
- BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+ BUFFER_FAIL(count, len, opcode_3d->name);
instr_out(data, hw_offset, i, "dword %d\n", i);
}
return len;
}
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_i830 opcode = 0x%x\n", opcode);
(*failures)++;
return 1;
}
@@ -1736,8 +2007,12 @@ decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failure
* \param hw_offset hardware address for the buffer
*/
int
-intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid)
+intel_decode(const uint32_t *data, int count,
+ uint32_t hw_offset,
+ uint32_t devid,
+ uint32_t ignore_end_of_batchbuffer)
{
+ int ret;
int index = 0;
int failures = 0;
@@ -1746,8 +2021,23 @@ intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid
while (index < count) {
switch ((data[index] & 0xe0000000) >> 29) {
case 0x0:
- index += decode_mi(data + index, count - index,
+ ret = decode_mi(data + index, count - index,
hw_offset + index * 4, &failures);
+
+ /* If MI_BATCHBUFFER_END happened, then dump the rest of the
+ * output in case we some day want it in debugging, but don't
+ * decode it since it'll just confuse in the common case.
+ */
+ if (ret == -1) {
+ if (ignore_end_of_batchbuffer) {
+ index++;
+ } else {
+ for (index = index + 1; index < count; index++) {
+ instr_out(data, hw_offset, index, "\n");
+ }
+ }
+ } else
+ index += ret;
break;
case 0x2:
index += decode_2d(data + index, count - index,
@@ -1756,13 +2046,16 @@ intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid
case 0x3:
if (IS_965(devid)) {
index += decode_3d_965(data + index, count - index,
- hw_offset + index * 4, &failures);
+ hw_offset + index * 4,
+ devid, &failures);
} else if (IS_9XX(devid)) {
index += decode_3d(data + index, count - index,
- hw_offset + index * 4, &failures);
+ hw_offset + index * 4,
+ devid, &failures);
} else {
index += decode_3d_i830(data + index, count - index,
- hw_offset + index * 4, &failures);
+ hw_offset + index * 4,
+ devid, &failures);
}
break;
default:
diff --git a/src/gallium/drivers/i965/intel_decode.h b/src/gallium/drivers/i965/intel_decode.h
index 7683097b86..7e7c108c0c 100644
--- a/src/gallium/drivers/i965/intel_decode.h
+++ b/src/gallium/drivers/i965/intel_decode.h
@@ -25,5 +25,7 @@
*
*/
-int intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid);
+#include "pipe/p_compiler.h"
+
+int intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, uint32_t ignore_end_of_batchbuffer);
void intel_decode_context_reset(void);
diff --git a/src/gallium/drivers/i965/intel_structs.h b/src/gallium/drivers/i965/intel_structs.h
index 522e3bd92c..ec6eec8910 100644
--- a/src/gallium/drivers/i965/intel_structs.h
+++ b/src/gallium/drivers/i965/intel_structs.h
@@ -1,6 +1,8 @@
#ifndef INTEL_STRUCTS_H
#define INTEL_STRUCTS_H
+#include "brw_types.h"
+
struct br0 {
GLuint length:8;
GLuint pad0:3;
diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c
index 3efbd6a246..2a9d736015 100644
--- a/src/gallium/drivers/identity/id_context.c
+++ b/src/gallium/drivers/identity/id_context.c
@@ -668,34 +668,15 @@ identity_clear_depth_stencil(struct pipe_context *_pipe,
static void
identity_flush(struct pipe_context *_pipe,
- unsigned flags,
struct pipe_fence_handle **fence)
{
struct identity_context *id_pipe = identity_context(_pipe);
struct pipe_context *pipe = id_pipe->pipe;
pipe->flush(pipe,
- flags,
fence);
}
-static unsigned int
-identity_is_resource_referenced(struct pipe_context *_pipe,
- struct pipe_resource *_resource,
- unsigned level,
- int layer)
-{
- struct identity_context *id_pipe = identity_context(_pipe);
- struct identity_resource *id_resource = identity_resource(_resource);
- struct pipe_context *pipe = id_pipe->pipe;
- struct pipe_resource *resource = id_resource->resource;
-
- return pipe->is_resource_referenced(pipe,
- resource,
- level,
- layer);
-}
-
static struct pipe_sampler_view *
identity_context_create_sampler_view(struct pipe_context *_pipe,
struct pipe_resource *_resource,
@@ -855,6 +836,19 @@ identity_context_transfer_inline_write(struct pipe_context *_context,
}
+static void identity_redefine_user_buffer(struct pipe_context *_context,
+ struct pipe_resource *_resource,
+ unsigned offset, unsigned size)
+{
+ struct identity_context *id_context = identity_context(_context);
+ struct identity_resource *id_resource = identity_resource(_resource);
+ struct pipe_context *context = id_context->pipe;
+ struct pipe_resource *resource = id_resource->resource;
+
+ context->redefine_user_buffer(context, resource, offset, size);
+}
+
+
struct pipe_context *
identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
{
@@ -918,7 +912,6 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
id_pipe->base.clear_render_target = identity_clear_render_target;
id_pipe->base.clear_depth_stencil = identity_clear_depth_stencil;
id_pipe->base.flush = identity_flush;
- id_pipe->base.is_resource_referenced = identity_is_resource_referenced;
id_pipe->base.create_surface = identity_context_create_surface;
id_pipe->base.surface_destroy = identity_context_surface_destroy;
id_pipe->base.create_sampler_view = identity_context_create_sampler_view;
@@ -929,6 +922,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
id_pipe->base.transfer_unmap = identity_context_transfer_unmap;
id_pipe->base.transfer_flush_region = identity_context_transfer_flush_region;
id_pipe->base.transfer_inline_write = identity_context_transfer_inline_write;
+ id_pipe->base.redefine_user_buffer = identity_redefine_user_buffer;
id_pipe->pipe = pipe;
diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c
index 644481bb74..9bf7fd4c0e 100644
--- a/src/gallium/drivers/identity/id_screen.c
+++ b/src/gallium/drivers/identity/id_screen.c
@@ -103,8 +103,7 @@ identity_screen_is_format_supported(struct pipe_screen *_screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned tex_usage,
- unsigned geom_flags)
+ unsigned tex_usage)
{
struct identity_screen *id_screen = identity_screen(_screen);
struct pipe_screen *screen = id_screen->screen;
@@ -113,8 +112,7 @@ identity_screen_is_format_supported(struct pipe_screen *_screen,
format,
target,
sample_count,
- tex_usage,
- geom_flags);
+ tex_usage);
}
static struct pipe_context *
@@ -242,30 +240,28 @@ identity_screen_fence_reference(struct pipe_screen *_screen,
fence);
}
-static int
+static boolean
identity_screen_fence_signalled(struct pipe_screen *_screen,
- struct pipe_fence_handle *fence,
- unsigned flags)
+ struct pipe_fence_handle *fence)
{
struct identity_screen *id_screen = identity_screen(_screen);
struct pipe_screen *screen = id_screen->screen;
return screen->fence_signalled(screen,
- fence,
- flags);
+ fence);
}
-static int
+static boolean
identity_screen_fence_finish(struct pipe_screen *_screen,
struct pipe_fence_handle *fence,
- unsigned flags)
+ uint64_t timeout)
{
struct identity_screen *id_screen = identity_screen(_screen);
struct pipe_screen *screen = id_screen->screen;
return screen->fence_finish(screen,
fence,
- flags);
+ timeout);
}
struct pipe_screen *
diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README
index e9374cc6ef..1fc7746a83 100644
--- a/src/gallium/drivers/llvmpipe/README
+++ b/src/gallium/drivers/llvmpipe/README
@@ -12,7 +12,7 @@ Requirements
See /proc/cpuinfo to know what your CPU supports.
- - LLVM 2.6 (or later)
+ - LLVM. Version 2.8 recommended. 2.6 or later required.
For Linux, on a recent Debian based distribution do:
@@ -30,21 +30,8 @@ Requirements
debug=no. This is necessary as LLVM builds as static library so the chosen
MS CRT must match.
- The version of LLVM from SVN ("2.7svn") from mid-March 2010 is pretty
- stable and has some features not in version 2.6.
-
- scons (optional)
- - udis86, http://udis86.sourceforge.net/ (optional). My personal repository
- supports more opcodes which haven't been merged upstream yet:
-
- git clone git://anongit.freedesktop.org/~jrfonseca/udis86
- cd udis86
- ./autogen.sh
- ./configure --with-pic
- make
- sudo make install
-
Building
========
@@ -94,13 +81,7 @@ that no tail call optimizations are done by gcc.
To better profile JIT code you'll need to build LLVM with oprofile integration.
- source_dir=$PWD/llvm-2.6
- build_dir=$source_dir/build/profile
- install_dir=$source_dir-profile
-
- mkdir -p "$build_dir"
- cd "$build_dir" && \
- $source_dir/configure \
+ ./configure \
--prefix=$install_dir \
--enable-optimized \
--disable-profiling \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 26b258b956..c10a8cbc12 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -96,10 +96,15 @@ if env['platform'] != 'embedded':
tests.append('round')
for test in tests:
+ testname = 'lp_test_' + test
target = env.Program(
- target = 'lp_test_' + test,
- source = ['lp_test_' + test + '.c', 'lp_test_main.c'],
+ target = testname,
+ source = [testname + '.c', 'lp_test_main.c'],
)
env.InstallProgram(target)
+
+ # http://www.scons.org/wiki/UnitTests
+ alias = env.Alias(testname, [target], target[0].abspath)
+ AlwaysBuild(alias)
Export('llvmpipe')
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
index 5c9392504f..06206a24d8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
@@ -34,10 +34,12 @@
#ifndef LP_BLD_ALPHA_H
#define LP_BLD_ALPHA_H
+#include "pipe/p_compiler.h"
#include "gallivm/lp_bld.h"
struct pipe_alpha_state;
+struct gallivm_state;
struct lp_type;
struct lp_build_mask_context;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
index 038b136a28..e01fc46ec1 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -36,10 +36,14 @@
#define LP_BLD_DEPTH_H
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+
#include "gallivm/lp_bld.h"
struct pipe_depth_state;
+struct gallivm_state;
struct util_format_description;
struct lp_type;
struct lp_build_mask_context;
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 2de20d6e9a..8a5655d499 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -125,6 +125,10 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
}
}
+ for (i = 0; i < llvmpipe->num_vertex_buffers; i++) {
+ pipe_resource_reference(&llvmpipe->vertex_buffer[i].buffer, NULL);
+ }
+
gallivm_destroy(llvmpipe->gallivm);
align_free( llvmpipe );
@@ -132,10 +136,9 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
static void
do_flush( struct pipe_context *pipe,
- unsigned flags,
struct pipe_fence_handle **fence)
{
- llvmpipe_flush(pipe, flags, fence, __FUNCTION__);
+ llvmpipe_flush(pipe, fence, __FUNCTION__);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c
index 3a55e76bc3..a21a3c7448 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.c
+++ b/src/gallium/drivers/llvmpipe/lp_fence.c
@@ -47,6 +47,9 @@ lp_fence_create(unsigned rank)
static int fence_id;
struct lp_fence *fence = CALLOC_STRUCT(lp_fence);
+ if (!fence)
+ return NULL;
+
pipe_reference_init(&fence->reference, 1);
pipe_mutex_init(fence->mutex);
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c
index 85e3cdec82..42430550ea 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.c
+++ b/src/gallium/drivers/llvmpipe/lp_flush.c
@@ -40,12 +40,10 @@
/**
- * \param flags bitmask of PIPE_FLUSH_x flags
* \param fence if non-null, returns pointer to a fence which can be waited on
*/
void
llvmpipe_flush( struct pipe_context *pipe,
- unsigned flags,
struct pipe_fence_handle **fence,
const char *reason)
{
@@ -54,7 +52,7 @@ llvmpipe_flush( struct pipe_context *pipe,
draw_flush(llvmpipe->draw);
/* ask the setup module to flush */
- lp_setup_flush(llvmpipe->setup, flags, fence, reason);
+ lp_setup_flush(llvmpipe->setup, fence, reason);
if (llvmpipe_variant_count > 1000) {
@@ -65,23 +63,21 @@ llvmpipe_flush( struct pipe_context *pipe,
/* Enable to dump BMPs of the color/depth buffers each frame */
if (0) {
- if (flags & PIPE_FLUSH_FRAME) {
- static unsigned frame_no = 1;
- char filename[256];
- unsigned i;
-
- for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) {
- util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no);
- debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[0]);
- }
-
- if (0) {
- util_snprintf(filename, sizeof(filename), "zsbuf_%u", frame_no);
- debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.zsbuf);
- }
-
- ++frame_no;
+ static unsigned frame_no = 1;
+ char filename[256];
+ unsigned i;
+
+ for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) {
+ util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no);
+ debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[i]);
+ }
+
+ if (0) {
+ util_snprintf(filename, sizeof(filename), "zsbuf_%u", frame_no);
+ debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.zsbuf);
}
+
+ ++frame_no;
}
}
@@ -90,9 +86,9 @@ llvmpipe_finish( struct pipe_context *pipe,
const char *reason )
{
struct pipe_fence_handle *fence = NULL;
- llvmpipe_flush(pipe, 0, &fence, reason);
+ llvmpipe_flush(pipe, &fence, reason);
if (fence) {
- pipe->screen->fence_finish(pipe->screen, fence, 0);
+ pipe->screen->fence_finish(pipe->screen, fence, PIPE_TIMEOUT_INFINITE);
pipe->screen->fence_reference(pipe->screen, &fence, NULL);
}
}
@@ -110,7 +106,6 @@ llvmpipe_flush_resource(struct pipe_context *pipe,
struct pipe_resource *resource,
unsigned level,
int layer,
- unsigned flush_flags,
boolean read_only,
boolean cpu_access,
boolean do_not_block,
@@ -118,10 +113,10 @@ llvmpipe_flush_resource(struct pipe_context *pipe,
{
unsigned referenced;
- referenced = pipe->is_resource_referenced(pipe, resource, level, layer);
+ referenced = llvmpipe_is_resource_referenced(pipe, resource, level, layer);
- if ((referenced & PIPE_REFERENCED_FOR_WRITE) ||
- ((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) {
+ if ((referenced & LP_REFERENCED_FOR_WRITE) ||
+ ((referenced & LP_REFERENCED_FOR_READ) && !read_only)) {
if (cpu_access) {
/*
@@ -136,7 +131,7 @@ llvmpipe_flush_resource(struct pipe_context *pipe,
* Just flush.
*/
- llvmpipe_flush(pipe, flush_flags, NULL, reason);
+ llvmpipe_flush(pipe, NULL, reason);
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h
index 579d24c68a..efff94c8c0 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.h
+++ b/src/gallium/drivers/llvmpipe/lp_flush.h
@@ -36,7 +36,6 @@ struct pipe_resource;
void
llvmpipe_flush(struct pipe_context *pipe,
- unsigned flags,
struct pipe_fence_handle **fence,
const char *reason);
@@ -49,7 +48,6 @@ llvmpipe_flush_resource(struct pipe_context *pipe,
struct pipe_resource *resource,
unsigned level,
int layer,
- unsigned flush_flags,
boolean read_only,
boolean cpu_access,
boolean do_not_block,
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index a775990f92..482a902dd2 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -36,9 +36,7 @@
#include "util/u_memory.h"
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_debug.h"
-#include "gallivm/lp_bld_intr.h"
#include "lp_context.h"
-#include "lp_screen.h"
#include "lp_jit.h"
diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h
index b23a100b87..455adf7d6f 100644
--- a/src/gallium/drivers/llvmpipe/lp_perf.h
+++ b/src/gallium/drivers/llvmpipe/lp_perf.h
@@ -33,6 +33,7 @@
#ifndef LP_PERF_H
#define LP_PERF_H
+#include "pipe/p_compiler.h"
/**
* Various counters
diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c
index 84c66dd36e..1e2401fa84 100644
--- a/src/gallium/drivers/llvmpipe/lp_query.c
+++ b/src/gallium/drivers/llvmpipe/lp_query.c
@@ -69,7 +69,7 @@ llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
*/
if (pq->fence) {
if (!lp_fence_issued(pq->fence))
- llvmpipe_flush(pipe, 0, NULL, __FUNCTION__);
+ llvmpipe_flush(pipe, NULL, __FUNCTION__);
if (!lp_fence_signalled(pq->fence))
lp_fence_wait(pq->fence);
@@ -99,7 +99,7 @@ llvmpipe_get_query_result(struct pipe_context *pipe,
if (!lp_fence_signalled(pq->fence)) {
if (!lp_fence_issued(pq->fence))
- llvmpipe_flush(pipe, 0, NULL, __FUNCTION__);
+ llvmpipe_flush(pipe, NULL, __FUNCTION__);
if (!wait)
return FALSE;
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
index 978d17c575..5d0f5f8b7b 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -74,6 +74,7 @@ lp_scene_create( struct pipe_context *pipe )
void
lp_scene_destroy(struct lp_scene *scene)
{
+ lp_fence_reference(&scene->fence, NULL);
pipe_mutex_destroy(scene->mutex);
assert(scene->data.head->next == NULL);
FREE(scene->data.head);
diff --git a/src/gallium/drivers/llvmpipe/lp_scene_queue.h b/src/gallium/drivers/llvmpipe/lp_scene_queue.h
index fd7c65a2c8..dd9ab593b4 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene_queue.h
+++ b/src/gallium/drivers/llvmpipe/lp_scene_queue.h
@@ -29,6 +29,8 @@
#ifndef LP_SCENE_QUEUE
#define LP_SCENE_QUEUE
+#include "pipe/p_compiler.h"
+
struct lp_scene_queue;
struct lp_scene;
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 9459a3cd11..521a52ad3a 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -151,7 +151,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_INDEP_BLEND_ENABLE:
return 1;
case PIPE_CAP_INDEP_BLEND_FUNC:
- return 0;
+ return 1;
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
return 1;
@@ -164,6 +164,9 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
return 1;
case PIPE_CAP_DEPTH_CLAMP:
return 0;
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ return 1;
default:
return 0;
}
@@ -222,8 +225,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned bind,
- unsigned geom_flags )
+ unsigned bind)
{
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
struct sw_winsys *winsys = screen->winsys;
@@ -276,6 +278,11 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
return util_format_s3tc_enabled;
}
+ /* u_format doesn't support RGTC yet */
+ if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+ return FALSE;
+ }
+
/*
* Everything else should be supported by u_format.
*/
@@ -341,10 +348,9 @@ llvmpipe_fence_reference(struct pipe_screen *screen,
/**
* Has the fence been executed/finished?
*/
-static int
+static boolean
llvmpipe_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence,
- unsigned flag)
+ struct pipe_fence_handle *fence)
{
struct lp_fence *f = (struct lp_fence *) fence;
return lp_fence_signalled(f);
@@ -354,15 +360,15 @@ llvmpipe_fence_signalled(struct pipe_screen *screen,
/**
* Wait for the fence to finish.
*/
-static int
+static boolean
llvmpipe_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence_handle,
- unsigned flag)
+ uint64_t timeout)
{
struct lp_fence *f = (struct lp_fence *) fence_handle;
lp_fence_wait(f);
- return 0;
+ return TRUE;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index db04c84efb..3813e0ed97 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -333,12 +333,8 @@ fail:
}
-/**
- * \param flags bitmask of PIPE_FLUSH_x flags
- */
void
lp_setup_flush( struct lp_setup_context *setup,
- unsigned flags,
struct pipe_fence_handle **fence,
const char *reason)
{
@@ -469,7 +465,7 @@ lp_setup_clear( struct lp_setup_context *setup,
unsigned flags )
{
if (!lp_setup_try_clear( setup, color, depth, stencil, flags )) {
- lp_setup_flush(setup, 0, NULL, __FUNCTION__);
+ lp_setup_flush(setup, NULL, __FUNCTION__);
if (!lp_setup_try_clear( setup, color, depth, stencil, flags ))
assert(0);
@@ -753,20 +749,20 @@ lp_setup_is_resource_referenced( const struct lp_setup_context *setup,
/* check the render targets */
for (i = 0; i < setup->fb.nr_cbufs; i++) {
if (setup->fb.cbufs[i]->texture == texture)
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+ return LP_REFERENCED_FOR_READ | LP_REFERENCED_FOR_WRITE;
}
if (setup->fb.zsbuf && setup->fb.zsbuf->texture == texture) {
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+ return LP_REFERENCED_FOR_READ | LP_REFERENCED_FOR_WRITE;
}
/* check textures referenced by the scene */
for (i = 0; i < Elements(setup->scenes); i++) {
if (lp_scene_is_resource_referenced(setup->scenes[i], texture)) {
- return PIPE_REFERENCED_FOR_READ;
+ return LP_REFERENCED_FOR_READ;
}
}
- return PIPE_UNREFERENCED;
+ return LP_UNREFERENCED;
}
@@ -996,6 +992,8 @@ lp_setup_destroy( struct lp_setup_context *setup )
lp_scene_destroy(scene);
}
+ lp_fence_reference(&setup->last_fence, NULL);
+
FREE( setup );
}
@@ -1062,6 +1060,8 @@ lp_setup_begin_query(struct lp_setup_context *setup,
{
/* init the query to its beginning state */
assert(setup->active_query == NULL);
+
+ set_scene_state(setup, SETUP_ACTIVE, "begin_query");
if (setup->scene) {
if (!lp_scene_bin_everywhere(setup->scene,
@@ -1091,6 +1091,8 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq)
{
union lp_rast_cmd_arg dummy = { 0 };
+ set_scene_state(setup, SETUP_ACTIVE, "end_query");
+
assert(setup->active_query == pq);
setup->active_query = NULL;
@@ -1108,7 +1110,7 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq)
if (!lp_scene_bin_everywhere(setup->scene,
LP_RAST_OP_END_QUERY,
dummy)) {
- lp_setup_flush(setup, 0, NULL, __FUNCTION__);
+ lp_setup_flush(setup, NULL, __FUNCTION__);
}
}
else {
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h
index 0d6e161a21..8655259d27 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup.h
@@ -64,7 +64,6 @@ lp_setup_clear(struct lp_setup_context *setup,
void
lp_setup_flush( struct lp_setup_context *setup,
- unsigned flags,
struct pipe_fence_handle **fence,
const char *reason);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 2c4943a69f..6243a96f45 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -90,7 +90,6 @@
#include "lp_context.h"
#include "lp_debug.h"
#include "lp_perf.h"
-#include "lp_screen.h"
#include "lp_setup.h"
#include "lp_state.h"
#include "lp_tex_sample.h"
@@ -335,7 +334,8 @@ generate_fs(struct gallivm_state *gallivm,
/* Build the actual shader */
lp_build_tgsi_soa(gallivm, tokens, type, &mask,
- consts_ptr, interp->pos, interp->inputs,
+ consts_ptr, NULL, /* sys values array */
+ interp->pos, interp->inputs,
outputs, sampler, &shader->info.base);
/* Alpha test */
@@ -546,6 +546,7 @@ generate_fragment(struct llvmpipe_context *lp,
unsigned i;
unsigned chan;
unsigned cbuf;
+ boolean cbuf0_write_all;
/* Adjust color input interpolation according to flatshade state:
*/
@@ -559,6 +560,15 @@ generate_fragment(struct llvmpipe_context *lp,
}
}
+ /* check if writes to cbuf[0] are to be copied to all cbufs */
+ cbuf0_write_all = FALSE;
+ for (i = 0;i < shader->info.base.num_properties; i++) {
+ if (shader->info.base.properties[i].name ==
+ TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
+ cbuf0_write_all = TRUE;
+ break;
+ }
+ }
/* TODO: actually pick these based on the fs and color buffer
* characteristics. */
@@ -697,9 +707,10 @@ generate_fragment(struct llvmpipe_context *lp,
mask_input,
counter);
- for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++)
- for(chan = 0; chan < NUM_CHANNELS; ++chan)
- fs_out_color[cbuf][chan][i] = out_color[cbuf][chan];
+ for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++)
+ for (chan = 0; chan < NUM_CHANNELS; ++chan)
+ fs_out_color[cbuf][chan][i] =
+ out_color[cbuf * !cbuf0_write_all][chan];
}
sampler->destroy(sampler);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c
index ad751b9ef4..9f1ec146e9 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -651,7 +651,7 @@ generate_setup_variant(struct gallivm_state *gallivm,
LLVMTypeRef arg_types[7];
LLVMBasicBlockRef block;
LLVMBuilderRef builder = gallivm->builder;
- int64_t t0, t1;
+ int64_t t0 = 0, t1;
if (0)
goto fail;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
index fb29423dd3..be86f66de9 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
@@ -33,6 +33,8 @@
#include "lp_state.h"
#include "draw/draw_context.h"
+#include "util/u_inlines.h"
+#include "util/u_transfer.h"
static void *
@@ -80,8 +82,9 @@ llvmpipe_set_vertex_buffers(struct pipe_context *pipe,
assert(count <= PIPE_MAX_ATTRIBS);
- memcpy(llvmpipe->vertex_buffer, buffers, count * sizeof(buffers[0]));
- llvmpipe->num_vertex_buffers = count;
+ util_copy_vertex_buffers(llvmpipe->vertex_buffer,
+ &llvmpipe->num_vertex_buffers,
+ buffers, count);
llvmpipe->dirty |= LP_NEW_VERTEX;
@@ -112,4 +115,6 @@ llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe)
llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers;
llvmpipe->pipe.set_index_buffer = llvmpipe_set_index_buffer;
+
+ llvmpipe->pipe.redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c
index e7e46a628a..f49638acf0 100644
--- a/src/gallium/drivers/llvmpipe/lp_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_surface.c
@@ -69,7 +69,6 @@ lp_resource_copy(struct pipe_context *pipe,
llvmpipe_flush_resource(pipe,
dst, dst_level, dstz,
- 0, /* flush_flags */
FALSE, /* read_only */
TRUE, /* cpu_access */
FALSE, /* do_not_block */
@@ -77,7 +76,6 @@ lp_resource_copy(struct pipe_context *pipe,
llvmpipe_flush_resource(pipe,
src, src_level, src_box->z,
- 0, /* flush_flags */
TRUE, /* read_only */
TRUE, /* cpu_access */
FALSE, /* do_not_block */
diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c
index 149ee6f125..d229c62031 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_main.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -35,24 +35,13 @@
#include "util/u_cpu_detect.h"
+#include "util/u_math.h"
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_init.h"
#include "lp_test.h"
-#ifdef PIPE_CC_MSVC
-static INLINE double
-round(double x)
-{
- if (x >= 0.0)
- return floor(x + 0.5);
- else
- return ceil(x - 0.5);
-}
-#endif
-
-
void
dump_type(FILE *fp,
struct lp_type type)
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 9753da5e57..fa4ce5bf2a 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -575,7 +575,6 @@ llvmpipe_get_transfer(struct pipe_context *pipe,
if (!llvmpipe_flush_resource(pipe, resource,
level,
box->depth > 1 ? -1 : box->z,
- 0, /* flush_flags */
read_only,
TRUE, /* cpu_access */
do_not_block,
@@ -695,7 +694,7 @@ llvmpipe_transfer_unmap(struct pipe_context *pipe,
transfer->box.z);
}
-static unsigned int
+unsigned int
llvmpipe_is_resource_referenced( struct pipe_context *pipe,
struct pipe_resource *presource,
unsigned level, int layer)
@@ -703,7 +702,7 @@ llvmpipe_is_resource_referenced( struct pipe_context *pipe,
struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
if (presource->target == PIPE_BUFFER)
- return PIPE_UNREFERENCED;
+ return LP_UNREFERENCED;
return lp_setup_is_resource_referenced(llvmpipe->setup, presource);
}
@@ -1401,7 +1400,6 @@ llvmpipe_init_context_resource_funcs(struct pipe_context *pipe)
pipe->transfer_destroy = llvmpipe_transfer_destroy;
pipe->transfer_map = llvmpipe_transfer_map;
pipe->transfer_unmap = llvmpipe_transfer_unmap;
- pipe->is_resource_referenced = llvmpipe_is_resource_referenced;
pipe->transfer_flush_region = u_default_transfer_flush_region;
pipe->transfer_inline_write = u_default_transfer_inline_write;
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h
index b789c0f409..b4a0dfd1c3 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.h
+++ b/src/gallium/drivers/llvmpipe/lp_texture.h
@@ -243,4 +243,14 @@ llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen);
extern void
llvmpipe_init_context_texture_funcs(struct pipe_context *pipe);
+
+#define LP_UNREFERENCED 0
+#define LP_REFERENCED_FOR_READ (1 << 0)
+#define LP_REFERENCED_FOR_WRITE (1 << 1)
+
+unsigned int
+llvmpipe_is_resource_referenced( struct pipe_context *pipe,
+ struct pipe_resource *presource,
+ unsigned level, int layer);
+
#endif /* LP_TEXTURE_H */
diff --git a/src/gallium/drivers/noop/noop_pipe.c b/src/gallium/drivers/noop/noop_pipe.c
index c9c463f470..496b7f5ec6 100644
--- a/src/gallium/drivers/noop/noop_pipe.c
+++ b/src/gallium/drivers/noop/noop_pipe.c
@@ -30,10 +30,16 @@
#include <util/u_inlines.h>
#include <util/u_format.h>
#include "noop_public.h"
-#include "state_tracker/sw_winsys.h"
+
+DEBUG_GET_ONCE_BOOL_OPTION(noop, "GALLIUM_NOOP", FALSE)
void noop_init_state_functions(struct pipe_context *ctx);
+struct noop_pipe_screen {
+ struct pipe_screen pscreen;
+ struct pipe_screen *oscreen;
+};
+
/*
* query
*/
@@ -81,13 +87,6 @@ struct noop_resource {
struct sw_displaytarget *dt;
};
-static unsigned noop_is_resource_referenced(struct pipe_context *pipe,
- struct pipe_resource *resource,
- unsigned level, int layer)
-{
- return PIPE_UNREFERENCED;
-}
-
static struct pipe_resource *noop_resource_create(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
@@ -108,52 +107,29 @@ static struct pipe_resource *noop_resource_create(struct pipe_screen *screen,
FREE(nresource);
return NULL;
}
-#if 0
- if (nresource->base.bind & (PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED)) {
- struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys;
- unsigned stride;
-
- nresource->dt = winsys->displaytarget_create(winsys, nresource->base.bind,
- nresource->base.format,
- nresource->base.width0,
- nresource->base.height0,
- 16, &stride);
- }
-#endif
return &nresource->base;
}
-static struct pipe_resource *noop_resource_from_handle(struct pipe_screen * screen,
+static struct pipe_resource *noop_resource_from_handle(struct pipe_screen *screen,
const struct pipe_resource *templ,
- struct winsys_handle *whandle)
+ struct winsys_handle *handle)
{
- struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys;
- struct noop_resource *nresource;
- struct sw_displaytarget *dt;
- unsigned stride;
+ struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen;
+ struct pipe_screen *oscreen = noop_screen->oscreen;
+ struct pipe_resource *result;
+ struct pipe_resource *noop_resource;
- dt = winsys->displaytarget_from_handle(winsys, templ, whandle, &stride);
- if (dt == NULL) {
- return NULL;
- }
- nresource = (struct noop_resource *)noop_resource_create(screen, templ);
- nresource->dt = dt;
- return &nresource->base;
+ result = oscreen->resource_from_handle(oscreen, templ, handle);
+ noop_resource = noop_resource_create(screen, result);
+ pipe_resource_reference(&result, NULL);
+ return noop_resource;
}
static boolean noop_resource_get_handle(struct pipe_screen *screen,
struct pipe_resource *resource,
struct winsys_handle *handle)
{
- struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys;
- struct noop_resource *nresource = (struct noop_resource *)resource;
-
- if (nresource->dt == NULL)
- return FALSE;
-
- return winsys->displaytarget_get_handle(winsys, nresource->dt, handle);
+ return FALSE;
}
static void noop_resource_destroy(struct pipe_screen *screen,
@@ -161,11 +137,6 @@ static void noop_resource_destroy(struct pipe_screen *screen,
{
struct noop_resource *nresource = (struct noop_resource *)resource;
- if (nresource->dt) {
- /* display target */
- struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys;
- winsys->displaytarget_destroy(winsys, nresource->dt);
- }
free(nresource->data);
FREE(resource);
}
@@ -289,7 +260,7 @@ static void noop_resource_copy_region(struct pipe_context *ctx,
/*
* context
*/
-static void noop_flush(struct pipe_context *ctx, unsigned flags,
+static void noop_flush(struct pipe_context *ctx,
struct pipe_fence_handle **fence)
{
}
@@ -325,7 +296,6 @@ static struct pipe_context *noop_create_context(struct pipe_screen *screen, void
ctx->transfer_unmap = noop_transfer_unmap;
ctx->transfer_destroy = noop_transfer_destroy;
ctx->transfer_inline_write = noop_transfer_inline_write;
- ctx->is_resource_referenced = noop_is_resource_referenced;
noop_init_state_functions(ctx);
return ctx;
@@ -475,27 +445,37 @@ static boolean noop_is_format_supported(struct pipe_screen* screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned usage,
- unsigned geom_flags)
+ unsigned usage)
{
return true;
}
static void noop_destroy_screen(struct pipe_screen *screen)
{
+ struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen;
+ struct pipe_screen *oscreen = noop_screen->oscreen;
+
+ oscreen->destroy(oscreen);
FREE(screen);
}
-struct pipe_screen *noop_screen_create(struct sw_winsys *winsys)
+struct pipe_screen *noop_screen_create(struct pipe_screen *oscreen)
{
+ struct noop_pipe_screen *noop_screen;
struct pipe_screen *screen;
- screen = CALLOC_STRUCT(pipe_screen);
- if (screen == NULL) {
+ if (!debug_get_option_noop()) {
+ return oscreen;
+ }
+
+ noop_screen = CALLOC_STRUCT(noop_pipe_screen);
+ if (noop_screen == NULL) {
return NULL;
}
+ noop_screen->oscreen = oscreen;
+ screen = &noop_screen->pscreen;
- screen->winsys = (struct pipe_winsys*)winsys;
+ screen->winsys = oscreen->winsys;
screen->destroy = noop_destroy_screen;
screen->get_name = noop_get_name;
screen->get_vendor = noop_get_vendor;
diff --git a/src/gallium/drivers/noop/noop_public.h b/src/gallium/drivers/noop/noop_public.h
index 8ce82bec69..180ea597fa 100644
--- a/src/gallium/drivers/noop/noop_public.h
+++ b/src/gallium/drivers/noop/noop_public.h
@@ -23,8 +23,7 @@
#ifndef NOOP_PUBLIC_H
#define NOOP_PUBLIC_H
-struct sw_winsys;
-
-struct pipe_screen *noop_screen_create(struct sw_winsys *winsys);
+struct pipe_screen;
+struct pipe_screen *noop_screen_create(struct pipe_screen *screen);
#endif
diff --git a/src/gallium/drivers/noop/noop_state.c b/src/gallium/drivers/noop/noop_state.c
index ad324774c0..00a4c1eb01 100644
--- a/src/gallium/drivers/noop/noop_state.c
+++ b/src/gallium/drivers/noop/noop_state.c
@@ -28,6 +28,7 @@
#include <pipe/p_screen.h>
#include <util/u_memory.h>
#include <util/u_inlines.h>
+#include "util/u_transfer.h"
static void noop_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
{
@@ -287,4 +288,5 @@ void noop_init_state_functions(struct pipe_context *ctx)
ctx->sampler_view_destroy = noop_sampler_view_destroy;
ctx->surface_destroy = noop_surface_destroy;
ctx->draw_vbo = noop_draw_vbo;
+ ctx->redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/nouveau/Makefile b/src/gallium/drivers/nouveau/Makefile
index db591b756c..3210d1ff77 100644
--- a/src/gallium/drivers/nouveau/Makefile
+++ b/src/gallium/drivers/nouveau/Makefile
@@ -4,8 +4,12 @@ include $(TOP)/configs/current
LIBNAME = nouveau
LIBRARY_INCLUDES = \
+ $(LIBDRM_CFLAGS) \
-I$(TOP)/src/gallium/drivers/nouveau/include
-C_SOURCES = nouveau_screen.c
+C_SOURCES = nouveau_screen.c \
+ nouveau_fence.c \
+ nouveau_mm.c \
+ nouveau_buffer.c
include ../../Makefile.template
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
new file mode 100644
index 0000000000..01d3aa46d0
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -0,0 +1,487 @@
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "nouveau_screen.h"
+#include "nouveau_context.h"
+#include "nouveau_winsys.h"
+#include "nouveau_fence.h"
+#include "nouveau_buffer.h"
+#include "nouveau_mm.h"
+
+struct nouveau_transfer {
+ struct pipe_transfer base;
+};
+
+static INLINE struct nouveau_transfer *
+nouveau_transfer(struct pipe_transfer *transfer)
+{
+ return (struct nouveau_transfer *)transfer;
+}
+
+static INLINE boolean
+nouveau_buffer_allocate(struct nouveau_screen *screen,
+ struct nv04_resource *buf, unsigned domain)
+{
+ if (domain == NOUVEAU_BO_VRAM) {
+ buf->mm = nouveau_mm_allocate(screen->mm_VRAM, buf->base.width0,
+ &buf->bo, &buf->offset);
+ if (!buf->bo)
+ return nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_GART);
+ } else
+ if (domain == NOUVEAU_BO_GART) {
+ buf->mm = nouveau_mm_allocate(screen->mm_GART, buf->base.width0,
+ &buf->bo, &buf->offset);
+ if (!buf->bo)
+ return FALSE;
+ }
+ if (domain != NOUVEAU_BO_GART) {
+ if (!buf->data) {
+ buf->data = MALLOC(buf->base.width0);
+ if (!buf->data)
+ return FALSE;
+ }
+ }
+ buf->domain = domain;
+ return TRUE;
+}
+
+static INLINE void
+release_allocation(struct nouveau_mm_allocation **mm,
+ struct nouveau_fence *fence)
+{
+ nouveau_fence_work(fence, nouveau_mm_free_work, *mm);
+ (*mm) = NULL;
+}
+
+INLINE void
+nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
+{
+ nouveau_bo_ref(NULL, &buf->bo);
+
+ if (buf->mm)
+ release_allocation(&buf->mm, buf->fence);
+
+ buf->domain = 0;
+}
+
+static INLINE boolean
+nouveau_buffer_reallocate(struct nouveau_screen *screen,
+ struct nv04_resource *buf, unsigned domain)
+{
+ nouveau_buffer_release_gpu_storage(buf);
+
+ return nouveau_buffer_allocate(screen, buf, domain);
+}
+
+static void
+nouveau_buffer_destroy(struct pipe_screen *pscreen,
+ struct pipe_resource *presource)
+{
+ struct nv04_resource *res = nv04_resource(presource);
+
+ nouveau_buffer_release_gpu_storage(res);
+
+ if (res->data && !(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
+ FREE(res->data);
+
+ FREE(res);
+}
+
+/* Maybe just migrate to GART right away if we actually need to do this. */
+boolean
+nouveau_buffer_download(struct nouveau_context *nv, struct nv04_resource *buf,
+ unsigned start, unsigned size)
+{
+ struct nouveau_mm_allocation *mm;
+ struct nouveau_bo *bounce = NULL;
+ uint32_t offset;
+
+ assert(buf->domain == NOUVEAU_BO_VRAM);
+
+ mm = nouveau_mm_allocate(nv->screen->mm_GART, size, &bounce, &offset);
+ if (!bounce)
+ return FALSE;
+
+ nv->copy_data(nv, bounce, offset, NOUVEAU_BO_GART,
+ buf->bo, buf->offset + start, NOUVEAU_BO_VRAM, size);
+
+ if (nouveau_bo_map_range(bounce, offset, size, NOUVEAU_BO_RD))
+ return FALSE;
+ memcpy(buf->data + start, bounce->map, size);
+ nouveau_bo_unmap(bounce);
+
+ buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+
+ nouveau_bo_ref(NULL, &bounce);
+ if (mm)
+ nouveau_mm_free(mm);
+ return TRUE;
+}
+
+static boolean
+nouveau_buffer_upload(struct nouveau_context *nv, struct nv04_resource *buf,
+ unsigned start, unsigned size)
+{
+ struct nouveau_mm_allocation *mm;
+ struct nouveau_bo *bounce = NULL;
+ uint32_t offset;
+
+ if (size <= 192) {
+ nv->push_data(nv, buf->bo, buf->offset + start, buf->domain,
+ size, buf->data + start);
+ return TRUE;
+ }
+
+ mm = nouveau_mm_allocate(nv->screen->mm_GART, size, &bounce, &offset);
+ if (!bounce)
+ return FALSE;
+
+ nouveau_bo_map_range(bounce, offset, size,
+ NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC);
+ memcpy(bounce->map, buf->data + start, size);
+ nouveau_bo_unmap(bounce);
+
+ nv->copy_data(nv, buf->bo, buf->offset + start, NOUVEAU_BO_VRAM,
+ bounce, offset, NOUVEAU_BO_GART, size);
+
+ nouveau_bo_ref(NULL, &bounce);
+ if (mm)
+ release_allocation(&mm, nv->screen->fence.current);
+
+ if (start == 0 && size == buf->base.width0)
+ buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ return TRUE;
+}
+
+static struct pipe_transfer *
+nouveau_buffer_transfer_get(struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned level, unsigned usage,
+ const struct pipe_box *box)
+{
+ struct nv04_resource *buf = nv04_resource(resource);
+ struct nouveau_context *nv = nouveau_context(pipe);
+ struct nouveau_transfer *xfr = CALLOC_STRUCT(nouveau_transfer);
+ if (!xfr)
+ return NULL;
+
+ xfr->base.resource = resource;
+ xfr->base.box.x = box->x;
+ xfr->base.box.width = box->width;
+ xfr->base.usage = usage;
+
+ if (buf->domain == NOUVEAU_BO_VRAM) {
+ if (usage & PIPE_TRANSFER_READ) {
+ if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING)
+ nouveau_buffer_download(nv, buf, 0, buf->base.width0);
+ }
+ }
+
+ return &xfr->base;
+}
+
+static void
+nouveau_buffer_transfer_destroy(struct pipe_context *pipe,
+ struct pipe_transfer *transfer)
+{
+ struct nv04_resource *buf = nv04_resource(transfer->resource);
+ struct nouveau_transfer *xfr = nouveau_transfer(transfer);
+ struct nouveau_context *nv = nouveau_context(pipe);
+
+ if (xfr->base.usage & PIPE_TRANSFER_WRITE) {
+ /* writing is worse */
+ nouveau_buffer_adjust_score(nv, buf, -5000);
+
+ if (buf->domain == NOUVEAU_BO_VRAM) {
+ nouveau_buffer_upload(nv, buf, transfer->box.x, transfer->box.width);
+ }
+
+ if (buf->domain != 0 && (buf->base.bind & (PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_INDEX_BUFFER)))
+ nouveau_context(pipe)->vbo_dirty = TRUE;
+ }
+
+ FREE(xfr);
+}
+
+static INLINE boolean
+nouveau_buffer_sync(struct nv04_resource *buf, unsigned rw)
+{
+ if (rw == PIPE_TRANSFER_READ) {
+ if (!buf->fence_wr)
+ return TRUE;
+ if (!nouveau_fence_wait(buf->fence_wr))
+ return FALSE;
+ } else {
+ if (!buf->fence)
+ return TRUE;
+ if (!nouveau_fence_wait(buf->fence))
+ return FALSE;
+
+ nouveau_fence_ref(NULL, &buf->fence);
+ }
+ nouveau_fence_ref(NULL, &buf->fence_wr);
+
+ return TRUE;
+}
+
+static INLINE boolean
+nouveau_buffer_busy(struct nv04_resource *buf, unsigned rw)
+{
+ if (rw == PIPE_TRANSFER_READ)
+ return (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr));
+ else
+ return (buf->fence && !nouveau_fence_signalled(buf->fence));
+}
+
+static void *
+nouveau_buffer_transfer_map(struct pipe_context *pipe,
+ struct pipe_transfer *transfer)
+{
+ struct nouveau_transfer *xfr = nouveau_transfer(transfer);
+ struct nv04_resource *buf = nv04_resource(transfer->resource);
+ struct nouveau_bo *bo = buf->bo;
+ uint8_t *map;
+ int ret;
+ uint32_t offset = xfr->base.box.x;
+ uint32_t flags;
+
+ nouveau_buffer_adjust_score(nouveau_context(pipe), buf, -250);
+
+ if (buf->domain != NOUVEAU_BO_GART)
+ return buf->data + offset;
+
+ if (buf->mm)
+ flags = NOUVEAU_BO_NOSYNC | NOUVEAU_BO_RDWR;
+ else
+ flags = nouveau_screen_transfer_flags(xfr->base.usage);
+
+ offset += buf->offset;
+
+ ret = nouveau_bo_map_range(buf->bo, offset, xfr->base.box.width, flags);
+ if (ret)
+ return NULL;
+ map = bo->map;
+
+ /* Unmap right now. Since multiple buffers can share a single nouveau_bo,
+ * not doing so might make future maps fail or trigger "reloc while mapped"
+ * errors. For now, mappings to userspace are guaranteed to be persistent.
+ */
+ nouveau_bo_unmap(bo);
+
+ if (buf->mm) {
+ if (xfr->base.usage & PIPE_TRANSFER_DONTBLOCK) {
+ if (nouveau_buffer_busy(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE))
+ return NULL;
+ } else
+ if (!(xfr->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+ nouveau_buffer_sync(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE);
+ }
+ }
+ return map;
+}
+
+
+
+static void
+nouveau_buffer_transfer_flush_region(struct pipe_context *pipe,
+ struct pipe_transfer *transfer,
+ const struct pipe_box *box)
+{
+ struct nv04_resource *res = nv04_resource(transfer->resource);
+ struct nouveau_bo *bo = res->bo;
+ unsigned offset = res->offset + transfer->box.x + box->x;
+
+ /* not using non-snoop system memory yet, no need for cflush */
+ if (1)
+ return;
+
+ /* XXX: maybe need to upload for VRAM buffers here */
+
+ nouveau_screen_bo_map_flush_range(pipe->screen, bo, offset, box->width);
+}
+
+static void
+nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
+ struct pipe_transfer *transfer)
+{
+ /* we've called nouveau_bo_unmap right after map */
+}
+
+const struct u_resource_vtbl nouveau_buffer_vtbl =
+{
+ u_default_resource_get_handle, /* get_handle */
+ nouveau_buffer_destroy, /* resource_destroy */
+ nouveau_buffer_transfer_get, /* get_transfer */
+ nouveau_buffer_transfer_destroy, /* transfer_destroy */
+ nouveau_buffer_transfer_map, /* transfer_map */
+ nouveau_buffer_transfer_flush_region, /* transfer_flush_region */
+ nouveau_buffer_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
+};
+
+struct pipe_resource *
+nouveau_buffer_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ)
+{
+ struct nouveau_screen *screen = nouveau_screen(pscreen);
+ struct nv04_resource *buffer;
+ boolean ret;
+
+ buffer = CALLOC_STRUCT(nv04_resource);
+ if (!buffer)
+ return NULL;
+
+ buffer->base = *templ;
+ buffer->vtbl = &nouveau_buffer_vtbl;
+ pipe_reference_init(&buffer->base.reference, 1);
+ buffer->base.screen = pscreen;
+
+ if ((buffer->base.bind & screen->sysmem_bindings) == screen->sysmem_bindings)
+ ret = nouveau_buffer_allocate(screen, buffer, 0);
+ else
+ ret = nouveau_buffer_allocate(screen, buffer, NOUVEAU_BO_GART);
+
+ if (ret == FALSE)
+ goto fail;
+
+ return &buffer->base;
+
+fail:
+ FREE(buffer);
+ return NULL;
+}
+
+
+struct pipe_resource *
+nouveau_user_buffer_create(struct pipe_screen *pscreen, void *ptr,
+ unsigned bytes, unsigned bind)
+{
+ struct nv04_resource *buffer;
+
+ buffer = CALLOC_STRUCT(nv04_resource);
+ if (!buffer)
+ return NULL;
+
+ pipe_reference_init(&buffer->base.reference, 1);
+ buffer->vtbl = &nouveau_buffer_vtbl;
+ buffer->base.screen = pscreen;
+ buffer->base.format = PIPE_FORMAT_R8_UNORM;
+ buffer->base.usage = PIPE_USAGE_IMMUTABLE;
+ buffer->base.bind = bind;
+ buffer->base.width0 = bytes;
+ buffer->base.height0 = 1;
+ buffer->base.depth0 = 1;
+
+ buffer->data = ptr;
+ buffer->status = NOUVEAU_BUFFER_STATUS_USER_MEMORY;
+
+ return &buffer->base;
+}
+
+/* Like download, but for GART buffers. Merge ? */
+static INLINE boolean
+nouveau_buffer_data_fetch(struct nv04_resource *buf, struct nouveau_bo *bo,
+ unsigned offset, unsigned size)
+{
+ if (!buf->data) {
+ buf->data = MALLOC(size);
+ if (!buf->data)
+ return FALSE;
+ }
+ if (nouveau_bo_map_range(bo, offset, size, NOUVEAU_BO_RD))
+ return FALSE;
+ memcpy(buf->data, bo->map, size);
+ nouveau_bo_unmap(bo);
+
+ return TRUE;
+}
+
+/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
+boolean
+nouveau_buffer_migrate(struct nouveau_context *nv,
+ struct nv04_resource *buf, const unsigned new_domain)
+{
+ struct nouveau_screen *screen = nv->screen;
+ struct nouveau_bo *bo;
+ const unsigned old_domain = buf->domain;
+ unsigned size = buf->base.width0;
+ unsigned offset;
+ int ret;
+
+ assert(new_domain != old_domain);
+
+ if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
+ if (!nouveau_buffer_allocate(screen, buf, new_domain))
+ return FALSE;
+ ret = nouveau_bo_map_range(buf->bo, buf->offset, size, NOUVEAU_BO_WR |
+ NOUVEAU_BO_NOSYNC);
+ if (ret)
+ return ret;
+ memcpy(buf->bo->map, buf->data, size);
+ nouveau_bo_unmap(buf->bo);
+ FREE(buf->data);
+ } else
+ if (old_domain != 0 && new_domain != 0) {
+ struct nouveau_mm_allocation *mm = buf->mm;
+
+ if (new_domain == NOUVEAU_BO_VRAM) {
+ /* keep a system memory copy of our data in case we hit a fallback */
+ if (!nouveau_buffer_data_fetch(buf, buf->bo, buf->offset, size))
+ return FALSE;
+ debug_printf("migrating %u KiB to VRAM\n", size / 1024);
+ }
+
+ offset = buf->offset;
+ bo = buf->bo;
+ buf->bo = NULL;
+ buf->mm = NULL;
+ nouveau_buffer_allocate(screen, buf, new_domain);
+
+ nv->copy_data(nv, buf->bo, buf->offset, new_domain,
+ bo, offset, old_domain, buf->base.width0);
+
+ nouveau_bo_ref(NULL, &bo);
+ if (mm)
+ release_allocation(&mm, screen->fence.current);
+ } else
+ if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
+ if (!nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
+ return FALSE;
+ if (!nouveau_buffer_upload(nv, buf, 0, buf->base.width0))
+ return FALSE;
+ } else
+ return FALSE;
+
+ assert(buf->domain == new_domain);
+ return TRUE;
+}
+
+/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
+ * We'd like to only allocate @size bytes here, but then we'd have to rebase
+ * the vertex indices ...
+ */
+boolean
+nouveau_user_buffer_upload(struct nv04_resource *buf,
+ unsigned base, unsigned size)
+{
+ struct nouveau_screen *screen = nouveau_screen(buf->base.screen);
+ int ret;
+
+ assert(buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY);
+
+ buf->base.width0 = base + size;
+ if (!nouveau_buffer_reallocate(screen, buf, NOUVEAU_BO_GART))
+ return FALSE;
+
+ ret = nouveau_bo_map_range(buf->bo, buf->offset + base, size,
+ NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC);
+ if (ret)
+ return FALSE;
+ memcpy(buf->bo->map, buf->data + base, size);
+ nouveau_bo_unmap(buf->bo);
+
+ return TRUE;
+}
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.h b/src/gallium/drivers/nouveau/nouveau_buffer.h
new file mode 100644
index 0000000000..46e3554bdf
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.h
@@ -0,0 +1,139 @@
+#ifndef __NOUVEAU_BUFFER_H__
+#define __NOUVEAU_BUFFER_H__
+
+#include "util/u_transfer.h"
+#include "util/u_double_list.h"
+
+struct pipe_resource;
+struct nouveau_context;
+struct nouveau_bo;
+
+#define NOUVEAU_BUFFER_SCORE_MIN -25000
+#define NOUVEAU_BUFFER_SCORE_MAX 25000
+#define NOUVEAU_BUFFER_SCORE_VRAM_THRESHOLD 20000
+
+/* DIRTY: buffer was (or will be after the next flush) written to by GPU and
+ * resource->data has not been updated to reflect modified VRAM contents
+ *
+ * USER_MEMORY: resource->data is a pointer to client memory and may change
+ * between GL calls
+ */
+#define NOUVEAU_BUFFER_STATUS_GPU_READING (1 << 0)
+#define NOUVEAU_BUFFER_STATUS_GPU_WRITING (1 << 1)
+#define NOUVEAU_BUFFER_STATUS_USER_MEMORY (1 << 7)
+
+/* Resources, if mapped into the GPU's address space, are guaranteed to
+ * have constant virtual addresses (nv50+).
+ *
+ * The address of a resource will lie within the nouveau_bo referenced,
+ * and this bo should be added to the memory manager's validation list.
+ */
+struct nv04_resource {
+ struct pipe_resource base;
+ const struct u_resource_vtbl *vtbl;
+
+ uint8_t *data;
+ struct nouveau_bo *bo;
+ uint32_t offset;
+
+ uint8_t status;
+ uint8_t domain;
+
+ int16_t score; /* low if mapped very often, if high can move to VRAM */
+
+ struct nouveau_fence *fence;
+ struct nouveau_fence *fence_wr;
+
+ struct nouveau_mm_allocation *mm;
+};
+
+void
+nouveau_buffer_release_gpu_storage(struct nv04_resource *);
+
+boolean
+nouveau_buffer_download(struct nouveau_context *, struct nv04_resource *,
+ unsigned start, unsigned size);
+
+boolean
+nouveau_buffer_migrate(struct nouveau_context *,
+ struct nv04_resource *, unsigned domain);
+
+static INLINE void
+nouveau_buffer_adjust_score(struct nouveau_context *pipe,
+ struct nv04_resource *res, int16_t score)
+{
+ if (score < 0) {
+ if (res->score > NOUVEAU_BUFFER_SCORE_MIN)
+ res->score += score;
+ } else
+ if (score > 0){
+ if (res->score < NOUVEAU_BUFFER_SCORE_MAX)
+ res->score += score;
+ if (res->domain == NOUVEAU_BO_GART &&
+ res->score > NOUVEAU_BUFFER_SCORE_VRAM_THRESHOLD)
+ nouveau_buffer_migrate(pipe, res, NOUVEAU_BO_VRAM);
+ }
+}
+
+/* XXX: wait for fence (atm only using this for vertex push) */
+static INLINE void *
+nouveau_resource_map_offset(struct nouveau_context *pipe,
+ struct nv04_resource *res, uint32_t offset,
+ uint32_t flags)
+{
+ void *map;
+
+ nouveau_buffer_adjust_score(pipe, res, -250);
+
+ if ((res->domain == NOUVEAU_BO_VRAM) &&
+ (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING))
+ nouveau_buffer_download(pipe, res, 0, res->base.width0);
+
+ if ((res->domain != NOUVEAU_BO_GART) ||
+ (res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
+ return res->data + offset;
+
+ if (res->mm)
+ flags |= NOUVEAU_BO_NOSYNC;
+
+ if (nouveau_bo_map_range(res->bo, res->offset + offset,
+ res->base.width0, flags))
+ return NULL;
+
+ map = res->bo->map;
+ nouveau_bo_unmap(res->bo);
+ return map;
+}
+
+static INLINE void
+nouveau_resource_unmap(struct nv04_resource *res)
+{
+ /* no-op */
+}
+
+static INLINE struct nv04_resource *
+nv04_resource(struct pipe_resource *resource)
+{
+ return (struct nv04_resource *)resource;
+}
+
+/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? */
+static INLINE boolean
+nouveau_resource_mapped_by_gpu(struct pipe_resource *resource)
+{
+ return nv04_resource(resource)->domain != 0;
+}
+
+struct pipe_resource *
+nouveau_buffer_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ);
+
+struct pipe_resource *
+nouveau_user_buffer_create(struct pipe_screen *screen, void *ptr,
+ unsigned bytes, unsigned usage);
+
+boolean
+nouveau_user_buffer_upload(struct nv04_resource *, unsigned base,
+ unsigned size);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h
new file mode 100644
index 0000000000..696e0d3f24
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_context.h
@@ -0,0 +1,26 @@
+#ifndef __NOUVEAU_CONTEXT_H__
+#define __NOUVEAU_CONTEXT_H__
+
+#include "pipe/p_context.h"
+
+struct nouveau_context {
+ struct pipe_context pipe;
+ struct nouveau_screen *screen;
+
+ boolean vbo_dirty;
+
+ void (*copy_data)(struct nouveau_context *,
+ struct nouveau_bo *dst, unsigned, unsigned,
+ struct nouveau_bo *src, unsigned, unsigned, unsigned);
+ void (*push_data)(struct nouveau_context *,
+ struct nouveau_bo *dst, unsigned, unsigned,
+ unsigned, void *);
+};
+
+static INLINE struct nouveau_context *
+nouveau_context(struct pipe_context *pipe)
+{
+ return (struct nouveau_context *)pipe;
+}
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_fence.c b/src/gallium/drivers/nouveau/nouveau_fence.c
new file mode 100644
index 0000000000..d8f59dce9e
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_fence.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "util/u_double_list.h"
+
+#include "nouveau_screen.h"
+#include "nouveau_fence.h"
+
+#include "nouveau/nouveau_pushbuf.h"
+
+#ifdef PIPE_OS_UNIX
+#include <sched.h>
+#endif
+
+boolean
+nouveau_fence_new(struct nouveau_screen *screen, struct nouveau_fence **fence,
+ boolean emit)
+{
+ *fence = CALLOC_STRUCT(nouveau_fence);
+ if (!*fence)
+ return FALSE;
+
+ (*fence)->screen = screen;
+ (*fence)->ref = 1;
+ LIST_INITHEAD(&(*fence)->work);
+
+ if (emit)
+ nouveau_fence_emit(*fence);
+
+ return TRUE;
+}
+
+static void
+nouveau_fence_trigger_work(struct nouveau_fence *fence)
+{
+ struct nouveau_fence_work *work, *tmp;
+
+ LIST_FOR_EACH_ENTRY_SAFE(work, tmp, &fence->work, list) {
+ work->func(work->data);
+ LIST_DEL(&work->list);
+ FREE(work);
+ }
+}
+
+boolean
+nouveau_fence_work(struct nouveau_fence *fence,
+ void (*func)(void *), void *data)
+{
+ struct nouveau_fence_work *work;
+
+ if (!fence || fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) {
+ func(data);
+ return TRUE;
+ }
+
+ work = CALLOC_STRUCT(nouveau_fence_work);
+ if (!work)
+ return FALSE;
+ work->func = func;
+ work->data = data;
+ LIST_ADD(&work->list, &fence->work);
+ return TRUE;
+}
+
+void
+nouveau_fence_emit(struct nouveau_fence *fence)
+{
+ struct nouveau_screen *screen = fence->screen;
+
+ fence->sequence = ++screen->fence.sequence;
+
+ assert(fence->state == NOUVEAU_FENCE_STATE_AVAILABLE);
+
+ /* set this now, so that if fence.emit triggers a flush we don't recurse */
+ fence->state = NOUVEAU_FENCE_STATE_EMITTED;
+
+ screen->fence.emit(&screen->base, fence->sequence);
+
+ ++fence->ref;
+
+ if (screen->fence.tail)
+ screen->fence.tail->next = fence;
+ else
+ screen->fence.head = fence;
+
+ screen->fence.tail = fence;
+}
+
+void
+nouveau_fence_del(struct nouveau_fence *fence)
+{
+ struct nouveau_fence *it;
+ struct nouveau_screen *screen = fence->screen;
+
+ if (fence->state == NOUVEAU_FENCE_STATE_EMITTED ||
+ fence->state == NOUVEAU_FENCE_STATE_FLUSHED) {
+ if (fence == screen->fence.head) {
+ screen->fence.head = fence->next;
+ if (!screen->fence.head)
+ screen->fence.tail = NULL;
+ } else {
+ for (it = screen->fence.head; it && it->next != fence; it = it->next);
+ it->next = fence->next;
+ if (screen->fence.tail == fence)
+ screen->fence.tail = it;
+ }
+ }
+
+ if (!LIST_IS_EMPTY(&fence->work)) {
+ debug_printf("WARNING: deleting fence with work still pending !\n");
+ nouveau_fence_trigger_work(fence);
+ }
+
+ FREE(fence);
+}
+
+void
+nouveau_fence_update(struct nouveau_screen *screen, boolean flushed)
+{
+ struct nouveau_fence *fence;
+ struct nouveau_fence *next = NULL;
+ u32 sequence = screen->fence.update(&screen->base);
+
+ if (screen->fence.sequence_ack == sequence)
+ return;
+ screen->fence.sequence_ack = sequence;
+
+ for (fence = screen->fence.head; fence; fence = next) {
+ next = fence->next;
+ sequence = fence->sequence;
+
+ fence->state = NOUVEAU_FENCE_STATE_SIGNALLED;
+
+ nouveau_fence_trigger_work(fence);
+ nouveau_fence_ref(NULL, &fence);
+
+ if (sequence == screen->fence.sequence_ack)
+ break;
+ }
+ screen->fence.head = next;
+ if (!next)
+ screen->fence.tail = NULL;
+
+ if (flushed) {
+ for (fence = next; fence; fence = fence->next)
+ fence->state = NOUVEAU_FENCE_STATE_FLUSHED;
+ }
+}
+
+#define NOUVEAU_FENCE_MAX_SPINS (1 << 31)
+
+boolean
+nouveau_fence_signalled(struct nouveau_fence *fence)
+{
+ struct nouveau_screen *screen = fence->screen;
+
+ if (fence->state >= NOUVEAU_FENCE_STATE_EMITTED)
+ nouveau_fence_update(screen, FALSE);
+
+ return fence->state == NOUVEAU_FENCE_STATE_SIGNALLED;
+}
+
+boolean
+nouveau_fence_wait(struct nouveau_fence *fence)
+{
+ struct nouveau_screen *screen = fence->screen;
+ uint32_t spins = 0;
+
+ if (fence->state < NOUVEAU_FENCE_STATE_EMITTED) {
+ nouveau_fence_emit(fence);
+
+ if (fence == screen->fence.current)
+ nouveau_fence_new(screen, &screen->fence.current, FALSE);
+ }
+ if (fence->state < NOUVEAU_FENCE_STATE_FLUSHED)
+ FIRE_RING(screen->channel);
+
+ do {
+ nouveau_fence_update(screen, FALSE);
+
+ if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED)
+ return TRUE;
+ spins++;
+#ifdef PIPE_OS_UNIX
+ if (!(spins % 8)) /* donate a few cycles */
+ sched_yield();
+#endif
+ } while (spins < NOUVEAU_FENCE_MAX_SPINS);
+
+ debug_printf("Wait on fence %u (ack = %u, next = %u) timed out !\n",
+ fence->sequence,
+ screen->fence.sequence_ack, screen->fence.sequence);
+
+ return FALSE;
+}
+
+void
+nouveau_fence_next(struct nouveau_screen *screen)
+{
+ if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTED)
+ nouveau_fence_emit(screen->fence.current);
+
+ nouveau_fence_new(screen, &screen->fence.current, FALSE);
+}
diff --git a/src/gallium/drivers/nouveau/nouveau_fence.h b/src/gallium/drivers/nouveau/nouveau_fence.h
new file mode 100644
index 0000000000..680c75e99f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_fence.h
@@ -0,0 +1,59 @@
+
+#ifndef __NOUVEAU_FENCE_H__
+#define __NOUVEAU_FENCE_H__
+
+#include "util/u_inlines.h"
+#include "util/u_double_list.h"
+
+#define NOUVEAU_FENCE_STATE_AVAILABLE 0
+#define NOUVEAU_FENCE_STATE_EMITTED 1
+#define NOUVEAU_FENCE_STATE_FLUSHED 2
+#define NOUVEAU_FENCE_STATE_SIGNALLED 3
+
+struct nouveau_fence_work {
+ struct list_head list;
+ void (*func)(void *);
+ void *data;
+};
+
+struct nouveau_fence {
+ struct nouveau_fence *next;
+ struct nouveau_screen *screen;
+ int state;
+ int ref;
+ uint32_t sequence;
+ struct list_head work;
+};
+
+void nouveau_fence_emit(struct nouveau_fence *);
+void nouveau_fence_del(struct nouveau_fence *);
+
+boolean nouveau_fence_new(struct nouveau_screen *, struct nouveau_fence **,
+ boolean emit);
+boolean nouveau_fence_work(struct nouveau_fence *, void (*)(void *), void *);
+void nouveau_fence_update(struct nouveau_screen *, boolean flushed);
+void nouveau_fence_next(struct nouveau_screen *);
+boolean nouveau_fence_wait(struct nouveau_fence *);
+boolean nouveau_fence_signalled(struct nouveau_fence *);
+
+static INLINE void
+nouveau_fence_ref(struct nouveau_fence *fence, struct nouveau_fence **ref)
+{
+ if (fence)
+ ++fence->ref;
+
+ if (*ref) {
+ if (--(*ref)->ref == 0)
+ nouveau_fence_del(*ref);
+ }
+
+ *ref = fence;
+}
+
+static INLINE struct nouveau_fence *
+nouveau_fence(struct pipe_fence_handle *fence)
+{
+ return (struct nouveau_fence *)fence;
+}
+
+#endif // __NOUVEAU_FENCE_H__
diff --git a/src/gallium/drivers/nouveau/nouveau_mm.c b/src/gallium/drivers/nouveau/nouveau_mm.c
new file mode 100644
index 0000000000..7edeb4d21d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_mm.c
@@ -0,0 +1,288 @@
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_double_list.h"
+
+#include "nouveau_screen.h"
+#include "nouveau_mm.h"
+
+#include "nouveau/nouveau_bo.h"
+
+#define MM_MIN_ORDER 7
+#define MM_MAX_ORDER 20
+
+#define MM_NUM_BUCKETS (MM_MAX_ORDER - MM_MIN_ORDER + 1)
+
+#define MM_MIN_SIZE (1 << MM_MIN_ORDER)
+#define MM_MAX_SIZE (1 << MM_MAX_ORDER)
+
+struct mm_bucket {
+ struct list_head free;
+ struct list_head used;
+ struct list_head full;
+ int num_free;
+};
+
+struct nouveau_mman {
+ struct nouveau_device *dev;
+ struct mm_bucket bucket[MM_NUM_BUCKETS];
+ uint32_t storage_type;
+ uint32_t domain;
+ uint64_t allocated;
+};
+
+struct mm_slab {
+ struct list_head head;
+ struct nouveau_bo *bo;
+ struct nouveau_mman *cache;
+ int order;
+ int count;
+ int free;
+ uint32_t bits[0];
+};
+
+static int
+mm_slab_alloc(struct mm_slab *slab)
+{
+ int i, n, b;
+
+ if (slab->free == 0)
+ return -1;
+
+ for (i = 0; i < (slab->count + 31) / 32; ++i) {
+ b = ffs(slab->bits[i]) - 1;
+ if (b >= 0) {
+ n = i * 32 + b;
+ assert(n < slab->count);
+ slab->free--;
+ slab->bits[i] &= ~(1 << b);
+ return n;
+ }
+ }
+ return -1;
+}
+
+static INLINE void
+mm_slab_free(struct mm_slab *slab, int i)
+{
+ assert(i < slab->count);
+ slab->bits[i / 32] |= 1 << (i % 32);
+ slab->free++;
+ assert(slab->free <= slab->count);
+}
+
+static INLINE int
+mm_get_order(uint32_t size)
+{
+ int s = __builtin_clz(size) ^ 31;
+
+ if (size > (1 << s))
+ s += 1;
+ return s;
+}
+
+static struct mm_bucket *
+mm_bucket_by_order(struct nouveau_mman *cache, int order)
+{
+ if (order > MM_MAX_ORDER)
+ return NULL;
+ return &cache->bucket[MAX2(order, MM_MIN_ORDER) - MM_MIN_ORDER];
+}
+
+static struct mm_bucket *
+mm_bucket_by_size(struct nouveau_mman *cache, unsigned size)
+{
+ return mm_bucket_by_order(cache, mm_get_order(size));
+}
+
+/* size of bo allocation for slab with chunks of (1 << chunk_order) bytes */
+static INLINE uint32_t
+mm_default_slab_size(unsigned chunk_order)
+{
+ static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] =
+ {
+ 12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22
+ };
+
+ assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER);
+
+ return 1 << slab_order[chunk_order - MM_MIN_ORDER];
+}
+
+static int
+mm_slab_new(struct nouveau_mman *cache, int chunk_order)
+{
+ struct mm_slab *slab;
+ int words, ret;
+ const uint32_t size = mm_default_slab_size(chunk_order);
+
+ words = ((size >> chunk_order) + 31) / 32;
+ assert(words);
+
+ slab = MALLOC(sizeof(struct mm_slab) + words * 4);
+ if (!slab)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ memset(&slab->bits[0], ~0, words * 4);
+
+ slab->bo = NULL;
+ ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size,
+ 0, cache->storage_type, &slab->bo);
+ if (ret) {
+ FREE(slab);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
+ LIST_INITHEAD(&slab->head);
+
+ slab->cache = cache;
+ slab->order = chunk_order;
+ slab->count = slab->free = size >> chunk_order;
+
+ LIST_ADD(&slab->head, &mm_bucket_by_order(cache, chunk_order)->free);
+
+ cache->allocated += size;
+
+ debug_printf("MM: new slab, total memory = %lu KiB\n",
+ cache->allocated / 1024);
+
+ return PIPE_OK;
+}
+
+/* @return token to identify slab or NULL if we just allocated a new bo */
+struct nouveau_mm_allocation *
+nouveau_mm_allocate(struct nouveau_mman *cache,
+ uint32_t size, struct nouveau_bo **bo, uint32_t *offset)
+{
+ struct mm_bucket *bucket;
+ struct mm_slab *slab;
+ struct nouveau_mm_allocation *alloc;
+ int ret;
+
+ bucket = mm_bucket_by_size(cache, size);
+ if (!bucket) {
+ ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size,
+ 0, cache->storage_type, bo);
+ if (ret)
+ debug_printf("bo_new(%x, %x): %i\n", size, cache->storage_type, ret);
+
+ *offset = 0;
+ return NULL;
+ }
+
+ if (!LIST_IS_EMPTY(&bucket->used)) {
+ slab = LIST_ENTRY(struct mm_slab, bucket->used.next, head);
+ } else {
+ if (LIST_IS_EMPTY(&bucket->free)) {
+ mm_slab_new(cache, MAX2(mm_get_order(size), MM_MIN_ORDER));
+ }
+ slab = LIST_ENTRY(struct mm_slab, bucket->free.next, head);
+
+ LIST_DEL(&slab->head);
+ LIST_ADD(&slab->head, &bucket->used);
+ }
+
+ *offset = mm_slab_alloc(slab) << slab->order;
+
+ alloc = MALLOC_STRUCT(nouveau_mm_allocation);
+ if (!alloc)
+ return NULL;
+
+ nouveau_bo_ref(slab->bo, bo);
+
+ if (slab->free == 0) {
+ LIST_DEL(&slab->head);
+ LIST_ADD(&slab->head, &bucket->full);
+ }
+
+ alloc->next = NULL;
+ alloc->offset = *offset;
+ alloc->priv = (void *)slab;
+
+ return alloc;
+}
+
+void
+nouveau_mm_free(struct nouveau_mm_allocation *alloc)
+{
+ struct mm_slab *slab = (struct mm_slab *)alloc->priv;
+ struct mm_bucket *bucket = mm_bucket_by_order(slab->cache, slab->order);
+
+ mm_slab_free(slab, alloc->offset >> slab->order);
+
+ if (slab->free == 1) {
+ LIST_DEL(&slab->head);
+
+ if (slab->count > 1)
+ LIST_ADDTAIL(&slab->head, &bucket->used);
+ else
+ LIST_ADDTAIL(&slab->head, &bucket->free);
+ }
+
+ FREE(alloc);
+}
+
+void
+nouveau_mm_free_work(void *data)
+{
+ nouveau_mm_free(data);
+}
+
+struct nouveau_mman *
+nouveau_mm_create(struct nouveau_device *dev, uint32_t domain,
+ uint32_t storage_type)
+{
+ struct nouveau_mman *cache = MALLOC_STRUCT(nouveau_mman);
+ int i;
+
+ if (!cache)
+ return NULL;
+
+ cache->dev = dev;
+ cache->domain = domain;
+ cache->storage_type = storage_type;
+ cache->allocated = 0;
+
+ for (i = 0; i < MM_NUM_BUCKETS; ++i) {
+ LIST_INITHEAD(&cache->bucket[i].free);
+ LIST_INITHEAD(&cache->bucket[i].used);
+ LIST_INITHEAD(&cache->bucket[i].full);
+ }
+
+ return cache;
+}
+
+static INLINE void
+nouveau_mm_free_slabs(struct list_head *head)
+{
+ struct mm_slab *slab, *next;
+
+ LIST_FOR_EACH_ENTRY_SAFE(slab, next, head, head) {
+ LIST_DEL(&slab->head);
+ nouveau_bo_ref(NULL, &slab->bo);
+ FREE(slab);
+ }
+}
+
+void
+nouveau_mm_destroy(struct nouveau_mman *cache)
+{
+ int i;
+
+ if (!cache)
+ return;
+
+ for (i = 0; i < MM_NUM_BUCKETS; ++i) {
+ if (!LIST_IS_EMPTY(&cache->bucket[i].used) ||
+ !LIST_IS_EMPTY(&cache->bucket[i].full))
+ debug_printf("WARNING: destroying GPU memory cache "
+ "with some buffers still in use\n");
+
+ nouveau_mm_free_slabs(&cache->bucket[i].free);
+ nouveau_mm_free_slabs(&cache->bucket[i].used);
+ nouveau_mm_free_slabs(&cache->bucket[i].full);
+ }
+
+ FREE(cache);
+}
+
diff --git a/src/gallium/drivers/nouveau/nouveau_mm.h b/src/gallium/drivers/nouveau/nouveau_mm.h
new file mode 100644
index 0000000000..5b57c8ba4f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_mm.h
@@ -0,0 +1,32 @@
+#ifndef __NOUVEAU_MM_H__
+#define __NOUVEAU_MM_H__
+
+struct nouveau_mman;
+
+/* Since a resource can be migrated, we need to decouple allocations from
+ * them. This struct is linked with fences for delayed freeing of allocs.
+ */
+struct nouveau_mm_allocation {
+ struct nouveau_mm_allocation *next;
+ void *priv;
+ uint32_t offset;
+};
+
+extern struct nouveau_mman *
+nouveau_mm_create(struct nouveau_device *, uint32_t domain,
+ uint32_t storage_type);
+
+extern void
+nouveau_mm_destroy(struct nouveau_mman *);
+
+extern struct nouveau_mm_allocation *
+nouveau_mm_allocate(struct nouveau_mman *, uint32_t size,
+ struct nouveau_bo **, uint32_t *offset);
+
+extern void
+nouveau_mm_free(struct nouveau_mm_allocation *);
+
+extern void
+nouveau_mm_free_work(void *);
+
+#endif // __NOUVEAU_MM_H__
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index a9426df686..4cd82a51c1 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -14,6 +14,7 @@
#include "nouveau/nouveau_bo.h"
#include "nouveau_winsys.h"
#include "nouveau_screen.h"
+#include "nouveau_fence.h"
/* XXX this should go away */
#include "state_tracker/drm_driver.h"
@@ -150,23 +151,22 @@ nouveau_screen_fence_ref(struct pipe_screen *pscreen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *pfence)
{
- *ptr = pfence;
+ nouveau_fence_ref(nouveau_fence(pfence), (struct nouveau_fence **)ptr);
}
-static int
+static boolean
nouveau_screen_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *pfence,
- unsigned flags)
+ struct pipe_fence_handle *pfence)
{
- return 0;
+ return nouveau_fence_signalled(nouveau_fence(pfence));
}
-static int
+static boolean
nouveau_screen_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *pfence,
- unsigned flags)
+ uint64_t timeout)
{
- return 0;
+ return nouveau_fence_wait(nouveau_fence(pfence));
}
@@ -209,26 +209,6 @@ nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
}
}
-
-unsigned int
-nouveau_reference_flags(struct nouveau_bo *bo)
-{
- uint32_t bo_flags;
- int flags = 0;
-
- bo_flags = nouveau_bo_pending(bo);
- if (bo_flags & NOUVEAU_BO_RD)
- flags |= PIPE_REFERENCED_FOR_READ;
- if (bo_flags & NOUVEAU_BO_WR)
- flags |= PIPE_REFERENCED_FOR_WRITE;
-
- return flags;
-}
-
-
-
-
-
int
nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
{
@@ -250,6 +230,10 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
util_format_s3tc_init();
+ screen->mm_GART = nouveau_mm_create(dev,
+ NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
+ 0x000);
+ screen->mm_VRAM = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, 0x000);
return 0;
}
@@ -257,7 +241,12 @@ void
nouveau_screen_fini(struct nouveau_screen *screen)
{
struct pipe_winsys *ws = screen->base.winsys;
+
+ nouveau_mm_destroy(screen->mm_GART);
+ nouveau_mm_destroy(screen->mm_VRAM);
+
nouveau_channel_free(&screen->channel);
+
if (ws)
ws->destroy(ws);
}
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index 8c290273fb..186ada3967 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -2,6 +2,10 @@
#define __NOUVEAU_SCREEN_H__
#include "pipe/p_screen.h"
+#include "util/u_memory.h"
+typedef uint32_t u32;
+
+struct nouveau_bo;
struct nouveau_screen {
struct pipe_screen base;
@@ -12,6 +16,20 @@ struct nouveau_screen {
* these almost always should be set to the same value */
unsigned vertex_buffer_flags;
unsigned index_buffer_flags;
+ unsigned sysmem_bindings;
+
+ struct {
+ struct nouveau_fence *head;
+ struct nouveau_fence *tail;
+ struct nouveau_fence *current;
+ u32 sequence;
+ u32 sequence_ack;
+ void (*emit)(struct pipe_screen *, u32 sequence);
+ u32 (*update)(struct pipe_screen *);
+ } fence;
+
+ struct nouveau_mman *mm_VRAM;
+ struct nouveau_mman *mm_GART;
};
static INLINE struct nouveau_screen *
@@ -56,17 +74,13 @@ nouveau_screen_bo_from_handle(struct pipe_screen *pscreen,
struct winsys_handle *whandle,
unsigned *out_stride);
-unsigned int
-nouveau_reference_flags(struct nouveau_bo *bo);
-
-
int nouveau_screen_init(struct nouveau_screen *, struct nouveau_device *);
void nouveau_screen_fini(struct nouveau_screen *);
-
+#ifndef NOUVEAU_NVC0
static INLINE unsigned
RING_3D(unsigned mthd, unsigned size)
{
@@ -78,5 +92,6 @@ RING_3D_NI(unsigned mthd, unsigned size)
{
return 0x40000000 | (7 << 13) | (size << 18) | mthd;
}
+#endif
#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h
deleted file mode 100644
index e920cf9f3b..0000000000
--- a/src/gallium/drivers/nouveau/nouveau_stateobj.h
+++ /dev/null
@@ -1,316 +0,0 @@
-#ifndef __NOUVEAU_STATEOBJ_H__
-#define __NOUVEAU_STATEOBJ_H__
-
-#include "util/u_debug.h"
-
-#ifdef DEBUG
-#define DEBUG_NOUVEAU_STATEOBJ
-#endif /* DEBUG */
-
-struct nouveau_stateobj_reloc {
- struct nouveau_bo *bo;
-
- struct nouveau_grobj *gr;
- uint32_t push_offset;
- uint32_t mthd;
-
- uint32_t data;
- unsigned flags;
- unsigned vor;
- unsigned tor;
-};
-
-struct nouveau_stateobj_start {
- struct nouveau_grobj *gr;
- uint32_t mthd;
- uint32_t size;
- unsigned offset;
-};
-
-struct nouveau_stateobj {
- struct pipe_reference reference;
-
- struct nouveau_stateobj_start *start;
- struct nouveau_stateobj_reloc *reloc;
-
- /* Common memory pool for data. */
- uint32_t *pool;
- unsigned pool_cur;
-
-#ifdef DEBUG_NOUVEAU_STATEOBJ
- unsigned start_alloc;
- unsigned reloc_alloc;
- unsigned pool_alloc;
-#endif /* DEBUG_NOUVEAU_STATEOBJ */
-
- unsigned total; /* includes begin_ring */
- unsigned cur; /* excludes begin_ring, offset from "cur_start" */
- unsigned cur_start;
- unsigned cur_reloc;
-};
-
-static INLINE void
-so_dump(struct nouveau_stateobj *so)
-{
- unsigned i, nr, total = 0;
-
- for (i = 0; i < so->cur_start; i++) {
- if (so->start[i].gr->subc > -1)
- debug_printf("+0x%04x: 0x%08x\n", total++,
- (so->start[i].size << 18) | (so->start[i].gr->subc << 13)
- | so->start[i].mthd);
- else
- debug_printf("+0x%04x: 0x%08x\n", total++,
- (so->start[i].size << 18) | so->start[i].mthd);
- for (nr = 0; nr < so->start[i].size; nr++, total++)
- debug_printf("+0x%04x: 0x%08x\n", total,
- so->pool[so->start[i].offset + nr]);
- }
-}
-
-static INLINE struct nouveau_stateobj *
-so_new(unsigned start, unsigned push, unsigned reloc)
-{
- struct nouveau_stateobj *so;
-
- so = MALLOC(sizeof(struct nouveau_stateobj));
- pipe_reference_init(&so->reference, 1);
- so->total = so->cur = so->cur_start = so->cur_reloc = 0;
-
-#ifdef DEBUG_NOUVEAU_STATEOBJ
- so->start_alloc = start;
- so->reloc_alloc = reloc;
- so->pool_alloc = push;
-#endif /* DEBUG_NOUVEAU_STATEOBJ */
-
- so->start = MALLOC(start * sizeof(struct nouveau_stateobj_start));
- so->reloc = MALLOC(reloc * sizeof(struct nouveau_stateobj_reloc));
- so->pool = MALLOC(push * sizeof(uint32_t));
- so->pool_cur = 0;
-
- if (!so->start || !so->reloc || !so->pool) {
- debug_printf("malloc failed\n");
- assert(0);
- }
-
- return so;
-}
-
-static INLINE void
-so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso)
-{
- struct nouveau_stateobj *so = *pso;
- int i;
-
- if (pipe_reference(&(*pso)->reference, &ref->reference)) {
- FREE(so->start);
- for (i = 0; i < so->cur_reloc; i++)
- nouveau_bo_ref(NULL, &so->reloc[i].bo);
- FREE(so->reloc);
- FREE(so->pool);
- FREE(so);
- }
- *pso = ref;
-}
-
-static INLINE void
-so_data(struct nouveau_stateobj *so, uint32_t data)
-{
-#ifdef DEBUG_NOUVEAU_STATEOBJ
- if (so->cur >= so->start[so->cur_start - 1].size) {
- debug_printf("exceeding specified size\n");
- assert(0);
- }
-#endif /* DEBUG_NOUVEAU_STATEOBJ */
-
- so->pool[so->start[so->cur_start - 1].offset + so->cur++] = data;
-}
-
-static INLINE void
-so_datap(struct nouveau_stateobj *so, uint32_t *data, unsigned size)
-{
-#ifdef DEBUG_NOUVEAU_STATEOBJ
- if ((so->cur + size) > so->start[so->cur_start - 1].size) {
- debug_printf("exceeding specified size\n");
- assert(0);
- }
-#endif /* DEBUG_NOUVEAU_STATEOBJ */
-
- while (size--)
- so->pool[so->start[so->cur_start - 1].offset + so->cur++] =
- *data++;
-}
-
-static INLINE void
-so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr,
- unsigned mthd, unsigned size)
-{
- struct nouveau_stateobj_start *start;
-
-#ifdef DEBUG_NOUVEAU_STATEOBJ
- if (so->start_alloc <= so->cur_start) {
- debug_printf("exceeding num_start size\n");
- assert(0);
- }
-#endif /* DEBUG_NOUVEAU_STATEOBJ */
- start = so->start;
-
-#ifdef DEBUG_NOUVEAU_STATEOBJ
- if (so->cur_start > 0 && start[so->cur_start - 1].size > so->cur) {
- debug_printf("previous so_method was not filled\n");
- assert(0);
- }
-#endif /* DEBUG_NOUVEAU_STATEOBJ */
-
- start[so->cur_start].gr = gr;
- start[so->cur_start].mthd = mthd;
- start[so->cur_start].size = size;
-
-#ifdef DEBUG_NOUVEAU_STATEOBJ
- if (so->pool_alloc < (size + so->pool_cur)) {
- debug_printf("exceeding num_pool size\n");
- assert(0);
- }
-#endif /* DEBUG_NOUVEAU_STATEOBJ */
-
- start[so->cur_start].offset = so->pool_cur;
- so->pool_cur += size;
-
- so->cur_start++;
- /* The 1 is for *this* begin_ring. */
- so->total += so->cur + 1;
- so->cur = 0;
-}
-
-static INLINE void
-so_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo,
- unsigned data, unsigned flags, unsigned vor, unsigned tor)
-{
- struct nouveau_stateobj_reloc *r;
-
-#ifdef DEBUG_NOUVEAU_STATEOBJ
- if (so->reloc_alloc <= so->cur_reloc) {
- debug_printf("exceeding num_reloc size\n");
- assert(0);
- }
-#endif /* DEBUG_NOUVEAU_STATEOBJ */
- r = so->reloc;
-
- r[so->cur_reloc].bo = NULL;
- nouveau_bo_ref(bo, &(r[so->cur_reloc].bo));
- r[so->cur_reloc].gr = so->start[so->cur_start-1].gr;
- r[so->cur_reloc].push_offset = so->total + so->cur;
- r[so->cur_reloc].data = data;
- r[so->cur_reloc].flags = flags;
- r[so->cur_reloc].mthd = so->start[so->cur_start-1].mthd +
- (so->cur << 2);
- r[so->cur_reloc].vor = vor;
- r[so->cur_reloc].tor = tor;
-
- so_data(so, data);
- so->cur_reloc++;
-}
-
-/* Determine if this buffer object is referenced by this state object. */
-static INLINE boolean
-so_bo_is_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo)
-{
- int i;
-
- for (i = 0; i < so->cur_reloc; i++)
- if (so->reloc[i].bo == bo)
- return true;
-
- return false;
-}
-
-static INLINE void
-so_emit(struct nouveau_channel *chan, struct nouveau_stateobj *so)
-{
- unsigned nr, i;
- int ret = 0;
-
-#ifdef DEBUG_NOUVEAU_STATEOBJ
- if (so->start[so->cur_start - 1].size > so->cur) {
- debug_printf("emit: previous so_method was not filled\n");
- assert(0);
- }
-#endif /* DEBUG_NOUVEAU_STATEOBJ */
-
- /* We cannot update total in case we so_emit again. */
- nr = so->total + so->cur;
-
- /* This will flush if we need space.
- * We don't actually need the marker.
- */
- if ((ret = nouveau_pushbuf_marker_emit(chan, nr, so->cur_reloc))) {
- debug_printf("so_emit failed marker emit with error %d\n", ret);
- assert(0);
- }
-
- /* Submit data. This will ensure proper binding of objects. */
- for (i = 0; i < so->cur_start; i++) {
- BEGIN_RING(chan, so->start[i].gr, so->start[i].mthd, so->start[i].size);
- OUT_RINGp(chan, &(so->pool[so->start[i].offset]), so->start[i].size);
- }
-
- for (i = 0; i < so->cur_reloc; i++) {
- struct nouveau_stateobj_reloc *r = &so->reloc[i];
-
- if ((ret = nouveau_pushbuf_emit_reloc(chan, chan->cur - nr +
- r->push_offset, r->bo, r->data,
- 0, r->flags, r->vor, r->tor))) {
- debug_printf("so_emit failed reloc with error %d\n", ret);
- assert(0);
- }
- }
-}
-
-static INLINE void
-so_emit_reloc_markers(struct nouveau_channel *chan, struct nouveau_stateobj *so)
-{
- unsigned i;
- int ret = 0;
-
- if (!so)
- return;
-
- /* If we need to flush in flush notify, then we have a problem anyway. */
- for (i = 0; i < so->cur_reloc; i++) {
- struct nouveau_stateobj_reloc *r = &so->reloc[i];
-
-#ifdef DEBUG_NOUVEAU_STATEOBJ
- if (r->mthd & 0x40000000) {
- debug_printf("error: NI mthd 0x%08X\n", r->mthd);
- continue;
- }
-#endif /* DEBUG_NOUVEAU_STATEOBJ */
-
- /* We don't need to autobind, since there are enough subchannels
- * for all objects we use. If this is changed, account for the extra
- * space in callers of this function.
- */
- assert(r->gr->bound != NOUVEAU_GROBJ_UNBOUND);
-
- /* Some relocs really don't like to be hammered,
- * NOUVEAU_BO_DUMMY makes sure it only
- * happens when needed.
- */
- ret = OUT_RELOC(chan, r->bo, (r->gr->subc << 13) | (1<< 18) |
- r->mthd, (r->flags & (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART
- | NOUVEAU_BO_RDWR)) | NOUVEAU_BO_DUMMY, 0, 0);
- if (ret) {
- debug_printf("OUT_RELOC failed %d\n", ret);
- assert(0);
- }
-
- ret = OUT_RELOC(chan, r->bo, r->data, r->flags |
- NOUVEAU_BO_DUMMY, r->vor, r->tor);
- if (ret) {
- debug_printf("OUT_RELOC failed %d\n", ret);
- assert(0);
- }
- }
-}
-
-#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
index ab480cabd0..484f870bd8 100644
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -9,8 +9,9 @@
#include "nouveau/nouveau_device.h"
#include "nouveau/nouveau_grobj.h"
#include "nouveau/nouveau_notifier.h"
-#include "nouveau/nouveau_resource.h"
-#include "nouveau/nouveau_pushbuf.h"
+#ifndef NOUVEAU_NVC0
+#include "nouveau/nv04_pushbuf.h"
+#endif
#ifndef NV04_PFIFO_MAX_PACKET_LEN
#define NV04_PFIFO_MAX_PACKET_LEN 2047
@@ -41,4 +42,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
extern struct pipe_screen *
nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
+extern struct pipe_screen *
+nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
+
#endif
diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h
index cb7653c3fe..a5b0d0478c 100644
--- a/src/gallium/drivers/nouveau/nv_object.xml.h
+++ b/src/gallium/drivers/nouveau/nv_object.xml.h
@@ -8,12 +8,10 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- nv30-40_3d.xml ( 31709 bytes, from 2010-09-05 07:53:14)
-- copyright.xml ( 6503 bytes, from 2010-04-10 23:15:50)
-- nv_3ddefs.xml ( 15193 bytes, from 2010-09-05 07:50:15)
-- nv_defs.xml ( 4437 bytes, from 2010-08-05 19:38:53)
-- nv_object.xml ( 10424 bytes, from 2010-08-05 19:38:53)
-- nvchipsets.xml ( 2824 bytes, from 2010-08-05 19:38:53)
+- nv_object.xml ( 11547 bytes, from 2010-10-24 15:29:34)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
Copyright (C) 2006-2010 by the following authors:
- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
@@ -37,7 +35,7 @@ Copyright (C) 2006-2010 by the following authors:
- Mark Carey <mark.carey@gmail.com> (careym)
- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
-- Patrice Mandin <mandin.patrice@orange.fr> (pmandin, pmdata)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
- Peter Popov <ironpeter@users.sf.net> (ironpeter)
- Richard Hughes <hughsient@users.sf.net> (hughsient)
@@ -180,6 +178,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_COMPUTE 0x000050c0
#define NVA3_COMPUTE 0x000085c0
#define NVC0_COMPUTE 0x000090c0
+#define NV84_CRYPT 0x000074c1
#define NV01_SUBCHAN__SIZE 0x00002000
#define NV01_SUBCHAN 0x00000000
@@ -194,9 +193,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV84_SUBCHAN_QUERY_GET 0x0000001c
-#define NV84_SUBCHAN_UNK20 0x00000020
+#define NV84_SUBCHAN_QUERY_INTR 0x00000020
-#define NV84_SUBCHAN_UNK24 0x00000024
+#define NV84_SUBCHAN_WRCACHE_FLUSH 0x00000024
#define NV10_SUBCHAN_REF_CNT 0x00000050
@@ -209,7 +208,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV11_SUBCHAN_SEMAPHORE_RELEASE 0x0000006c
-#define NV50_SUBCHAN_UNK80 0x00000080
+#define NV40_SUBCHAN_YIELD 0x00000080
#define NV01_GRAPH 0x00000000
@@ -227,5 +226,43 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV40_GRAPH_PM_TRIGGER 0x00000140
+#define NVC0_SUBCHAN__SIZE 0x00008000
+#define NVC0_SUBCHAN 0x00000000
+
+#define NVC0_SUBCHAN_OBJECT 0x00000000
+
+
+#define NVC0_SUBCHAN_QUERY_ADDRESS_HIGH 0x00000010
+
+#define NVC0_SUBCHAN_QUERY_ADDRESS_LOW 0x00000014
+
+#define NVC0_SUBCHAN_QUERY_SEQUENCE 0x00000018
+
+#define NVC0_SUBCHAN_QUERY_GET 0x0000001c
+
+#define NVC0_SUBCHAN_REF_CNT 0x00000050
+
+#define NVC0_GRAPH 0x00000000
+
+#define NVC0_GRAPH_NOP 0x00000100
+
+#define NVC0_GRAPH_NOTIFY_ADDRESS_HIGH 0x00000104
+
+#define NVC0_GRAPH_NOTIFY_ADDRESS_LOW 0x00000108
+
+#define NVC0_GRAPH_NOTIFY 0x0000010c
+#define NVC0_GRAPH_NOTIFY_WRITE 0x00000000
+#define NVC0_GRAPH_NOTIFY_WRITE_AND_AWAKEN 0x00000001
+
+#define NVC0_GRAPH_SERIALIZE 0x00000110
+
+#define NVC0_GRAPH_MACRO_UPLOAD_POS 0x00000114
+
+#define NVC0_GRAPH_MACRO_UPLOAD_DATA 0x00000118
+
+#define NVC0_GRAPH_MACRO_ID 0x0000011c
+
+#define NVC0_GRAPH_MACRO_POS 0x00000120
+
#endif /* NV_OBJECT_XML */
diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile
index bf1e8201a0..02bcc26cfb 100644
--- a/src/gallium/drivers/nv50/Makefile
+++ b/src/gallium/drivers/nv50/Makefile
@@ -4,13 +4,10 @@ include $(TOP)/configs/current
LIBNAME = nv50
C_SOURCES = \
- nv50_buffer.c \
- nv50_clear.c \
nv50_context.c \
nv50_draw.c \
nv50_formats.c \
nv50_miptree.c \
- nv50_query.c \
nv50_resource.c \
nv50_screen.c \
nv50_state.c \
@@ -19,7 +16,6 @@ C_SOURCES = \
nv50_tex.c \
nv50_transfer.c \
nv50_vbo.c \
- nv50_push.c \
nv50_program.c \
nv50_shader_state.c \
nv50_pc.c \
@@ -27,6 +23,11 @@ C_SOURCES = \
nv50_pc_emit.c \
nv50_tgsi_to_nc.c \
nv50_pc_optimize.c \
- nv50_pc_regalloc.c
+ nv50_pc_regalloc.c \
+ nv50_push.c \
+ nv50_query.c
+
+LIBRARY_INCLUDES = \
+ $(LIBDRM_CFLAGS)
include ../../Makefile.template
diff --git a/src/gallium/drivers/nv50/SConscript b/src/gallium/drivers/nv50/SConscript
index 8e7892a9ab..3c8a7276b9 100644
--- a/src/gallium/drivers/nv50/SConscript
+++ b/src/gallium/drivers/nv50/SConscript
@@ -5,13 +5,10 @@ env = env.Clone()
nv50 = env.ConvenienceLibrary(
target = 'nv50',
source = [
- 'nv50_buffer.c',
- 'nv50_clear.c',
'nv50_context.c',
'nv50_draw.c',
'nv50_formats.c',
'nv50_miptree.c',
- 'nv50_query.c',
'nv50_resource.c',
'nv50_screen.c',
'nv50_state.c',
@@ -20,7 +17,6 @@ nv50 = env.ConvenienceLibrary(
'nv50_tex.c',
'nv50_transfer.c',
'nv50_vbo.c',
- 'nv50_push.c',
'nv50_program.c',
'nv50_shader_state.c',
'nv50_pc.c',
@@ -29,6 +25,8 @@ nv50 = env.ConvenienceLibrary(
'nv50_tgsi_to_nc.c',
'nv50_pc_optimize.c',
'nv50_pc_regalloc.c',
+ 'nv50_push.c',
+ 'nv50_query.c'
])
Export('nv50')
diff --git a/src/gallium/drivers/nv50/nv50_2d.xml.h b/src/gallium/drivers/nv50/nv50_2d.xml.h
new file mode 100644
index 0000000000..bc9bcf7839
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_2d.xml.h
@@ -0,0 +1,393 @@
+#ifndef NV50_2D_XML
+#define NV50_2D_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nv50_2d.xml ( 9799 bytes, from 2010-12-28 17:17:11)
+- copyright.xml ( 6452 bytes, from 2010-12-15 23:45:18)
+- nv_object.xml ( 11898 bytes, from 2010-12-28 17:17:11)
+- nvchipsets.xml ( 3074 bytes, from 2010-12-15 23:45:18)
+- nv_defs.xml ( 4437 bytes, from 2010-12-15 23:45:18)
+- nv50_defs.xml ( 4487 bytes, from 2010-12-15 23:45:18)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+
+#define NV50_2D_DMA_NOTIFY 0x00000180
+
+#define NV50_2D_DMA_DST 0x00000184
+
+#define NV50_2D_DMA_SRC 0x00000188
+
+#define NV50_2D_DMA_COND 0x0000018c
+
+#define NV50_2D_DST_FORMAT 0x00000200
+
+#define NV50_2D_DST_LINEAR 0x00000204
+
+#define NV50_2D_DST_TILE_MODE 0x00000208
+
+#define NV50_2D_DST_DEPTH 0x0000020c
+
+#define NV50_2D_DST_LAYER 0x00000210
+
+#define NV50_2D_DST_PITCH 0x00000214
+
+#define NV50_2D_DST_WIDTH 0x00000218
+
+#define NV50_2D_DST_HEIGHT 0x0000021c
+
+#define NV50_2D_DST_ADDRESS_HIGH 0x00000220
+
+#define NV50_2D_DST_ADDRESS_LOW 0x00000224
+
+#define NV50_2D_UNK228 0x00000228
+
+#define NV50_2D_SRC_FORMAT 0x00000230
+
+#define NV50_2D_SRC_LINEAR 0x00000234
+
+#define NV50_2D_SRC_TILE_MODE 0x00000238
+
+#define NV50_2D_SRC_DEPTH 0x0000023c
+
+#define NV50_2D_SRC_LAYER 0x00000240
+
+#define NV50_2D_SRC_PITCH 0x00000244
+#define NV50_2D_SRC_PITCH__MAX 0x00040000
+
+#define NV50_2D_SRC_WIDTH 0x00000248
+#define NV50_2D_SRC_WIDTH__MAX 0x00010000
+
+#define NV50_2D_SRC_HEIGHT 0x0000024c
+#define NV50_2D_SRC_HEIGHT__MAX 0x00010000
+
+#define NV50_2D_SRC_ADDRESS_HIGH 0x00000250
+
+#define NV50_2D_SRC_ADDRESS_LOW 0x00000254
+
+#define NV50_2D_UNK258 0x00000258
+
+#define NV50_2D_UNK260 0x00000260
+
+#define NV50_2D_COND_ADDRESS_HIGH 0x00000264
+
+#define NV50_2D_COND_ADDRESS_LOW 0x00000268
+
+#define NV50_2D_COND_MODE 0x0000026c
+#define NV50_2D_COND_MODE_NEVER 0x00000000
+#define NV50_2D_COND_MODE_ALWAYS 0x00000001
+#define NV50_2D_COND_MODE_RES_NON_ZERO 0x00000002
+#define NV50_2D_COND_MODE_EQUAL 0x00000003
+#define NV50_2D_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NV50_2D_CLIP_X 0x00000280
+
+#define NV50_2D_CLIP_Y 0x00000284
+
+#define NV50_2D_CLIP_W 0x00000288
+
+#define NV50_2D_CLIP_H 0x0000028c
+
+#define NV50_2D_CLIP_ENABLE 0x00000290
+
+#define NV50_2D_COLOR_KEY_FORMAT 0x00000294
+#define NV50_2D_COLOR_KEY_FORMAT_16BPP 0x00000000
+#define NV50_2D_COLOR_KEY_FORMAT_15BPP 0x00000001
+#define NV50_2D_COLOR_KEY_FORMAT_24BPP 0x00000002
+#define NV50_2D_COLOR_KEY_FORMAT_30BPP 0x00000003
+#define NV50_2D_COLOR_KEY_FORMAT_8BPP 0x00000004
+#define NV50_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005
+#define NV50_2D_COLOR_KEY_FORMAT_32BPP 0x00000006
+
+#define NV50_2D_COLOR_KEY 0x00000298
+
+#define NV50_2D_COLOR_KEY_ENABLE 0x0000029c
+
+#define NV50_2D_ROP 0x000002a0
+
+#define NV50_2D_BETA1 0x000002a4
+
+#define NV50_2D_BETA4 0x000002a8
+
+#define NV50_2D_OPERATION 0x000002ac
+#define NV50_2D_OPERATION_SRCCOPY_AND 0x00000000
+#define NV50_2D_OPERATION_ROP_AND 0x00000001
+#define NV50_2D_OPERATION_BLEND_AND 0x00000002
+#define NV50_2D_OPERATION_SRCCOPY 0x00000003
+#define NV50_2D_OPERATION_UNK4 0x00000004
+#define NV50_2D_OPERATION_SRCCOPY_PREMULT 0x00000005
+#define NV50_2D_OPERATION_BLEND_PREMULT 0x00000006
+
+#define NV50_2D_UNK2B0 0x000002b0
+#define NV50_2D_UNK2B0_UNK0__MASK 0x0000003f
+#define NV50_2D_UNK2B0_UNK0__SHIFT 0
+#define NV50_2D_UNK2B0_UNK1__MASK 0x00003f00
+#define NV50_2D_UNK2B0_UNK1__SHIFT 8
+
+#define NV50_2D_PATTERN_SELECT 0x000002b4
+#define NV50_2D_PATTERN_SELECT_MONO_8X8 0x00000000
+#define NV50_2D_PATTERN_SELECT_MONO_64X1 0x00000001
+#define NV50_2D_PATTERN_SELECT_MONO_1X64 0x00000002
+#define NV50_2D_PATTERN_SELECT_COLOR 0x00000003
+
+#define NV50_2D_PATTERN_COLOR_FORMAT 0x000002e8
+#define NV50_2D_PATTERN_COLOR_FORMAT_16BPP 0x00000000
+#define NV50_2D_PATTERN_COLOR_FORMAT_15BPP 0x00000001
+#define NV50_2D_PATTERN_COLOR_FORMAT_32BPP 0x00000002
+#define NV50_2D_PATTERN_COLOR_FORMAT_8BPP 0x00000003
+#define NV50_2D_PATTERN_COLOR_FORMAT_UNK4 0x00000004
+#define NV50_2D_PATTERN_COLOR_FORMAT_UNK5 0x00000005
+
+#define NV50_2D_PATTERN_MONO_FORMAT 0x000002ec
+#define NV50_2D_PATTERN_MONO_FORMAT_CGA6 0x00000000
+#define NV50_2D_PATTERN_MONO_FORMAT_LE 0x00000001
+
+#define NV50_2D_PATTERN_COLOR(i0) (0x000002f0 + 0x4*(i0))
+#define NV50_2D_PATTERN_COLOR__ESIZE 0x00000004
+#define NV50_2D_PATTERN_COLOR__LEN 0x00000002
+
+#define NV50_2D_PATTERN_BITMAP(i0) (0x000002f8 + 0x4*(i0))
+#define NV50_2D_PATTERN_BITMAP__ESIZE 0x00000004
+#define NV50_2D_PATTERN_BITMAP__LEN 0x00000002
+
+#define NV50_2D_PATTERN_X8R8G8B8(i0) (0x00000300 + 0x4*(i0))
+#define NV50_2D_PATTERN_X8R8G8B8__ESIZE 0x00000004
+#define NV50_2D_PATTERN_X8R8G8B8__LEN 0x00000040
+#define NV50_2D_PATTERN_X8R8G8B8_B__MASK 0x000000ff
+#define NV50_2D_PATTERN_X8R8G8B8_B__SHIFT 0
+#define NV50_2D_PATTERN_X8R8G8B8_G__MASK 0x0000ff00
+#define NV50_2D_PATTERN_X8R8G8B8_G__SHIFT 8
+#define NV50_2D_PATTERN_X8R8G8B8_R__MASK 0x00ff0000
+#define NV50_2D_PATTERN_X8R8G8B8_R__SHIFT 16
+
+#define NV50_2D_PATTERN_R5G6B5(i0) (0x00000400 + 0x4*(i0))
+#define NV50_2D_PATTERN_R5G6B5__ESIZE 0x00000004
+#define NV50_2D_PATTERN_R5G6B5__LEN 0x00000020
+#define NV50_2D_PATTERN_R5G6B5_B0__MASK 0x0000001f
+#define NV50_2D_PATTERN_R5G6B5_B0__SHIFT 0
+#define NV50_2D_PATTERN_R5G6B5_G0__MASK 0x000007e0
+#define NV50_2D_PATTERN_R5G6B5_G0__SHIFT 5
+#define NV50_2D_PATTERN_R5G6B5_R0__MASK 0x0000f800
+#define NV50_2D_PATTERN_R5G6B5_R0__SHIFT 11
+#define NV50_2D_PATTERN_R5G6B5_B1__MASK 0x001f0000
+#define NV50_2D_PATTERN_R5G6B5_B1__SHIFT 16
+#define NV50_2D_PATTERN_R5G6B5_G1__MASK 0x07e00000
+#define NV50_2D_PATTERN_R5G6B5_G1__SHIFT 21
+#define NV50_2D_PATTERN_R5G6B5_R1__MASK 0xf8000000
+#define NV50_2D_PATTERN_R5G6B5_R1__SHIFT 27
+
+#define NV50_2D_PATTERN_X1R5G5B5(i0) (0x00000480 + 0x4*(i0))
+#define NV50_2D_PATTERN_X1R5G5B5__ESIZE 0x00000004
+#define NV50_2D_PATTERN_X1R5G5B5__LEN 0x00000020
+#define NV50_2D_PATTERN_X1R5G5B5_B0__MASK 0x0000001f
+#define NV50_2D_PATTERN_X1R5G5B5_B0__SHIFT 0
+#define NV50_2D_PATTERN_X1R5G5B5_G0__MASK 0x000003e0
+#define NV50_2D_PATTERN_X1R5G5B5_G0__SHIFT 5
+#define NV50_2D_PATTERN_X1R5G5B5_R0__MASK 0x00007c00
+#define NV50_2D_PATTERN_X1R5G5B5_R0__SHIFT 10
+#define NV50_2D_PATTERN_X1R5G5B5_B1__MASK 0x001f0000
+#define NV50_2D_PATTERN_X1R5G5B5_B1__SHIFT 16
+#define NV50_2D_PATTERN_X1R5G5B5_G1__MASK 0x03e00000
+#define NV50_2D_PATTERN_X1R5G5B5_G1__SHIFT 21
+#define NV50_2D_PATTERN_X1R5G5B5_R1__MASK 0x7c000000
+#define NV50_2D_PATTERN_X1R5G5B5_R1__SHIFT 26
+
+#define NV50_2D_PATTERN_Y8(i0) (0x00000500 + 0x4*(i0))
+#define NV50_2D_PATTERN_Y8__ESIZE 0x00000004
+#define NV50_2D_PATTERN_Y8__LEN 0x00000010
+#define NV50_2D_PATTERN_Y8_Y0__MASK 0x000000ff
+#define NV50_2D_PATTERN_Y8_Y0__SHIFT 0
+#define NV50_2D_PATTERN_Y8_Y1__MASK 0x0000ff00
+#define NV50_2D_PATTERN_Y8_Y1__SHIFT 8
+#define NV50_2D_PATTERN_Y8_Y2__MASK 0x00ff0000
+#define NV50_2D_PATTERN_Y8_Y2__SHIFT 16
+#define NV50_2D_PATTERN_Y8_Y3__MASK 0xff000000
+#define NV50_2D_PATTERN_Y8_Y3__SHIFT 24
+
+#define NV50_2D_DRAW_SHAPE 0x00000580
+#define NV50_2D_DRAW_SHAPE_POINTS 0x00000000
+#define NV50_2D_DRAW_SHAPE_LINES 0x00000001
+#define NV50_2D_DRAW_SHAPE_LINE_STRIP 0x00000002
+#define NV50_2D_DRAW_SHAPE_TRIANGLES 0x00000003
+#define NV50_2D_DRAW_SHAPE_RECTANGLES 0x00000004
+
+#define NV50_2D_DRAW_COLOR_FORMAT 0x00000584
+
+#define NV50_2D_DRAW_COLOR 0x00000588
+
+#define NV50_2D_UNK58C 0x0000058c
+#define NV50_2D_UNK58C_0 0x00000001
+#define NV50_2D_UNK58C_1 0x00000010
+#define NV50_2D_UNK58C_2 0x00000100
+#define NV50_2D_UNK58C_3 0x00001000
+
+#define NV50_2D_DRAW_POINT16 0x000005e0
+#define NV50_2D_DRAW_POINT16_X__MASK 0x0000ffff
+#define NV50_2D_DRAW_POINT16_X__SHIFT 0
+#define NV50_2D_DRAW_POINT16_Y__MASK 0xffff0000
+#define NV50_2D_DRAW_POINT16_Y__SHIFT 16
+
+#define NV50_2D_DRAW_POINT32_X(i0) (0x00000600 + 0x8*(i0))
+#define NV50_2D_DRAW_POINT32_X__ESIZE 0x00000008
+#define NV50_2D_DRAW_POINT32_X__LEN 0x00000040
+
+#define NV50_2D_DRAW_POINT32_Y(i0) (0x00000604 + 0x8*(i0))
+#define NV50_2D_DRAW_POINT32_Y__ESIZE 0x00000008
+#define NV50_2D_DRAW_POINT32_Y__LEN 0x00000040
+
+#define NV50_2D_SIFC_BITMAP_ENABLE 0x00000800
+
+#define NV50_2D_SIFC_FORMAT 0x00000804
+
+#define NV50_2D_SIFC_BITMAP_FORMAT 0x00000808
+#define NV50_2D_SIFC_BITMAP_FORMAT_I1 0x00000000
+#define NV50_2D_SIFC_BITMAP_FORMAT_I4 0x00000001
+#define NV50_2D_SIFC_BITMAP_FORMAT_I8 0x00000002
+
+#define NV50_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c
+
+#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810
+#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000
+#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001
+#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002
+
+#define NV50_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814
+
+#define NV50_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818
+
+#define NV50_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c
+
+#define NV50_2D_SIFC_WIDTH 0x00000838
+
+#define NV50_2D_SIFC_HEIGHT 0x0000083c
+
+#define NV50_2D_SIFC_DX_DU_FRACT 0x00000840
+
+#define NV50_2D_SIFC_DX_DU_INT 0x00000844
+
+#define NV50_2D_SIFC_DY_DV_FRACT 0x00000848
+
+#define NV50_2D_SIFC_DY_DV_INT 0x0000084c
+
+#define NV50_2D_SIFC_DST_X_FRACT 0x00000850
+
+#define NV50_2D_SIFC_DST_X_INT 0x00000854
+
+#define NV50_2D_SIFC_DST_Y_FRACT 0x00000858
+
+#define NV50_2D_SIFC_DST_Y_INT 0x0000085c
+
+#define NV50_2D_SIFC_DATA 0x00000860
+
+#define NV50_2D_UNK0870 0x00000870
+
+#define NV50_2D_UNK0880 0x00000880
+
+#define NV50_2D_UNK0884 0x00000884
+
+#define NV50_2D_UNK0888 0x00000888
+
+#define NV50_2D_BLIT_CONTROL 0x0000088c
+#define NV50_2D_BLIT_CONTROL_ORIGIN__MASK 0x00000001
+#define NV50_2D_BLIT_CONTROL_ORIGIN__SHIFT 0
+#define NV50_2D_BLIT_CONTROL_ORIGIN_CENTER 0x00000000
+#define NV50_2D_BLIT_CONTROL_ORIGIN_CORNER 0x00000001
+#define NV50_2D_BLIT_CONTROL_FILTER__MASK 0x00000010
+#define NV50_2D_BLIT_CONTROL_FILTER__SHIFT 4
+#define NV50_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE 0x00000000
+#define NV50_2D_BLIT_CONTROL_FILTER_BILINEAR 0x00000010
+
+#define NV50_2D_BLIT_DST_X 0x000008b0
+
+#define NV50_2D_BLIT_DST_Y 0x000008b4
+
+#define NV50_2D_BLIT_DST_W 0x000008b8
+
+#define NV50_2D_BLIT_DST_H 0x000008bc
+
+#define NV50_2D_BLIT_DU_DX_FRACT 0x000008c0
+
+#define NV50_2D_BLIT_DU_DX_INT 0x000008c4
+
+#define NV50_2D_BLIT_DV_DY_FRACT 0x000008c8
+
+#define NV50_2D_BLIT_DV_DY_INT 0x000008cc
+
+#define NV50_2D_BLIT_SRC_X_FRACT 0x000008d0
+
+#define NV50_2D_BLIT_SRC_X_INT 0x000008d4
+
+#define NV50_2D_BLIT_SRC_Y_FRACT 0x000008d8
+
+#define NV50_2D_BLIT_SRC_Y_INT 0x000008dc
+
+#define NVC0_2D_FIRMWARE(i0) (0x000008e0 + 0x4*(i0))
+#define NVC0_2D_FIRMWARE__ESIZE 0x00000004
+#define NVC0_2D_FIRMWARE__LEN 0x00000020
+
+
+#endif /* NV50_2D_XML */
diff --git a/src/gallium/drivers/nv50/nv50_3d.xml.h b/src/gallium/drivers/nv50/nv50_3d.xml.h
new file mode 100644
index 0000000000..9bb3211728
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_3d.xml.h
@@ -0,0 +1,2084 @@
+#ifndef NV50_3D_XML
+#define NV50_3D_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nv50_3d.xml ( 64479 bytes, from 2011-02-27 17:58:08)
+- copyright.xml ( 6452 bytes, from 2010-12-15 23:45:18)
+- nv_defs.xml ( 4437 bytes, from 2010-12-15 23:45:18)
+- nv50_defs.xml ( 4487 bytes, from 2010-12-15 23:45:18)
+- nv_3ddefs.xml ( 16394 bytes, from 2010-12-15 23:45:18)
+- nv_object.xml ( 12191 bytes, from 2011-02-27 17:58:08)
+- nvchipsets.xml ( 3074 bytes, from 2011-02-27 17:58:08)
+
+Copyright (C) 2006-2011 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#define NV50_3D_SERIALIZE 0x00000110
+
+#define NV50_3D_DMA_NOTIFY 0x00000180
+
+#define NV50_3D_DMA_ZETA 0x00000184
+
+#define NV50_3D_DMA_QUERY 0x00000188
+
+#define NV50_3D_DMA_VTXBUF 0x0000018c
+
+#define NV50_3D_DMA_LOCAL 0x00000190
+
+#define NV50_3D_DMA_STACK 0x00000194
+
+#define NV50_3D_DMA_CODE_CB 0x00000198
+
+#define NV50_3D_DMA_TSC 0x0000019c
+
+#define NV50_3D_DMA_TIC 0x000001a0
+
+#define NV50_3D_DMA_TEXTURE 0x000001a4
+
+#define NV50_3D_DMA_STRMOUT 0x000001a8
+
+#define NV50_3D_DMA_CLIPID 0x000001ac
+
+#define NV50_3D_DMA_COLOR(i0) (0x000001c0 + 0x4*(i0))
+#define NV50_3D_DMA_COLOR__ESIZE 0x00000004
+#define NV50_3D_DMA_COLOR__LEN 0x00000008
+
+#define NV50_3D_RT(i0) (0x00000200 + 0x20*(i0))
+#define NV50_3D_RT__ESIZE 0x00000020
+#define NV50_3D_RT__LEN 0x00000008
+
+#define NV50_3D_RT_ADDRESS_HIGH(i0) (0x00000200 + 0x20*(i0))
+
+#define NV50_3D_RT_ADDRESS_LOW(i0) (0x00000204 + 0x20*(i0))
+
+#define NV50_3D_RT_FORMAT(i0) (0x00000208 + 0x20*(i0))
+
+#define NV50_3D_RT_TILE_MODE(i0) (0x0000020c + 0x20*(i0))
+#define NV50_3D_RT_TILE_MODE_X__MASK 0x0000000f
+#define NV50_3D_RT_TILE_MODE_X__SHIFT 0
+#define NV50_3D_RT_TILE_MODE_Y__MASK 0x000000f0
+#define NV50_3D_RT_TILE_MODE_Y__SHIFT 4
+#define NV50_3D_RT_TILE_MODE_Z__MASK 0x00000f00
+#define NV50_3D_RT_TILE_MODE_Z__SHIFT 8
+
+#define NV50_3D_RT_LAYER_STRIDE(i0) (0x00000210 + 0x20*(i0))
+#define NV50_3D_RT_LAYER_STRIDE__SHR 2
+
+#define NV50_3D_RT_UNK14(i0) (0x00000214 + 0x20*(i0))
+
+#define NV50_3D_VTX_ATTR_1F(i0) (0x00000300 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_1F__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_1F__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_2H(i0) (0x00000340 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_2H__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_2H__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_2H_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_2H_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_2H_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_2H_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_2F_X(i0) (0x00000380 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_2F_X__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_2F_X__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_2F_Y(i0) (0x00000384 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_2F_Y__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_2F_Y__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_3F_X(i0) (0x00000400 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_3F_X__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_3F_X__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_3F_Y(i0) (0x00000404 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_3F_Y__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_3F_Y__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_3F_Z(i0) (0x00000408 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_3F_Z__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_3F_Z__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_4F_X(i0) (0x00000500 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_4F_X__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_4F_X__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_4F_Y(i0) (0x00000504 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_4F_Y__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_4F_Y__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_4F_Z(i0) (0x00000508 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_4F_Z__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_4F_Z__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_4F_W(i0) (0x0000050c + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_4F_W__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_4F_W__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_4H_0(i0) (0x00000600 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4H_0__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4H_0__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4H_0_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4H_0_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4H_0_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4H_0_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4H_1(i0) (0x00000604 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4H_1__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4H_1__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4H_1_Z__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4H_1_Z__SHIFT 0
+#define NV50_3D_VTX_ATTR_4H_1_W__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4H_1_W__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_2I(i0) (0x00000680 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_2I__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_2I__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_2I_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_2I_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_2I_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_2I_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_2NI(i0) (0x000006c0 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_2NI__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_2NI__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_2NI_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_2NI_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_2NI_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_2NI_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4I_0(i0) (0x00000700 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4I_0__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4I_0__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4I_0_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4I_0_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4I_0_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4I_0_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4I_1(i0) (0x00000704 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4I_1__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4I_1__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4I_1_Z__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4I_1_Z__SHIFT 0
+#define NV50_3D_VTX_ATTR_4I_1_W__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4I_1_W__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4NI_0(i0) (0x00000780 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4NI_0__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4NI_0__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4NI_0_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4NI_0_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4NI_0_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4NI_0_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4NI_1(i0) (0x00000784 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4NI_1__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4NI_1__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4NI_1_Z__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4NI_1_Z__SHIFT 0
+#define NV50_3D_VTX_ATTR_4NI_1_W__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4NI_1_W__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4UB(i0) (0x00000800 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_4UB__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_4UB__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4UB_X__MASK 0x000000ff
+#define NV50_3D_VTX_ATTR_4UB_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4UB_Y__MASK 0x0000ff00
+#define NV50_3D_VTX_ATTR_4UB_Y__SHIFT 8
+#define NV50_3D_VTX_ATTR_4UB_Z__MASK 0x00ff0000
+#define NV50_3D_VTX_ATTR_4UB_Z__SHIFT 16
+#define NV50_3D_VTX_ATTR_4UB_W__MASK 0xff000000
+#define NV50_3D_VTX_ATTR_4UB_W__SHIFT 24
+
+#define NV50_3D_VTX_ATTR_4B(i0) (0x00000840 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_4B__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_4B__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4B_X__MASK 0x000000ff
+#define NV50_3D_VTX_ATTR_4B_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4B_Y__MASK 0x0000ff00
+#define NV50_3D_VTX_ATTR_4B_Y__SHIFT 8
+#define NV50_3D_VTX_ATTR_4B_Z__MASK 0x00ff0000
+#define NV50_3D_VTX_ATTR_4B_Z__SHIFT 16
+#define NV50_3D_VTX_ATTR_4B_W__MASK 0xff000000
+#define NV50_3D_VTX_ATTR_4B_W__SHIFT 24
+
+#define NV50_3D_VTX_ATTR_4NUB(i0) (0x00000880 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_4NUB__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_4NUB__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4NUB_X__MASK 0x000000ff
+#define NV50_3D_VTX_ATTR_4NUB_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4NUB_Y__MASK 0x0000ff00
+#define NV50_3D_VTX_ATTR_4NUB_Y__SHIFT 8
+#define NV50_3D_VTX_ATTR_4NUB_Z__MASK 0x00ff0000
+#define NV50_3D_VTX_ATTR_4NUB_Z__SHIFT 16
+#define NV50_3D_VTX_ATTR_4NUB_W__MASK 0xff000000
+#define NV50_3D_VTX_ATTR_4NUB_W__SHIFT 24
+
+#define NV50_3D_VTX_ATTR_4NB(i0) (0x000008c0 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_4NB__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_4NB__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4NB_X__MASK 0x000000ff
+#define NV50_3D_VTX_ATTR_4NB_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4NB_Y__MASK 0x0000ff00
+#define NV50_3D_VTX_ATTR_4NB_Y__SHIFT 8
+#define NV50_3D_VTX_ATTR_4NB_Z__MASK 0x00ff0000
+#define NV50_3D_VTX_ATTR_4NB_Z__SHIFT 16
+#define NV50_3D_VTX_ATTR_4NB_W__MASK 0xff000000
+#define NV50_3D_VTX_ATTR_4NB_W__SHIFT 24
+
+#define NV50_3D_VERTEX_ARRAY_FETCH(i0) (0x00000900 + 0x10*(i0))
+#define NV50_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010
+#define NV50_3D_VERTEX_ARRAY_FETCH__LEN 0x00000010
+#define NV50_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK 0x00000fff
+#define NV50_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT 0
+#define NV50_3D_VERTEX_ARRAY_FETCH_ENABLE 0x20000000
+
+#define NV50_3D_VERTEX_ARRAY_START_HIGH(i0) (0x00000904 + 0x10*(i0))
+#define NV50_3D_VERTEX_ARRAY_START_HIGH__ESIZE 0x00000010
+#define NV50_3D_VERTEX_ARRAY_START_HIGH__LEN 0x00000010
+
+#define NV50_3D_VERTEX_ARRAY_START_LOW(i0) (0x00000908 + 0x10*(i0))
+#define NV50_3D_VERTEX_ARRAY_START_LOW__ESIZE 0x00000010
+#define NV50_3D_VERTEX_ARRAY_START_LOW__LEN 0x00000010
+
+#define NV50_3D_VERTEX_ARRAY_DIVISOR(i0) (0x0000090c + 0x10*(i0))
+#define NV50_3D_VERTEX_ARRAY_DIVISOR__ESIZE 0x00000010
+#define NV50_3D_VERTEX_ARRAY_DIVISOR__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0))
+#define NV50_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_SCALE_X__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_SCALE_Y(i0) (0x00000a04 + 0x20*(i0))
+#define NV50_3D_VIEWPORT_SCALE_Y__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_SCALE_Y__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_SCALE_Z(i0) (0x00000a08 + 0x20*(i0))
+#define NV50_3D_VIEWPORT_SCALE_Z__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_SCALE_Z__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_TRANSLATE_X(i0) (0x00000a0c + 0x20*(i0))
+#define NV50_3D_VIEWPORT_TRANSLATE_X__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_TRANSLATE_X__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_TRANSLATE_Y(i0) (0x00000a10 + 0x20*(i0))
+#define NV50_3D_VIEWPORT_TRANSLATE_Y__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_TRANSLATE_Y__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_TRANSLATE_Z(i0) (0x00000a14 + 0x20*(i0))
+#define NV50_3D_VIEWPORT_TRANSLATE_Z__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_TRANSLATE_Z__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_HORIZ(i0) (0x00000c00 + 0x10*(i0))
+#define NV50_3D_VIEWPORT_HORIZ__ESIZE 0x00000010
+#define NV50_3D_VIEWPORT_HORIZ__LEN 0x00000010
+#define NV50_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff
+#define NV50_3D_VIEWPORT_HORIZ_X__SHIFT 0
+#define NV50_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000
+#define NV50_3D_VIEWPORT_HORIZ_W__SHIFT 16
+
+#define NV50_3D_VIEWPORT_VERT(i0) (0x00000c04 + 0x10*(i0))
+#define NV50_3D_VIEWPORT_VERT__ESIZE 0x00000010
+#define NV50_3D_VIEWPORT_VERT__LEN 0x00000010
+#define NV50_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff
+#define NV50_3D_VIEWPORT_VERT_Y__SHIFT 0
+#define NV50_3D_VIEWPORT_VERT_H__MASK 0xffff0000
+#define NV50_3D_VIEWPORT_VERT_H__SHIFT 16
+
+#define NV50_3D_DEPTH_RANGE_NEAR(i0) (0x00000c08 + 0x10*(i0))
+#define NV50_3D_DEPTH_RANGE_NEAR__ESIZE 0x00000010
+#define NV50_3D_DEPTH_RANGE_NEAR__LEN 0x00000010
+
+#define NV50_3D_DEPTH_RANGE_FAR(i0) (0x00000c0c + 0x10*(i0))
+#define NV50_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010
+#define NV50_3D_DEPTH_RANGE_FAR__LEN 0x00000010
+
+#define NV50_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0))
+#define NV50_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008
+#define NV50_3D_CLIP_RECT_HORIZ__LEN 0x00000008
+#define NV50_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff
+#define NV50_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0
+#define NV50_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000
+#define NV50_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16
+
+#define NV50_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0))
+#define NV50_3D_CLIP_RECT_VERT__ESIZE 0x00000008
+#define NV50_3D_CLIP_RECT_VERT__LEN 0x00000008
+#define NV50_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff
+#define NV50_3D_CLIP_RECT_VERT_MIN__SHIFT 0
+#define NV50_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000
+#define NV50_3D_CLIP_RECT_VERT_MAX__SHIFT 16
+
+#define NV50_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0))
+#define NV50_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008
+#define NV50_3D_CLIPID_REGION_HORIZ__LEN 0x00000004
+#define NV50_3D_CLIPID_REGION_HORIZ_X__MASK 0x0000ffff
+#define NV50_3D_CLIPID_REGION_HORIZ_X__SHIFT 0
+#define NV50_3D_CLIPID_REGION_HORIZ_W__MASK 0xffff0000
+#define NV50_3D_CLIPID_REGION_HORIZ_W__SHIFT 16
+
+#define NV50_3D_CLIPID_REGION_VERT(i0) (0x00000d44 + 0x8*(i0))
+#define NV50_3D_CLIPID_REGION_VERT__ESIZE 0x00000008
+#define NV50_3D_CLIPID_REGION_VERT__LEN 0x00000004
+#define NV50_3D_CLIPID_REGION_VERT_Y__MASK 0x0000ffff
+#define NV50_3D_CLIPID_REGION_VERT_Y__SHIFT 0
+#define NV50_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000
+#define NV50_3D_CLIPID_REGION_VERT_H__SHIFT 16
+
+#define NV50_3D_UNK0D60 0x00000d60
+
+#define NV50_3D_UNK0D64 0x00000d64
+
+#define NV50_3D_COUNTER_ENABLE 0x00000d68
+#define NV50_3D_COUNTER_ENABLE_VFETCH_VERTICES 0x00000001
+#define NV50_3D_COUNTER_ENABLE_VFETCH_PRIMITIVES 0x00000002
+#define NV50_3D_COUNTER_ENABLE_VP_LAUNCHES 0x00000004
+#define NV50_3D_COUNTER_ENABLE_GP_LAUNCHES 0x00000008
+#define NV50_3D_COUNTER_ENABLE_GP_PRIMITIVES_OUT 0x00000010
+#define NV50_3D_COUNTER_ENABLE_TRANSFORM_FEEDBACK 0x00000020
+#define NV50_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000040
+#define NV50_3D_COUNTER_ENABLE_RAST_PRIMITIVES_PRECLIP 0x00000080
+#define NV50_3D_COUNTER_ENABLE_RAST_PRIMITIVES_POSTCLIP 0x00000100
+#define NV50_3D_COUNTER_ENABLE_FP_PIXELS 0x00000200
+#define NV84_3D_COUNTER_ENABLE_UNK0A 0x00000400
+
+#define NV50_3D_UNK0D6C(i0) (0x00000d6c + 0x4*(i0))
+#define NV50_3D_UNK0D6C__ESIZE 0x00000004
+#define NV50_3D_UNK0D6C__LEN 0x00000002
+#define NV50_3D_UNK0D6C_X__MASK 0x0000ffff
+#define NV50_3D_UNK0D6C_X__SHIFT 0
+#define NV50_3D_UNK0D6C_Y__MASK 0xffff0000
+#define NV50_3D_UNK0D6C_Y__SHIFT 16
+
+#define NV50_3D_VERTEX_BUFFER_FIRST 0x00000d74
+
+#define NV50_3D_VERTEX_BUFFER_COUNT 0x00000d78
+
+#define NV50_3D_UNK0D7C 0x00000d7c
+
+#define NV50_3D_CLEAR_COLOR(i0) (0x00000d80 + 0x4*(i0))
+#define NV50_3D_CLEAR_COLOR__ESIZE 0x00000004
+#define NV50_3D_CLEAR_COLOR__LEN 0x00000004
+
+#define NV50_3D_CLEAR_DEPTH 0x00000d90
+
+#define NV50_3D_STACK_ADDRESS_HIGH 0x00000d94
+
+#define NV50_3D_STACK_ADDRESS_LOW 0x00000d98
+
+#define NV50_3D_STACK_SIZE_LOG 0x00000d9c
+
+#define NV50_3D_CLEAR_STENCIL 0x00000da0
+
+#define NV50_3D_STRMOUT_PARAMS_LATCH 0x00000da4
+
+#define NV50_3D_STRMOUT_PRIMITIVE_LIMIT 0x00000da8
+
+#define NV50_3D_POLYGON_MODE_FRONT 0x00000dac
+#define NV50_3D_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NV50_3D_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NV50_3D_POLYGON_MODE_FRONT_FILL 0x00001b02
+
+#define NV50_3D_POLYGON_MODE_BACK 0x00000db0
+#define NV50_3D_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NV50_3D_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NV50_3D_POLYGON_MODE_BACK_FILL 0x00001b02
+
+#define NV50_3D_POLYGON_SMOOTH_ENABLE 0x00000db4
+
+#define NV50_3D_UNK0DB8 0x00000db8
+
+#define NV50_3D_ZCULL_UNK0DBC 0x00000dbc
+#define NV50_3D_ZCULL_UNK0DBC_UNK0 0x00000001
+#define NV50_3D_ZCULL_UNK0DBC_UNK16__MASK 0x00030000
+#define NV50_3D_ZCULL_UNK0DBC_UNK16__SHIFT 16
+
+#define NV50_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0
+
+#define NV50_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4
+
+#define NV50_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8
+
+#define NV50_3D_UNK0DCC 0x00000dcc
+
+#define NV50_3D_VTX_ATTR_MASK_UNK0DD0(i0) (0x00000dd0 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_MASK_UNK0DD0__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_MASK_UNK0DD0__LEN 0x00000002
+
+#define NV50_3D_ZCULL_UNK0DD8 0x00000dd8
+#define NV50_3D_ZCULL_UNK0DD8_UNK0__MASK 0x00000007
+#define NV50_3D_ZCULL_UNK0DD8_UNK0__SHIFT 0
+#define NVA3_3D_ZCULL_UNK0DD8_UNK9 0x00000200
+#define NV50_3D_ZCULL_UNK0DD8_UNK16__MASK 0xffff0000
+#define NV50_3D_ZCULL_UNK0DD8_UNK16__SHIFT 16
+
+#define NV50_3D_UNK0DDC 0x00000ddc
+
+#define NV50_3D_UNK0DE0 0x00000de0
+
+#define NV50_3D_WATCHDOG_TIMER 0x00000de4
+
+#define NV50_3D_UNK0DE8 0x00000de8
+
+#define NV50_3D_UNK0DEC 0x00000dec
+
+#define NV50_3D_UNK0DF0 0x00000df0
+#define NV50_3D_UNK0DF0_UNK0 0x00000001
+#define NV50_3D_UNK0DF0_UNK1__MASK 0x00000ff0
+#define NV50_3D_UNK0DF0_UNK1__SHIFT 4
+
+#define NV50_3D_UNK0DF4 0x00000df4
+
+#define NV50_3D_WINDOW_OFFSET_X 0x00000df8
+
+#define NV50_3D_WINDOW_OFFSET_Y 0x00000dfc
+
+#define NV50_3D_SCISSOR_ENABLE(i0) (0x00000e00 + 0x10*(i0))
+#define NV50_3D_SCISSOR_ENABLE__ESIZE 0x00000010
+#define NV50_3D_SCISSOR_ENABLE__LEN 0x00000010
+
+#define NV50_3D_SCISSOR_HORIZ(i0) (0x00000e04 + 0x10*(i0))
+#define NV50_3D_SCISSOR_HORIZ__ESIZE 0x00000010
+#define NV50_3D_SCISSOR_HORIZ__LEN 0x00000010
+#define NV50_3D_SCISSOR_HORIZ_MIN__MASK 0x0000ffff
+#define NV50_3D_SCISSOR_HORIZ_MIN__SHIFT 0
+#define NV50_3D_SCISSOR_HORIZ_MAX__MASK 0xffff0000
+#define NV50_3D_SCISSOR_HORIZ_MAX__SHIFT 16
+
+#define NV50_3D_SCISSOR_VERT(i0) (0x00000e08 + 0x10*(i0))
+#define NV50_3D_SCISSOR_VERT__ESIZE 0x00000010
+#define NV50_3D_SCISSOR_VERT__LEN 0x00000010
+#define NV50_3D_SCISSOR_VERT_MIN__MASK 0x0000ffff
+#define NV50_3D_SCISSOR_VERT_MIN__SHIFT 0
+#define NV50_3D_SCISSOR_VERT_MAX__MASK 0xffff0000
+#define NV50_3D_SCISSOR_VERT_MAX__SHIFT 16
+
+#define NV50_3D_CB_ADDR 0x00000f00
+#define NV50_3D_CB_ADDR_ID__MASK 0x003fff00
+#define NV50_3D_CB_ADDR_ID__SHIFT 8
+#define NV50_3D_CB_ADDR_BUFFER__MASK 0x0000007f
+#define NV50_3D_CB_ADDR_BUFFER__SHIFT 0
+
+#define NV50_3D_CB_DATA(i0) (0x00000f04 + 0x4*(i0))
+#define NV50_3D_CB_DATA__ESIZE 0x00000004
+#define NV50_3D_CB_DATA__LEN 0x00000010
+
+#define NV50_3D_LOCAL_WARPS_LOG_ALLOC 0x00000f44
+
+#define NV50_3D_LOCAL_WARPS_NO_CLAMP 0x00000f48
+
+#define NV50_3D_STACK_WARPS_LOG_ALLOC 0x00000f4c
+
+#define NV50_3D_STACK_WARPS_NO_CLAMP 0x00000f50
+
+#define NV50_3D_STENCIL_BACK_FUNC_REF 0x00000f54
+
+#define NV50_3D_STENCIL_BACK_MASK 0x00000f58
+
+#define NV50_3D_STENCIL_BACK_FUNC_MASK 0x00000f5c
+
+#define NV50_3D_UNK0F60(i0) (0x00000f60 + 0x4*(i0))
+#define NV50_3D_UNK0F60__ESIZE 0x00000004
+#define NV50_3D_UNK0F60__LEN 0x00000004
+
+#define NV50_3D_GP_ADDRESS_HIGH 0x00000f70
+
+#define NV50_3D_GP_ADDRESS_LOW 0x00000f74
+
+#define NV50_3D_UNK0F78 0x00000f78
+
+#define NV50_3D_VP_ADDRESS_HIGH 0x00000f7c
+
+#define NV50_3D_VP_ADDRESS_LOW 0x00000f80
+
+#define NV50_3D_VERTEX_RUNOUT_ADDRESS_HIGH 0x00000f84
+
+#define NV50_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88
+
+#define NV50_3D_UNK0F8C 0x00000f8c
+
+#define NV50_3D_UNK0F90 0x00000f90
+
+#define NV50_3D_UNK0F94 0x00000f94
+
+#define NV50_3D_UNK0F98 0x00000f98
+
+#define NV50_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0))
+#define NV50_3D_DEPTH_BOUNDS__ESIZE 0x00000004
+#define NV50_3D_DEPTH_BOUNDS__LEN 0x00000002
+
+#define NV50_3D_FP_ADDRESS_HIGH 0x00000fa4
+
+#define NV50_3D_FP_ADDRESS_LOW 0x00000fa8
+
+#define NV50_3D_UNK0FAC 0x00000fac
+#define NV50_3D_UNK0FAC_UNK0 0x00000001
+#define NVA0_3D_UNK0FAC_UNK2 0x00000002
+#define NV50_3D_UNK0FAC_UNK1__MASK 0x000ffff0
+#define NV50_3D_UNK0FAC_UNK1__SHIFT 4
+
+#define NV50_3D_UNK0FB0 0x00000fb0
+
+#define NV50_3D_UNK0FB4 0x00000fb4
+
+#define NV50_3D_UNK0FB8 0x00000fb8
+
+#define NV50_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0))
+#define NV50_3D_MSAA_MASK__ESIZE 0x00000004
+#define NV50_3D_MSAA_MASK__LEN 0x00000004
+
+#define NV50_3D_CLIPID_ADDRESS_HIGH 0x00000fcc
+
+#define NV50_3D_CLIPID_ADDRESS_LOW 0x00000fd0
+
+#define NV50_3D_MAP_SEMANTIC_5 0x00000fd4
+#define NV50_3D_MAP_SEMANTIC_5_VIEWPORT_ID__MASK 0x000000ff
+#define NV50_3D_MAP_SEMANTIC_5_VIEWPORT_ID__SHIFT 0
+
+#define NV50_3D_UNK0FD8 0x00000fd8
+#define NV50_3D_UNK0FD8_UNK0 0x00000001
+#define NV50_3D_UNK0FD8_UNK1 0x00000010
+
+#define NV50_3D_UNK0FDC 0x00000fdc
+
+#define NV50_3D_ZETA_ADDRESS_HIGH 0x00000fe0
+
+#define NV50_3D_ZETA_ADDRESS_LOW 0x00000fe4
+
+#define NV50_3D_ZETA_FORMAT 0x00000fe8
+
+#define NV50_3D_ZETA_TILE_MODE 0x00000fec
+
+#define NV50_3D_ZETA_LAYER_STRIDE 0x00000ff0
+#define NV50_3D_ZETA_LAYER_STRIDE__SHR 2
+
+#define NV50_3D_SCREEN_SCISSOR_HORIZ 0x00000ff4
+#define NV50_3D_SCREEN_SCISSOR_HORIZ_W__MASK 0xffff0000
+#define NV50_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT 16
+#define NV50_3D_SCREEN_SCISSOR_HORIZ_X__MASK 0x0000ffff
+#define NV50_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT 0
+
+#define NV50_3D_SCREEN_SCISSOR_VERT 0x00000ff8
+#define NV50_3D_SCREEN_SCISSOR_VERT_H__MASK 0xffff0000
+#define NV50_3D_SCREEN_SCISSOR_VERT_H__SHIFT 16
+#define NV50_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff
+#define NV50_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0
+
+#define NV50_3D_UNK0FFC 0x00000ffc
+
+#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE(i0) (0x00001000 + 0x4*(i0))
+#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE 0x00000004
+#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE__LEN 0x00000010
+
+#define NV50_3D_UNK1040(i0) (0x00001040 + 0x4*(i0))
+#define NV50_3D_UNK1040__ESIZE 0x00000004
+#define NV50_3D_UNK1040__LEN 0x00000010
+
+#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00001080 + 0x8*(i0))
+#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH__ESIZE 0x00000008
+#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH__LEN 0x00000010
+
+#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00001084 + 0x8*(i0))
+#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW__ESIZE 0x00000008
+#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW__LEN 0x00000010
+
+#define NV50_3D_UNK1100 0x00001100
+
+#define NV84_3D_UNK1104 0x00001104
+#define NV84_3D_UNK1104_0__MASK 0x0000ffff
+#define NV84_3D_UNK1104_0__SHIFT 0
+#define NV84_3D_UNK1104_0__MAX 0x00002000
+#define NV84_3D_UNK1104_0__ALIGN 0x00000040
+#define NV84_3D_UNK1104_1__MASK 0xffff0000
+#define NV84_3D_UNK1104_1__SHIFT 16
+#define NV84_3D_UNK1104_1__MAX 0x00002000
+#define NV84_3D_UNK1104_1__ALIGN 0x00000040
+
+#define NV84_3D_UNK1108 0x00001108
+#define NV84_3D_UNK1108_0 0x00000001
+#define NV84_3D_UNK1108_1 0x00000010
+
+#define NV84_3D_UNK110C 0x0000110c
+
+#define NV84_3D_UNK1110 0x00001110
+
+#define NV84_3D_WRCACHE_FLUSH 0x00001114
+
+#define NV84_3D_VERTEX_ID_BASE 0x00001118
+
+#define NV84_3D_PRIMITIVE_ID 0x0000111c
+
+#define NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT(i0) (0x00001120 + 0x4*(i0))
+#define NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT__ESIZE 0x00000004
+#define NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT__LEN 0x00000004
+
+#define NVA3_3D_VP_ATTR_EN_ALT(i0) (0x00001130 + 0x4*(i0))
+#define NVA3_3D_VP_ATTR_EN_ALT__ESIZE 0x00000004
+#define NVA3_3D_VP_ATTR_EN_ALT__LEN 0x00000004
+#define NVA3_3D_VP_ATTR_EN_ALT_7__MASK 0xf0000000
+#define NVA3_3D_VP_ATTR_EN_ALT_7__SHIFT 28
+#define NVA3_3D_VP_ATTR_EN_ALT_7_X 0x10000000
+#define NVA3_3D_VP_ATTR_EN_ALT_7_Y 0x20000000
+#define NVA3_3D_VP_ATTR_EN_ALT_7_Z 0x40000000
+#define NVA3_3D_VP_ATTR_EN_ALT_7_W 0x80000000
+#define NVA3_3D_VP_ATTR_EN_ALT_6__MASK 0x0f000000
+#define NVA3_3D_VP_ATTR_EN_ALT_6__SHIFT 24
+#define NVA3_3D_VP_ATTR_EN_ALT_6_X 0x01000000
+#define NVA3_3D_VP_ATTR_EN_ALT_6_Y 0x02000000
+#define NVA3_3D_VP_ATTR_EN_ALT_6_Z 0x04000000
+#define NVA3_3D_VP_ATTR_EN_ALT_6_W 0x08000000
+#define NVA3_3D_VP_ATTR_EN_ALT_5__MASK 0x00f00000
+#define NVA3_3D_VP_ATTR_EN_ALT_5__SHIFT 20
+#define NVA3_3D_VP_ATTR_EN_ALT_5_X 0x00100000
+#define NVA3_3D_VP_ATTR_EN_ALT_5_Y 0x00200000
+#define NVA3_3D_VP_ATTR_EN_ALT_5_Z 0x00400000
+#define NVA3_3D_VP_ATTR_EN_ALT_5_W 0x00800000
+#define NVA3_3D_VP_ATTR_EN_ALT_4__MASK 0x000f0000
+#define NVA3_3D_VP_ATTR_EN_ALT_4__SHIFT 16
+#define NVA3_3D_VP_ATTR_EN_ALT_4_X 0x00010000
+#define NVA3_3D_VP_ATTR_EN_ALT_4_Y 0x00020000
+#define NVA3_3D_VP_ATTR_EN_ALT_4_Z 0x00040000
+#define NVA3_3D_VP_ATTR_EN_ALT_4_W 0x00080000
+#define NVA3_3D_VP_ATTR_EN_ALT_3__MASK 0x0000f000
+#define NVA3_3D_VP_ATTR_EN_ALT_3__SHIFT 12
+#define NVA3_3D_VP_ATTR_EN_ALT_3_X 0x00001000
+#define NVA3_3D_VP_ATTR_EN_ALT_3_Y 0x00002000
+#define NVA3_3D_VP_ATTR_EN_ALT_3_Z 0x00004000
+#define NVA3_3D_VP_ATTR_EN_ALT_3_W 0x00008000
+#define NVA3_3D_VP_ATTR_EN_ALT_2__MASK 0x00000f00
+#define NVA3_3D_VP_ATTR_EN_ALT_2__SHIFT 8
+#define NVA3_3D_VP_ATTR_EN_ALT_2_X 0x00000100
+#define NVA3_3D_VP_ATTR_EN_ALT_2_Y 0x00000200
+#define NVA3_3D_VP_ATTR_EN_ALT_2_Z 0x00000400
+#define NVA3_3D_VP_ATTR_EN_ALT_2_W 0x00000800
+#define NVA3_3D_VP_ATTR_EN_ALT_1__MASK 0x000000f0
+#define NVA3_3D_VP_ATTR_EN_ALT_1__SHIFT 4
+#define NVA3_3D_VP_ATTR_EN_ALT_1_X 0x00000010
+#define NVA3_3D_VP_ATTR_EN_ALT_1_Y 0x00000020
+#define NVA3_3D_VP_ATTR_EN_ALT_1_Z 0x00000040
+#define NVA3_3D_VP_ATTR_EN_ALT_1_W 0x00000080
+#define NVA3_3D_VP_ATTR_EN_ALT_0__MASK 0x0000000f
+#define NVA3_3D_VP_ATTR_EN_ALT_0__SHIFT 0
+#define NVA3_3D_VP_ATTR_EN_ALT_0_X 0x00000001
+#define NVA3_3D_VP_ATTR_EN_ALT_0_Y 0x00000002
+#define NVA3_3D_VP_ATTR_EN_ALT_0_Z 0x00000004
+#define NVA3_3D_VP_ATTR_EN_ALT_0_W 0x00000008
+
+#define NVA3_3D_UNK1140 0x00001140
+
+#define NVA0_3D_UNK1144 0x00001144
+
+#define NVA0_3D_VTX_ATTR_DEFINE 0x0000114c
+#define NVA0_3D_VTX_ATTR_DEFINE_ATTR__MASK 0x000000ff
+#define NVA0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT 0
+#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MASK 0x00000700
+#define NVA0_3D_VTX_ATTR_DEFINE_COMP__SHIFT 8
+#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MIN 0x00000001
+#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MAX 0x00000004
+#define NVA0_3D_VTX_ATTR_DEFINE_SIZE__MASK 0x00007000
+#define NVA0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT 12
+#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_8 0x00001000
+#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_16 0x00002000
+#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_32 0x00004000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE__MASK 0x00070000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT 16
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SNORM 0x00010000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000
+
+#define NVA0_3D_VTX_ATTR_DATA(i0) (0x00001150 + 0x4*(i0))
+#define NVA0_3D_VTX_ATTR_DATA__ESIZE 0x00000004
+#define NVA0_3D_VTX_ATTR_DATA__LEN 0x00000004
+
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT(i0) (0x00001160 + 0x4*(i0))
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT__ESIZE 0x00000004
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT__LEN 0x00000020
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BUFFER__MASK 0x0000001f
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BUFFER__SHIFT 0
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_CONST 0x00000040
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_OFFSET__MASK 0x001fff80
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_OFFSET__SHIFT 7
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT__MASK 0x07e00000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT__SHIFT 21
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32_32_32 0x00200000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32_32 0x00400000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16_16_16 0x00600000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32 0x00800000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16_16 0x00a00000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8_8_8 0x01400000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16 0x01e00000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32 0x02400000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8_8 0x02600000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8 0x03000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16 0x03600000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8 0x03a00000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_2_10_10_10 0x06000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE__MASK 0x38000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE__SHIFT 27
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SNORM 0x08000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_UNORM 0x10000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SINT 0x18000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_UINT 0x20000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_USCALED 0x28000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SSCALED 0x30000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_FLOAT 0x38000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BGRA 0x80000000
+
+#define NV50_3D_RT_CONTROL 0x0000121c
+#define NV50_3D_RT_CONTROL_COUNT__MASK 0x0000000f
+#define NV50_3D_RT_CONTROL_COUNT__SHIFT 0
+#define NV50_3D_RT_CONTROL_MAP0__MASK 0x00000070
+#define NV50_3D_RT_CONTROL_MAP0__SHIFT 4
+#define NV50_3D_RT_CONTROL_MAP1__MASK 0x00000380
+#define NV50_3D_RT_CONTROL_MAP1__SHIFT 7
+#define NV50_3D_RT_CONTROL_MAP2__MASK 0x00001c00
+#define NV50_3D_RT_CONTROL_MAP2__SHIFT 10
+#define NV50_3D_RT_CONTROL_MAP3__MASK 0x0000e000
+#define NV50_3D_RT_CONTROL_MAP3__SHIFT 13
+#define NV50_3D_RT_CONTROL_MAP4__MASK 0x00070000
+#define NV50_3D_RT_CONTROL_MAP4__SHIFT 16
+#define NV50_3D_RT_CONTROL_MAP5__MASK 0x00380000
+#define NV50_3D_RT_CONTROL_MAP5__SHIFT 19
+#define NV50_3D_RT_CONTROL_MAP6__MASK 0x01c00000
+#define NV50_3D_RT_CONTROL_MAP6__SHIFT 22
+#define NV50_3D_RT_CONTROL_MAP7__MASK 0x0e000000
+#define NV50_3D_RT_CONTROL_MAP7__SHIFT 25
+
+#define NV50_3D_UNK1220 0x00001220
+
+#define NV50_3D_RT_ARRAY_MODE 0x00001224
+#define NV50_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff
+#define NV50_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0
+#define NV50_3D_RT_ARRAY_MODE_MODE__MASK 0x00010000
+#define NV50_3D_RT_ARRAY_MODE_MODE__SHIFT 16
+#define NV50_3D_RT_ARRAY_MODE_MODE_2D_ARRAY 0x00000000
+#define NV50_3D_RT_ARRAY_MODE_MODE_3D 0x00010000
+
+#define NV50_3D_ZETA_HORIZ 0x00001228
+
+#define NV50_3D_ZETA_VERT 0x0000122c
+
+#define NV50_3D_ZETA_ARRAY_MODE 0x00001230
+#define NV50_3D_ZETA_ARRAY_MODE_LAYERS__MASK 0x0000ffff
+#define NV50_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT 0
+#define NV50_3D_ZETA_ARRAY_MODE_UNK 0x00010000
+
+#define NV50_3D_LINKED_TSC 0x00001234
+
+#define NV50_3D_UNK1238 0x00001238
+
+#define NVA0_3D_DRAW_TFB_BYTES 0x0000123c
+
+#define NV50_3D_RT_HORIZ(i0) (0x00001240 + 0x8*(i0))
+#define NV50_3D_RT_HORIZ__ESIZE 0x00000008
+#define NV50_3D_RT_HORIZ__LEN 0x00000008
+#define NV50_3D_RT_HORIZ_WIDTH__MASK 0x0fffffff
+#define NV50_3D_RT_HORIZ_WIDTH__SHIFT 0
+#define NV50_3D_RT_HORIZ_LINEAR 0x80000000
+
+#define NV50_3D_RT_VERT(i0) (0x00001244 + 0x8*(i0))
+#define NV50_3D_RT_VERT__ESIZE 0x00000008
+#define NV50_3D_RT_VERT__LEN 0x00000008
+
+#define NV50_3D_CB_DEF_ADDRESS_HIGH 0x00001280
+
+#define NV50_3D_CB_DEF_ADDRESS_LOW 0x00001284
+
+#define NV50_3D_CB_DEF_SET 0x00001288
+#define NV50_3D_CB_DEF_SET_SIZE__MASK 0x0000ffff
+#define NV50_3D_CB_DEF_SET_SIZE__SHIFT 0
+#define NV50_3D_CB_DEF_SET_BUFFER__MASK 0x007f0000
+#define NV50_3D_CB_DEF_SET_BUFFER__SHIFT 16
+
+#define NV50_3D_UNK128C 0x0000128c
+#define NV50_3D_UNK128C_0__MASK 0x00000003
+#define NV50_3D_UNK128C_0__SHIFT 0
+#define NV50_3D_UNK128C_1__MASK 0x00000030
+#define NV50_3D_UNK128C_1__SHIFT 4
+#define NV50_3D_UNK128C_2__MASK 0x00000300
+#define NV50_3D_UNK128C_2__SHIFT 8
+#define NV50_3D_UNK128C_3__MASK 0x00003000
+#define NV50_3D_UNK128C_3__SHIFT 12
+
+#define NV50_3D_CALL_LIMIT_LOG 0x00001290
+#define NV50_3D_CALL_LIMIT_LOG_VP__MASK 0x0000000f
+#define NV50_3D_CALL_LIMIT_LOG_VP__SHIFT 0
+#define NV50_3D_CALL_LIMIT_LOG_GP__MASK 0x000000f0
+#define NV50_3D_CALL_LIMIT_LOG_GP__SHIFT 4
+#define NV50_3D_CALL_LIMIT_LOG_FP__MASK 0x00000f00
+#define NV50_3D_CALL_LIMIT_LOG_FP__SHIFT 8
+
+#define NV50_3D_STRMOUT_BUFFERS_CTRL 0x00001294
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED 0x00000001
+#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE__MASK 0x00000002
+#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE__SHIFT 1
+#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_PRIMITIVES 0x00000000
+#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET 0x00000002
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__MASK 0x000000f0
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT 4
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MASK 0x000fff00
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT 8
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX 0x00000800
+
+#define NV50_3D_FP_RESULT_COUNT 0x00001298
+
+#define NV50_3D_VTX_UNK129C 0x0000129c
+
+#define NV50_3D_UNK12A0 0x000012a0
+
+#define NV50_3D_UNK12A8 0x000012a8
+#define NV50_3D_UNK12A8_UNK1 0x00000001
+#define NV50_3D_UNK12A8_UNK2__MASK 0x000ffff0
+#define NV50_3D_UNK12A8_UNK2__SHIFT 4
+
+#define NV50_3D_UNK12AC 0x000012ac
+
+#define NV50_3D_UNK12B0 0x000012b0
+#define NV50_3D_UNK12B0_UNK0__MASK 0x000000ff
+#define NV50_3D_UNK12B0_UNK0__SHIFT 0
+#define NV50_3D_UNK12B0_UNK1__MASK 0x0000ff00
+#define NV50_3D_UNK12B0_UNK1__SHIFT 8
+#define NV50_3D_UNK12B0_UNK2__MASK 0x00ff0000
+#define NV50_3D_UNK12B0_UNK2__SHIFT 16
+#define NV50_3D_UNK12B0_UNK3__MASK 0xff000000
+#define NV50_3D_UNK12B0_UNK3__SHIFT 24
+#define NV50_3D_UNK12B0_UNK3__MAX 0x00000080
+
+#define NV50_3D_UNK12B4 0x000012b4
+
+#define NV50_3D_UNK12B8 0x000012b8
+
+#define NV50_3D_DEPTH_TEST_ENABLE 0x000012cc
+
+#define NV50_3D_D3D_FILL_MODE 0x000012d0
+#define NV50_3D_D3D_FILL_MODE_POINT 0x00000001
+#define NV50_3D_D3D_FILL_MODE_WIREFRAME 0x00000002
+#define NV50_3D_D3D_FILL_MODE_SOLID 0x00000003
+
+#define NV50_3D_SHADE_MODEL 0x000012d4
+#define NV50_3D_SHADE_MODEL_FLAT 0x00001d00
+#define NV50_3D_SHADE_MODEL_SMOOTH 0x00001d01
+
+#define NV50_3D_LOCAL_ADDRESS_HIGH 0x000012d8
+
+#define NV50_3D_LOCAL_ADDRESS_LOW 0x000012dc
+
+#define NV50_3D_LOCAL_SIZE_LOG 0x000012e0
+
+#define NV50_3D_BLEND_INDEPENDENT 0x000012e4
+
+#define NV50_3D_DEPTH_WRITE_ENABLE 0x000012e8
+
+#define NV50_3D_ALPHA_TEST_ENABLE 0x000012ec
+
+#define NV50_3D_PM_SET(i0) (0x000012f0 + 0x4*(i0))
+#define NV50_3D_PM_SET__ESIZE 0x00000004
+#define NV50_3D_PM_SET__LEN 0x00000004
+
+#define NV50_3D_VB_ELEMENT_U8_SETUP 0x00001300
+#define NV50_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK 0xc0000000
+#define NV50_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT 30
+#define NV50_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK 0x3fffffff
+#define NV50_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT 0
+
+#define NV50_3D_VB_ELEMENT_U8 0x00001304
+#define NV50_3D_VB_ELEMENT_U8_I0__MASK 0x000000ff
+#define NV50_3D_VB_ELEMENT_U8_I0__SHIFT 0
+#define NV50_3D_VB_ELEMENT_U8_I1__MASK 0x0000ff00
+#define NV50_3D_VB_ELEMENT_U8_I1__SHIFT 8
+#define NV50_3D_VB_ELEMENT_U8_I2__MASK 0x00ff0000
+#define NV50_3D_VB_ELEMENT_U8_I2__SHIFT 16
+#define NV50_3D_VB_ELEMENT_U8_I3__MASK 0xff000000
+#define NV50_3D_VB_ELEMENT_U8_I3__SHIFT 24
+
+#define NV50_3D_D3D_CULL_MODE 0x00001308
+#define NV50_3D_D3D_CULL_MODE_NONE 0x00000001
+#define NV50_3D_D3D_CULL_MODE_FRONT 0x00000002
+#define NV50_3D_D3D_CULL_MODE_BACK 0x00000003
+
+#define NV50_3D_DEPTH_TEST_FUNC 0x0000130c
+#define NV50_3D_DEPTH_TEST_FUNC_NEVER 0x00000200
+#define NV50_3D_DEPTH_TEST_FUNC_LESS 0x00000201
+#define NV50_3D_DEPTH_TEST_FUNC_EQUAL 0x00000202
+#define NV50_3D_DEPTH_TEST_FUNC_LEQUAL 0x00000203
+#define NV50_3D_DEPTH_TEST_FUNC_GREATER 0x00000204
+#define NV50_3D_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205
+#define NV50_3D_DEPTH_TEST_FUNC_GEQUAL 0x00000206
+#define NV50_3D_DEPTH_TEST_FUNC_ALWAYS 0x00000207
+
+#define NV50_3D_ALPHA_TEST_REF 0x00001310
+
+#define NV50_3D_ALPHA_TEST_FUNC 0x00001314
+#define NV50_3D_ALPHA_TEST_FUNC_NEVER 0x00000200
+#define NV50_3D_ALPHA_TEST_FUNC_LESS 0x00000201
+#define NV50_3D_ALPHA_TEST_FUNC_EQUAL 0x00000202
+#define NV50_3D_ALPHA_TEST_FUNC_LEQUAL 0x00000203
+#define NV50_3D_ALPHA_TEST_FUNC_GREATER 0x00000204
+#define NV50_3D_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205
+#define NV50_3D_ALPHA_TEST_FUNC_GEQUAL 0x00000206
+#define NV50_3D_ALPHA_TEST_FUNC_ALWAYS 0x00000207
+
+#define NVA0_3D_DRAW_TFB_STRIDE 0x00001318
+#define NVA0_3D_DRAW_TFB_STRIDE__MIN 0x00000001
+#define NVA0_3D_DRAW_TFB_STRIDE__MAX 0x00000fff
+
+#define NV50_3D_BLEND_COLOR(i0) (0x0000131c + 0x4*(i0))
+#define NV50_3D_BLEND_COLOR__ESIZE 0x00000004
+#define NV50_3D_BLEND_COLOR__LEN 0x00000004
+
+#define NV50_3D_UNK132C 0x0000132c
+
+#define NV50_3D_TSC_FLUSH 0x00001330
+#define NV50_3D_TSC_FLUSH_SPECIFIC 0x00000001
+#define NV50_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NV50_3D_TSC_FLUSH_ENTRY__SHIFT 4
+
+#define NV50_3D_TIC_FLUSH 0x00001334
+#define NV50_3D_TIC_FLUSH_SPECIFIC 0x00000001
+#define NV50_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NV50_3D_TIC_FLUSH_ENTRY__SHIFT 4
+
+#define NV50_3D_TEX_CACHE_CTL 0x00001338
+#define NV50_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030
+#define NV50_3D_TEX_CACHE_CTL_UNK1__SHIFT 4
+
+#define NV50_3D_UNK133C 0x0000133c
+
+#define NV50_3D_BLEND_EQUATION_RGB 0x00001340
+#define NV50_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NV50_3D_BLEND_EQUATION_RGB_MIN 0x00008007
+#define NV50_3D_BLEND_EQUATION_RGB_MAX 0x00008008
+#define NV50_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NV50_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NV50_3D_BLEND_FUNC_SRC_RGB 0x00001344
+
+#define NV50_3D_BLEND_FUNC_DST_RGB 0x00001348
+
+#define NV50_3D_BLEND_EQUATION_ALPHA 0x0000134c
+#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
+#define NV50_3D_BLEND_EQUATION_ALPHA_MIN 0x00008007
+#define NV50_3D_BLEND_EQUATION_ALPHA_MAX 0x00008008
+#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NV50_3D_BLEND_FUNC_SRC_ALPHA 0x00001350
+
+#define NV50_3D_UNK1354 0x00001354
+
+#define NV50_3D_BLEND_FUNC_DST_ALPHA 0x00001358
+
+#define NV50_3D_UNK135C 0x0000135c
+
+#define NV50_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0))
+#define NV50_3D_BLEND_ENABLE__ESIZE 0x00000004
+#define NV50_3D_BLEND_ENABLE__LEN 0x00000008
+
+#define NV50_3D_STENCIL_ENABLE 0x00001380
+
+#define NV50_3D_STENCIL_FRONT_OP_FAIL 0x00001384
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL 0x00001388
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS 0x0000138c
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC 0x00001390
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207
+
+#define NV50_3D_STENCIL_FRONT_FUNC_REF 0x00001394
+
+#define NV50_3D_STENCIL_FRONT_MASK 0x00001398
+
+#define NV50_3D_STENCIL_FRONT_FUNC_MASK 0x0000139c
+
+#define NV50_3D_UNK13A0 0x000013a0
+
+#define NVA0_3D_DRAW_TFB_BASE 0x000013a4
+
+#define NV50_3D_FRAG_COLOR_CLAMP_EN 0x000013a8
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_0 0x00000001
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_1 0x00000010
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_2 0x00000100
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_3 0x00001000
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_4 0x00010000
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_5 0x00100000
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_6 0x01000000
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_7 0x10000000
+
+#define NV50_3D_SCREEN_Y_CONTROL 0x000013ac
+#define NV50_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001
+#define NV50_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010
+
+#define NV50_3D_LINE_WIDTH 0x000013b0
+
+#define NV50_3D_TEX_LIMITS(i0) (0x000013b4 + 0x4*(i0))
+#define NV50_3D_TEX_LIMITS__ESIZE 0x00000004
+#define NV50_3D_TEX_LIMITS__LEN 0x00000003
+#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f
+#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0
+#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000
+#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004
+#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0
+#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4
+#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000
+#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007
+
+#define NV50_3D_POINT_COORD_REPLACE_MAP(i0) (0x000013c0 + 0x4*(i0))
+#define NV50_3D_POINT_COORD_REPLACE_MAP__ESIZE 0x00000004
+#define NV50_3D_POINT_COORD_REPLACE_MAP__LEN 0x00000010
+
+#define NV50_3D_UNK1400_LANES 0x00001400
+
+#define NV50_3D_UNK1404 0x00001404
+
+#define NV50_3D_UNK1408 0x00001408
+
+#define NV50_3D_VP_START_ID 0x0000140c
+
+#define NV50_3D_GP_START_ID 0x00001410
+
+#define NV50_3D_FP_START_ID 0x00001414
+
+#define NVA3_3D_UNK1418 0x00001418
+
+#define NV50_3D_UNK141C 0x0000141c
+
+#define NV50_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420
+#define NV50_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001
+#define NV50_3D_GP_VERTEX_OUTPUT_COUNT__MAX 0x00000400
+
+#define NV50_3D_VERTEX_ARRAY_FLUSH 0x0000142c
+
+#define NV50_3D_UNK1430 0x00001430
+#define NV50_3D_UNK1430_UNK0 0x00000010
+#define NV50_3D_UNK1430_UNK1 0x00000100
+
+#define NV50_3D_VB_ELEMENT_BASE 0x00001434
+
+#define NV50_3D_VB_INSTANCE_BASE 0x00001438
+
+#define NV50_3D_CLEAR_FLAGS 0x0000143c
+#define NV50_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001
+#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT__MASK 0x00000010
+#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT__SHIFT 4
+#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT_SCISSOR 0x00000000
+#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT 0x00000010
+
+#define NV50_3D_CODE_CB_FLUSH 0x00001440
+
+#define NV50_3D_BIND_TSC(i0) (0x00001444 + 0x8*(i0))
+#define NV50_3D_BIND_TSC__ESIZE 0x00000008
+#define NV50_3D_BIND_TSC__LEN 0x00000003
+#define NV50_3D_BIND_TSC_VALID 0x00000001
+#define NV50_3D_BIND_TSC_SAMPLER__MASK 0x000000f0
+#define NV50_3D_BIND_TSC_SAMPLER__SHIFT 4
+#define NV50_3D_BIND_TSC_TSC__MASK 0x001ff000
+#define NV50_3D_BIND_TSC_TSC__SHIFT 12
+
+#define NV50_3D_BIND_TIC(i0) (0x00001448 + 0x8*(i0))
+#define NV50_3D_BIND_TIC__ESIZE 0x00000008
+#define NV50_3D_BIND_TIC__LEN 0x00000003
+#define NV50_3D_BIND_TIC_VALID 0x00000001
+#define NV50_3D_BIND_TIC_TEXTURE__MASK 0x000001fe
+#define NV50_3D_BIND_TIC_TEXTURE__SHIFT 1
+#define NV50_3D_BIND_TIC_TIC__MASK 0x7ffffe00
+#define NV50_3D_BIND_TIC_TIC__SHIFT 9
+
+#define NV50_3D_BIND_TSC2(i0) (0x00001468 + 0x8*(i0))
+#define NV50_3D_BIND_TSC2__ESIZE 0x00000008
+#define NV50_3D_BIND_TSC2__LEN 0x00000003
+#define NV50_3D_BIND_TSC2_VALID 0x00000001
+#define NV50_3D_BIND_TSC2_SAMPLER__MASK 0x000000f0
+#define NV50_3D_BIND_TSC2_SAMPLER__SHIFT 4
+#define NV50_3D_BIND_TSC2_TSC__MASK 0x001ff000
+#define NV50_3D_BIND_TSC2_TSC__SHIFT 12
+
+#define NV50_3D_BIND_TIC2(i0) (0x0000146c + 0x8*(i0))
+#define NV50_3D_BIND_TIC2__ESIZE 0x00000008
+#define NV50_3D_BIND_TIC2__LEN 0x00000003
+#define NV50_3D_BIND_TIC2_VALID 0x00000001
+#define NV50_3D_BIND_TIC2_TEXTURE__MASK 0x000001fe
+#define NV50_3D_BIND_TIC2_TEXTURE__SHIFT 1
+#define NV50_3D_BIND_TIC2_TIC__MASK 0x7ffffe00
+#define NV50_3D_BIND_TIC2_TIC__SHIFT 9
+
+#define NV50_3D_STRMOUT_MAP(i0) (0x00001480 + 0x4*(i0))
+#define NV50_3D_STRMOUT_MAP__ESIZE 0x00000004
+#define NV50_3D_STRMOUT_MAP__LEN 0x00000020
+
+#define NV50_3D_CLIPID_HEIGHT 0x00001504
+#define NV50_3D_CLIPID_HEIGHT__MAX 0x00002000
+
+#define NV50_3D_CLIPID_FILL_RECT_HORIZ 0x00001508
+#define NV50_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff
+#define NV50_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0
+#define NV50_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000
+#define NV50_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16
+
+#define NV50_3D_CLIPID_FILL_RECT_VERT 0x0000150c
+#define NV50_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff
+#define NV50_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0
+#define NV50_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000
+#define NV50_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16
+
+#define NV50_3D_VP_CLIP_DISTANCE_ENABLE 0x00001510
+#define NV50_3D_VP_CLIP_DISTANCE_ENABLE_0 0x00000001
+#define NV50_3D_VP_CLIP_DISTANCE_ENABLE_1 0x00000002
+#define NV50_3D_VP_CLIP_DISTANCE_ENABLE_2 0x00000004
+#define NV50_3D_VP_CLIP_DISTANCE_ENABLE_3 0x00000008
+#define NV50_3D_VP_CLIP_DISTANCE_ENABLE_4 0x00000010
+#define NV50_3D_VP_CLIP_DISTANCE_ENABLE_5 0x00000020
+#define NV50_3D_VP_CLIP_DISTANCE_ENABLE_6 0x00000040
+#define NV50_3D_VP_CLIP_DISTANCE_ENABLE_7 0x00000080
+
+#define NV50_3D_SAMPLECNT_ENABLE 0x00001514
+
+#define NV50_3D_POINT_SIZE 0x00001518
+
+#define NV50_3D_ZCULL_STATCTRS_ENABLE 0x0000151c
+
+#define NV50_3D_POINT_SPRITE_ENABLE 0x00001520
+
+#define NVA0_3D_UNK152C 0x0000152c
+#define NVA0_3D_UNK152C_UNK0 0x00000001
+#define NVA0_3D_UNK152C_UNK1 0x00000010
+#define NVA0_3D_UNK152C_UNK2 0x00000100
+#define NVA0_3D_UNK152C_UNK3__MASK 0x000ff000
+#define NVA0_3D_UNK152C_UNK3__SHIFT 12
+#define NVA0_3D_UNK152C_UNK3__MAX 0x00000028
+
+#define NV50_3D_COUNTER_RESET 0x00001530
+#define NV50_3D_COUNTER_RESET_SAMPLECNT 0x00000001
+#define NV50_3D_COUNTER_RESET_ZCULL_STATS 0x00000002
+#define NVA0_3D_COUNTER_RESET_STRMOUT_VERTICES 0x00000008
+#define NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK 0x00000010
+#define NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x00000011
+#define NV50_3D_COUNTER_RESET_VFETCH_VERTICES 0x00000012
+#define NV50_3D_COUNTER_RESET_VFETCH_PRIMITIVES 0x00000013
+#define NV50_3D_COUNTER_RESET_VP_LAUNCHES 0x00000015
+#define NV50_3D_COUNTER_RESET_GP_LAUNCHES 0x0000001a
+#define NV50_3D_COUNTER_RESET_GP_PRIMITIVES_OUT 0x0000001b
+#define NV50_3D_COUNTER_RESET_RAST_PRIMITIVES_PRECLIP 0x0000001c
+#define NV50_3D_COUNTER_RESET_RAST_PRIMITIVES_POSTCLIP 0x0000001d
+#define NV50_3D_COUNTER_RESET_FP_PIXELS 0x0000001e
+
+#define NV50_3D_MULTISAMPLE_ENABLE 0x00001534
+
+#define NV50_3D_ZETA_ENABLE 0x00001538
+
+#define NV50_3D_MULTISAMPLE_CTRL 0x0000153c
+#define NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE 0x00000001
+#define NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE 0x00000010
+
+#define NV50_3D_NOPERSPECTIVE_BITMAP(i0) (0x00001540 + 0x4*(i0))
+#define NV50_3D_NOPERSPECTIVE_BITMAP__ESIZE 0x00000004
+#define NV50_3D_NOPERSPECTIVE_BITMAP__LEN 0x00000004
+
+#define NV50_3D_COND_ADDRESS_HIGH 0x00001550
+
+#define NV50_3D_COND_ADDRESS_LOW 0x00001554
+
+#define NV50_3D_COND_MODE 0x00001558
+#define NV50_3D_COND_MODE_NEVER 0x00000000
+#define NV50_3D_COND_MODE_ALWAYS 0x00000001
+#define NV50_3D_COND_MODE_RES_NON_ZERO 0x00000002
+#define NV50_3D_COND_MODE_EQUAL 0x00000003
+#define NV50_3D_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NV50_3D_TSC_ADDRESS_HIGH 0x0000155c
+
+#define NV50_3D_TSC_ADDRESS_LOW 0x00001560
+#define NV50_3D_TSC_ADDRESS_LOW__ALIGN 0x00000020
+
+#define NV50_3D_TSC_LIMIT 0x00001564
+#define NV50_3D_TSC_LIMIT__MAX 0x00001fff
+
+#define NV50_3D_UNK1568 0x00001568
+
+#define NV50_3D_POLYGON_OFFSET_FACTOR 0x0000156c
+
+#define NV50_3D_LINE_SMOOTH_ENABLE 0x00001570
+
+#define NV50_3D_TIC_ADDRESS_HIGH 0x00001574
+
+#define NV50_3D_TIC_ADDRESS_LOW 0x00001578
+
+#define NV50_3D_TIC_LIMIT 0x0000157c
+
+#define NV50_3D_PM_CONTROL(i0) (0x00001580 + 0x4*(i0))
+#define NV50_3D_PM_CONTROL__ESIZE 0x00000004
+#define NV50_3D_PM_CONTROL__LEN 0x00000004
+#define NV50_3D_PM_CONTROL_UNK0 0x00000001
+#define NV50_3D_PM_CONTROL_UNK1__MASK 0x00000070
+#define NV50_3D_PM_CONTROL_UNK1__SHIFT 4
+#define NV50_3D_PM_CONTROL_UNK2__MASK 0x00ffff00
+#define NV50_3D_PM_CONTROL_UNK2__SHIFT 8
+#define NV50_3D_PM_CONTROL_UNK3__MASK 0xff000000
+#define NV50_3D_PM_CONTROL_UNK3__SHIFT 24
+
+#define NV50_3D_ZCULL_REGION 0x00001590
+
+#define NV50_3D_STENCIL_TWO_SIDE_ENABLE 0x00001594
+
+#define NV50_3D_STENCIL_BACK_OP_FAIL 0x00001598
+#define NV50_3D_STENCIL_BACK_OP_FAIL_ZERO 0x00000000
+#define NV50_3D_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a
+#define NV50_3D_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00
+#define NV50_3D_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_BACK_OP_FAIL_INCR 0x00001e02
+#define NV50_3D_STENCIL_BACK_OP_FAIL_DECR 0x00001e03
+#define NV50_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL 0x0000159c
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_BACK_OP_ZPASS 0x000015a0
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC 0x000015a4
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207
+
+#define NV50_3D_UNK15A8 0x000015a8
+#define NV50_3D_UNK15A8_UNK1__MASK 0x00000007
+#define NV50_3D_UNK15A8_UNK1__SHIFT 0
+#define NV50_3D_UNK15A8_UNK2__MASK 0x00000070
+#define NV50_3D_UNK15A8_UNK2__SHIFT 4
+
+#define NV50_3D_UNK15AC 0x000015ac
+
+#define NV50_3D_UNK15B0 0x000015b0
+#define NV50_3D_UNK15B0_0 0x00000001
+#define NV50_3D_UNK15B0_1 0x00000010
+#define NV50_3D_UNK15B0_2 0x00000100
+
+#define NV50_3D_CSAA_ENABLE 0x000015b4
+
+#define NV50_3D_FRAMEBUFFER_SRGB 0x000015b8
+
+#define NV50_3D_POLYGON_OFFSET_UNITS 0x000015bc
+
+#define NVA3_3D_UNK15C4 0x000015c4
+
+#define NVA3_3D_UNK15C8 0x000015c8
+
+#define NV50_3D_LAYER 0x000015cc
+#define NV50_3D_LAYER_IDX__MASK 0x0000ffff
+#define NV50_3D_LAYER_IDX__SHIFT 0
+#define NV50_3D_LAYER_USE_GP 0x00010000
+
+#define NV50_3D_MULTISAMPLE_MODE 0x000015d0
+#define NV50_3D_MULTISAMPLE_MODE_MS1 0x00000000
+#define NV50_3D_MULTISAMPLE_MODE_MS2 0x00000001
+#define NV50_3D_MULTISAMPLE_MODE_MS4 0x00000002
+#define NV50_3D_MULTISAMPLE_MODE_MS8 0x00000003
+#define NV50_3D_MULTISAMPLE_MODE_MS8_ALT 0x00000004
+#define NV50_3D_MULTISAMPLE_MODE_MS2_ALT 0x00000005
+#define NV50_3D_MULTISAMPLE_MODE_MS4_CS4 0x00000008
+#define NV50_3D_MULTISAMPLE_MODE_MS4_CS12 0x00000009
+#define NV50_3D_MULTISAMPLE_MODE_MS8_CS8 0x0000000a
+
+#define NV50_3D_VERTEX_BEGIN_D3D 0x000015d4
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT 0
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS 0x00000001
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES 0x00000002
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP 0x00000003
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES 0x00000004
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP 0x00000005
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY 0x0000000a
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
+#define NV50_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT 0x10000000
+#define NV84_3D_VERTEX_BEGIN_D3D_PRIMITIVE_ID_CONT 0x20000000
+#define NVA0_3D_VERTEX_BEGIN_D3D_INSTANCE_CONT 0x40000000
+
+#define NV50_3D_VERTEX_END_D3D 0x000015d8
+#define NV50_3D_VERTEX_END_D3D_UNK0 0x00000001
+#define NVA0_3D_VERTEX_END_D3D_UNK1 0x00000002
+
+#define NV50_3D_VERTEX_BEGIN_GL 0x000015dc
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK 0x0fffffff
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT 0
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS 0x00000000
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES 0x00000001
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP 0x00000002
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP 0x00000003
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES 0x00000004
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP 0x00000005
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN 0x00000006
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS 0x00000007
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP 0x00000008
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON 0x00000009
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY 0x0000000a
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
+#define NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x10000000
+#define NV84_3D_VERTEX_BEGIN_GL_PRIMITIVE_ID_CONT 0x20000000
+#define NVA0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT 0x40000000
+
+#define NV50_3D_VERTEX_END_GL 0x000015e0
+#define NV50_3D_VERTEX_END_GL_UNK0 0x00000001
+#define NVA0_3D_VERTEX_END_GL_UNK1 0x00000002
+
+#define NV50_3D_EDGEFLAG_ENABLE 0x000015e4
+
+#define NV50_3D_VB_ELEMENT_U32 0x000015e8
+
+#define NV50_3D_VB_ELEMENT_U16_SETUP 0x000015ec
+#define NV50_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK 0xc0000000
+#define NV50_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT 30
+#define NV50_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK 0x3fffffff
+#define NV50_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT 0
+
+#define NV50_3D_VB_ELEMENT_U16 0x000015f0
+#define NV50_3D_VB_ELEMENT_U16_I0__MASK 0x0000ffff
+#define NV50_3D_VB_ELEMENT_U16_I0__SHIFT 0
+#define NV50_3D_VB_ELEMENT_U16_I1__MASK 0xffff0000
+#define NV50_3D_VB_ELEMENT_U16_I1__SHIFT 16
+
+#define NV50_3D_VERTEX_BASE_HIGH 0x000015f4
+
+#define NV50_3D_VERTEX_BASE_LOW 0x000015f8
+
+#define NV50_3D_VERTEX_DATA 0x00001640
+
+#define NV50_3D_PRIM_RESTART_ENABLE 0x00001644
+
+#define NV50_3D_PRIM_RESTART_INDEX 0x00001648
+
+#define NV50_3D_VP_GP_BUILTIN_ATTR_EN 0x0000164c
+#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID 0x00000001
+#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID 0x00000010
+#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID 0x00000100
+#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12 0x00001000
+
+#define NV50_3D_VP_ATTR_EN(i0) (0x00001650 + 0x4*(i0))
+#define NV50_3D_VP_ATTR_EN__ESIZE 0x00000004
+#define NV50_3D_VP_ATTR_EN__LEN 0x00000002
+#define NV50_3D_VP_ATTR_EN_7__MASK 0xf0000000
+#define NV50_3D_VP_ATTR_EN_7__SHIFT 28
+#define NV50_3D_VP_ATTR_EN_7_X 0x10000000
+#define NV50_3D_VP_ATTR_EN_7_Y 0x20000000
+#define NV50_3D_VP_ATTR_EN_7_Z 0x40000000
+#define NV50_3D_VP_ATTR_EN_7_W 0x80000000
+#define NV50_3D_VP_ATTR_EN_6__MASK 0x0f000000
+#define NV50_3D_VP_ATTR_EN_6__SHIFT 24
+#define NV50_3D_VP_ATTR_EN_6_X 0x01000000
+#define NV50_3D_VP_ATTR_EN_6_Y 0x02000000
+#define NV50_3D_VP_ATTR_EN_6_Z 0x04000000
+#define NV50_3D_VP_ATTR_EN_6_W 0x08000000
+#define NV50_3D_VP_ATTR_EN_5__MASK 0x00f00000
+#define NV50_3D_VP_ATTR_EN_5__SHIFT 20
+#define NV50_3D_VP_ATTR_EN_5_X 0x00100000
+#define NV50_3D_VP_ATTR_EN_5_Y 0x00200000
+#define NV50_3D_VP_ATTR_EN_5_Z 0x00400000
+#define NV50_3D_VP_ATTR_EN_5_W 0x00800000
+#define NV50_3D_VP_ATTR_EN_4__MASK 0x000f0000
+#define NV50_3D_VP_ATTR_EN_4__SHIFT 16
+#define NV50_3D_VP_ATTR_EN_4_X 0x00010000
+#define NV50_3D_VP_ATTR_EN_4_Y 0x00020000
+#define NV50_3D_VP_ATTR_EN_4_Z 0x00040000
+#define NV50_3D_VP_ATTR_EN_4_W 0x00080000
+#define NV50_3D_VP_ATTR_EN_3__MASK 0x0000f000
+#define NV50_3D_VP_ATTR_EN_3__SHIFT 12
+#define NV50_3D_VP_ATTR_EN_3_X 0x00001000
+#define NV50_3D_VP_ATTR_EN_3_Y 0x00002000
+#define NV50_3D_VP_ATTR_EN_3_Z 0x00004000
+#define NV50_3D_VP_ATTR_EN_3_W 0x00008000
+#define NV50_3D_VP_ATTR_EN_2__MASK 0x00000f00
+#define NV50_3D_VP_ATTR_EN_2__SHIFT 8
+#define NV50_3D_VP_ATTR_EN_2_X 0x00000100
+#define NV50_3D_VP_ATTR_EN_2_Y 0x00000200
+#define NV50_3D_VP_ATTR_EN_2_Z 0x00000400
+#define NV50_3D_VP_ATTR_EN_2_W 0x00000800
+#define NV50_3D_VP_ATTR_EN_1__MASK 0x000000f0
+#define NV50_3D_VP_ATTR_EN_1__SHIFT 4
+#define NV50_3D_VP_ATTR_EN_1_X 0x00000010
+#define NV50_3D_VP_ATTR_EN_1_Y 0x00000020
+#define NV50_3D_VP_ATTR_EN_1_Z 0x00000040
+#define NV50_3D_VP_ATTR_EN_1_W 0x00000080
+#define NV50_3D_VP_ATTR_EN_0__MASK 0x0000000f
+#define NV50_3D_VP_ATTR_EN_0__SHIFT 0
+#define NV50_3D_VP_ATTR_EN_0_X 0x00000001
+#define NV50_3D_VP_ATTR_EN_0_Y 0x00000002
+#define NV50_3D_VP_ATTR_EN_0_Z 0x00000004
+#define NV50_3D_VP_ATTR_EN_0_W 0x00000008
+
+#define NV50_3D_POINT_SMOOTH_ENABLE 0x00001658
+
+#define NV50_3D_POINT_RASTER_RULES 0x0000165c
+#define NV50_3D_POINT_RASTER_RULES_OGL 0x00000000
+#define NV50_3D_POINT_RASTER_RULES_D3D 0x00000001
+
+#define NV50_3D_POINT_SPRITE_CTRL 0x00001660
+#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN__MASK 0x00000010
+#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN__SHIFT 4
+#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN_LOWER_LEFT 0x00000000
+#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN_UPPER_LEFT 0x00000010
+
+#define NVA0_3D_TEX_MISC 0x00001664
+#define NVA0_3D_TEX_MISC_UNK1 0x00000002
+#define NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004
+
+#define NV50_3D_LINE_SMOOTH_BLUR 0x00001668
+#define NV50_3D_LINE_SMOOTH_BLUR_LOW 0x00000000
+#define NV50_3D_LINE_SMOOTH_BLUR_MEDIUM 0x00000001
+#define NV50_3D_LINE_SMOOTH_BLUR_HIGH 0x00000002
+
+#define NV50_3D_LINE_STIPPLE_ENABLE 0x0000166c
+
+#define NV50_3D_COVERAGE_LUT(i0) (0x00001670 + 0x4*(i0))
+#define NV50_3D_COVERAGE_LUT__ESIZE 0x00000004
+#define NV50_3D_COVERAGE_LUT__LEN 0x00000004
+#define NV50_3D_COVERAGE_LUT_0__MASK 0x000000ff
+#define NV50_3D_COVERAGE_LUT_0__SHIFT 0
+#define NV50_3D_COVERAGE_LUT_1__MASK 0x0000ff00
+#define NV50_3D_COVERAGE_LUT_1__SHIFT 8
+#define NV50_3D_COVERAGE_LUT_2__MASK 0x00ff0000
+#define NV50_3D_COVERAGE_LUT_2__SHIFT 16
+#define NV50_3D_COVERAGE_LUT_3__MASK 0xff000000
+#define NV50_3D_COVERAGE_LUT_3__SHIFT 24
+
+#define NV50_3D_LINE_STIPPLE 0x00001680
+#define NV50_3D_LINE_STIPPLE_FACTOR_M1__MASK 0x000000ff
+#define NV50_3D_LINE_STIPPLE_FACTOR_M1__SHIFT 0
+#define NV50_3D_LINE_STIPPLE_PATTERN__MASK 0x00ffff00
+#define NV50_3D_LINE_STIPPLE_PATTERN__SHIFT 8
+
+#define NV50_3D_PROVOKING_VERTEX_LAST 0x00001684
+
+#define NV50_3D_VERTEX_TWO_SIDE_ENABLE 0x00001688
+
+#define NV50_3D_POLYGON_STIPPLE_ENABLE 0x0000168c
+
+#define NV50_3D_UNK1690 0x00001690
+#define NV50_3D_UNK1690_ALWAYS_DERIV 0x00000001
+#define NV50_3D_UNK1690_UNK16 0x00010000
+
+#define NV50_3D_SET_PROGRAM_CB 0x00001694
+#define NV50_3D_SET_PROGRAM_CB_PROGRAM__MASK 0x000000f0
+#define NV50_3D_SET_PROGRAM_CB_PROGRAM__SHIFT 4
+#define NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX 0x00000000
+#define NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY 0x00000020
+#define NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT 0x00000030
+#define NV50_3D_SET_PROGRAM_CB_INDEX__MASK 0x00000f00
+#define NV50_3D_SET_PROGRAM_CB_INDEX__SHIFT 8
+#define NV50_3D_SET_PROGRAM_CB_BUFFER__MASK 0x0007f000
+#define NV50_3D_SET_PROGRAM_CB_BUFFER__SHIFT 12
+#define NV50_3D_SET_PROGRAM_CB_VALID 0x00000001
+
+#define NV50_3D_UNK1698 0x00001698
+#define NV50_3D_UNK1698_0 0x00000001
+#define NV50_3D_UNK1698_1 0x00000010
+#define NV50_3D_UNK1698_2 0x00000100
+
+#define NVA3_3D_SAMPLE_SHADING 0x0000169c
+#define NVA3_3D_SAMPLE_SHADING_MIN_SAMPLES__MASK 0x0000000f
+#define NVA3_3D_SAMPLE_SHADING_MIN_SAMPLES__SHIFT 0
+#define NVA3_3D_SAMPLE_SHADING_ENABLE 0x00000010
+
+#define NVA3_3D_UNK16A0 0x000016a0
+
+#define NV50_3D_VP_RESULT_MAP_SIZE 0x000016ac
+
+#define NV50_3D_VP_REG_ALLOC_TEMP 0x000016b0
+
+#define NVA0_3D_UNK16B4 0x000016b4
+#define NVA0_3D_UNK16B4_UNK0 0x00000001
+#define NVA3_3D_UNK16B4_UNK1 0x00000002
+
+#define NV50_3D_VP_REG_ALLOC_RESULT 0x000016b8
+
+#define NV50_3D_VP_RESULT_MAP(i0) (0x000016bc + 0x4*(i0))
+#define NV50_3D_VP_RESULT_MAP__ESIZE 0x00000004
+#define NV50_3D_VP_RESULT_MAP__LEN 0x00000011
+#define NV50_3D_VP_RESULT_MAP_0__MASK 0x000000ff
+#define NV50_3D_VP_RESULT_MAP_0__SHIFT 0
+#define NV50_3D_VP_RESULT_MAP_1__MASK 0x0000ff00
+#define NV50_3D_VP_RESULT_MAP_1__SHIFT 8
+#define NV50_3D_VP_RESULT_MAP_2__MASK 0x00ff0000
+#define NV50_3D_VP_RESULT_MAP_2__SHIFT 16
+#define NV50_3D_VP_RESULT_MAP_3__MASK 0xff000000
+#define NV50_3D_VP_RESULT_MAP_3__SHIFT 24
+
+#define NV50_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001700 + 0x4*(i0))
+#define NV50_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004
+#define NV50_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020
+
+#define NVA0_3D_STRMOUT_OFFSET(i0) (0x00001780 + 0x4*(i0))
+#define NVA0_3D_STRMOUT_OFFSET__ESIZE 0x00000004
+#define NVA0_3D_STRMOUT_OFFSET__LEN 0x00000004
+
+#define NV50_3D_GP_ENABLE 0x00001798
+
+#define NV50_3D_GP_REG_ALLOC_TEMP 0x000017a0
+
+#define NV50_3D_GP_REG_ALLOC_RESULT 0x000017a8
+
+#define NV50_3D_GP_RESULT_MAP_SIZE 0x000017ac
+
+#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE 0x000017b0
+#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS 0x00000001
+#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP 0x00000002
+#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP 0x00000003
+
+#define NV50_3D_RASTERIZE_ENABLE 0x000017b4
+
+#define NV50_3D_STRMOUT_ENABLE 0x000017b8
+
+#define NV50_3D_GP_RESULT_MAP(i0) (0x000017fc + 0x4*(i0))
+#define NV50_3D_GP_RESULT_MAP__ESIZE 0x00000004
+#define NV50_3D_GP_RESULT_MAP__LEN 0x00000021
+#define NV50_3D_GP_RESULT_MAP_0__MASK 0x000000ff
+#define NV50_3D_GP_RESULT_MAP_0__SHIFT 0
+#define NV50_3D_GP_RESULT_MAP_1__MASK 0x0000ff00
+#define NV50_3D_GP_RESULT_MAP_1__SHIFT 8
+#define NV50_3D_GP_RESULT_MAP_2__MASK 0x00ff0000
+#define NV50_3D_GP_RESULT_MAP_2__SHIFT 16
+#define NV50_3D_GP_RESULT_MAP_3__MASK 0xff000000
+#define NV50_3D_GP_RESULT_MAP_3__SHIFT 24
+
+#define NV50_3D_UNK187C 0x0000187c
+
+#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT(i0) (0x00001880 + 0x4*(i0))
+#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT__ESIZE 0x00000004
+#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT__LEN 0x00000020
+
+#define NV50_3D_GP_VIEWPORT_ID_ENABLE 0x00001900
+
+#define NV50_3D_MAP_SEMANTIC_0 0x00001904
+#define NV50_3D_MAP_SEMANTIC_0_FFC0_ID__MASK 0x000000ff
+#define NV50_3D_MAP_SEMANTIC_0_FFC0_ID__SHIFT 0
+#define NV50_3D_MAP_SEMANTIC_0_BFC0_ID__MASK 0x0000ff00
+#define NV50_3D_MAP_SEMANTIC_0_BFC0_ID__SHIFT 8
+#define NV50_3D_MAP_SEMANTIC_0_COLR_NR__MASK 0x00ff0000
+#define NV50_3D_MAP_SEMANTIC_0_COLR_NR__SHIFT 16
+#define NV50_3D_MAP_SEMANTIC_0_CLMP_EN 0xff000000
+
+#define NV50_3D_MAP_SEMANTIC_1 0x00001908
+#define NV50_3D_MAP_SEMANTIC_1_CLIP_START__MASK 0x000000ff
+#define NV50_3D_MAP_SEMANTIC_1_CLIP_START__SHIFT 0
+#define NV50_3D_MAP_SEMANTIC_1_CLIP_NUM__MASK 0x00000f00
+#define NV50_3D_MAP_SEMANTIC_1_CLIP_NUM__SHIFT 8
+
+#define NV50_3D_MAP_SEMANTIC_2 0x0000190c
+#define NV50_3D_MAP_SEMANTIC_2_LAYER_ID__MASK 0x000000ff
+#define NV50_3D_MAP_SEMANTIC_2_LAYER_ID__SHIFT 0
+
+#define NV50_3D_MAP_SEMANTIC_3 0x00001910
+#define NV50_3D_MAP_SEMANTIC_3_PTSZ_EN__MASK 0x00000001
+#define NV50_3D_MAP_SEMANTIC_3_PTSZ_EN__SHIFT 0
+#define NV50_3D_MAP_SEMANTIC_3_PTSZ_ID__MASK 0x00000ff0
+#define NV50_3D_MAP_SEMANTIC_3_PTSZ_ID__SHIFT 4
+
+#define NV50_3D_MAP_SEMANTIC_4 0x00001914
+#define NV50_3D_MAP_SEMANTIC_4_PRIM_ID__MASK 0x000000ff
+#define NV50_3D_MAP_SEMANTIC_4_PRIM_ID__SHIFT 0
+
+#define NV50_3D_CULL_FACE_ENABLE 0x00001918
+
+#define NV50_3D_FRONT_FACE 0x0000191c
+#define NV50_3D_FRONT_FACE_CW 0x00000900
+#define NV50_3D_FRONT_FACE_CCW 0x00000901
+
+#define NV50_3D_CULL_FACE 0x00001920
+#define NV50_3D_CULL_FACE_FRONT 0x00000404
+#define NV50_3D_CULL_FACE_BACK 0x00000405
+#define NV50_3D_CULL_FACE_FRONT_AND_BACK 0x00000408
+
+#define NV50_3D_UNK1924 0x00001924
+
+#define NVA3_3D_FP_MULTISAMPLE 0x00001928
+#define NVA3_3D_FP_MULTISAMPLE_EXPORT_SAMPLE_MASK 0x00000001
+#define NVA3_3D_FP_MULTISAMPLE_FORCE_PER_SAMPLE 0x00000002
+
+#define NV50_3D_VIEWPORT_TRANSFORM_EN 0x0000192c
+
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001
+#define NVA0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1 0x00000002
+#define NVA0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2 0x00000004
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000
+#define NV84_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000
+
+#define NV50_3D_UNK1940 0x00001940
+#define NV50_3D_UNK1940_0 0x00000001
+#define NV50_3D_UNK1940_1 0x00000010
+#define NV50_3D_UNK1940_2 0x00000100
+#define NV50_3D_UNK1940_3 0x00001000
+#define NV50_3D_UNK1940_4 0x00010000
+#define NV50_3D_UNK1940_5 0x00100000
+#define NV50_3D_UNK1940_6 0x01000000
+#define NV50_3D_UNK1940_7 0x10000000
+
+#define NVA3_3D_UNK1944 0x00001944
+
+#define NV50_3D_CLIP_RECTS_EN 0x0000194c
+
+#define NV50_3D_CLIP_RECTS_MODE 0x00001950
+#define NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000
+#define NV50_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001
+#define NV50_3D_CLIP_RECTS_MODE_NEVER 0x00000002
+
+#define NV50_3D_ZCULL_VALIDATE 0x00001954
+#define NV50_3D_ZCULL_VALIDATE_CLEAR_UNK0 0x00000001
+#define NV50_3D_ZCULL_VALIDATE_CLEAR_UNK1 0x00000010
+
+#define NV50_3D_ZCULL_INVALIDATE 0x00001958
+
+#define NVA3_3D_UNK1960 0x00001960
+#define NVA3_3D_UNK1960_0 0x00000001
+#define NVA3_3D_UNK1960_1 0x00000010
+
+#define NV50_3D_UNK1968 0x00001968
+#define NV50_3D_UNK1968_0 0x00000001
+#define NV50_3D_UNK1968_1 0x00000010
+
+#define NV50_3D_FP_CTRL_UNK196C 0x0000196c
+#define NV50_3D_FP_CTRL_UNK196C_0 0x00000001
+#define NV50_3D_FP_CTRL_UNK196C_1 0x00000010
+
+#define NV50_3D_UNK1978 0x00001978
+
+#define NV50_3D_CLIPID_ENABLE 0x0000197c
+
+#define NV50_3D_CLIPID_WIDTH 0x00001980
+#define NV50_3D_CLIPID_WIDTH__MAX 0x00002000
+#define NV50_3D_CLIPID_WIDTH__ALIGN 0x00000040
+
+#define NV50_3D_CLIPID_ID 0x00001984
+
+#define NV50_3D_FP_INTERPOLANT_CTRL 0x00001988
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK__MASK 0xff000000
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK__SHIFT 24
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_X 0x01000000
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_Y 0x02000000
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_Z 0x04000000
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_W 0x08000000
+#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__MASK 0x00ff0000
+#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT 16
+#define NV50_3D_FP_INTERPOLANT_CTRL_OFFSET__MASK 0x0000ff00
+#define NV50_3D_FP_INTERPOLANT_CTRL_OFFSET__SHIFT 8
+#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT__MASK 0x000000ff
+#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT 0
+
+#define NV50_3D_FP_REG_ALLOC_TEMP 0x0000198c
+
+#define NV50_3D_REG_MODE 0x000019a0
+#define NV50_3D_REG_MODE_PACKED 0x00000001
+#define NV50_3D_REG_MODE_STRIPED 0x00000002
+
+#define NV50_3D_FP_CONTROL 0x000019a8
+#define NV50_3D_FP_CONTROL_MULTIPLE_RESULTS 0x00000001
+#define NV50_3D_FP_CONTROL_EXPORTS_Z 0x00000100
+#define NV50_3D_FP_CONTROL_USES_KIL 0x00100000
+
+#define NV50_3D_DEPTH_BOUNDS_EN 0x000019bc
+
+#define NV50_3D_UNK19C0 0x000019c0
+
+#define NV50_3D_LOGIC_OP_ENABLE 0x000019c4
+
+#define NV50_3D_LOGIC_OP 0x000019c8
+#define NV50_3D_LOGIC_OP_CLEAR 0x00001500
+#define NV50_3D_LOGIC_OP_AND 0x00001501
+#define NV50_3D_LOGIC_OP_AND_REVERSE 0x00001502
+#define NV50_3D_LOGIC_OP_COPY 0x00001503
+#define NV50_3D_LOGIC_OP_AND_INVERTED 0x00001504
+#define NV50_3D_LOGIC_OP_NOOP 0x00001505
+#define NV50_3D_LOGIC_OP_XOR 0x00001506
+#define NV50_3D_LOGIC_OP_OR 0x00001507
+#define NV50_3D_LOGIC_OP_NOR 0x00001508
+#define NV50_3D_LOGIC_OP_EQUIV 0x00001509
+#define NV50_3D_LOGIC_OP_INVERT 0x0000150a
+#define NV50_3D_LOGIC_OP_OR_REVERSE 0x0000150b
+#define NV50_3D_LOGIC_OP_COPY_INVERTED 0x0000150c
+#define NV50_3D_LOGIC_OP_OR_INVERTED 0x0000150d
+#define NV50_3D_LOGIC_OP_NAND 0x0000150e
+#define NV50_3D_LOGIC_OP_SET 0x0000150f
+
+#define NV50_3D_ZETA_COMP_ENABLE 0x000019cc
+
+#define NV50_3D_CLEAR_BUFFERS 0x000019d0
+#define NV50_3D_CLEAR_BUFFERS_Z 0x00000001
+#define NV50_3D_CLEAR_BUFFERS_S 0x00000002
+#define NV50_3D_CLEAR_BUFFERS_R 0x00000004
+#define NV50_3D_CLEAR_BUFFERS_G 0x00000008
+#define NV50_3D_CLEAR_BUFFERS_B 0x00000010
+#define NV50_3D_CLEAR_BUFFERS_A 0x00000020
+#define NV50_3D_CLEAR_BUFFERS_RT__MASK 0x000003c0
+#define NV50_3D_CLEAR_BUFFERS_RT__SHIFT 6
+#define NV50_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00
+#define NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT 10
+
+#define NV50_3D_CLIPID_FILL 0x000019d4
+
+#define NV50_3D_UNK19D8(i0) (0x000019d8 + 0x4*(i0))
+#define NV50_3D_UNK19D8__ESIZE 0x00000004
+#define NV50_3D_UNK19D8__LEN 0x00000002
+
+#define NV50_3D_RT_COMP_ENABLE(i0) (0x000019e0 + 0x4*(i0))
+#define NV50_3D_RT_COMP_ENABLE__ESIZE 0x00000004
+#define NV50_3D_RT_COMP_ENABLE__LEN 0x00000008
+
+#define NV50_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0))
+#define NV50_3D_COLOR_MASK__ESIZE 0x00000004
+#define NV50_3D_COLOR_MASK__LEN 0x00000008
+#define NV50_3D_COLOR_MASK_R 0x0000000f
+#define NV50_3D_COLOR_MASK_G 0x000000f0
+#define NV50_3D_COLOR_MASK_B 0x00000f00
+#define NV50_3D_COLOR_MASK_A 0x0000f000
+
+#define NV50_3D_UNK1A20 0x00001a20
+
+#define NV50_3D_DELAY 0x00001a24
+
+#define NV50_3D_UNK1A28 0x00001a28
+#define NV50_3D_UNK1A28_0__MASK 0x000000ff
+#define NV50_3D_UNK1A28_0__SHIFT 0
+#define NV50_3D_UNK1A28_1 0x00000100
+
+#define NV50_3D_UNK1A2C 0x00001a2c
+
+#define NV50_3D_UNK1A30 0x00001a30
+
+#define NV50_3D_UNK1A34 0x00001a34
+
+#define NV50_3D_UNK1A38 0x00001a38
+
+#define NV50_3D_UNK1A3C 0x00001a3c
+
+#define NV50_3D_UNK1A40(i0) (0x00001a40 + 0x4*(i0))
+#define NV50_3D_UNK1A40__ESIZE 0x00000004
+#define NV50_3D_UNK1A40__LEN 0x00000010
+#define NV50_3D_UNK1A40_0__MASK 0x00000007
+#define NV50_3D_UNK1A40_0__SHIFT 0
+#define NV50_3D_UNK1A40_1__MASK 0x00000070
+#define NV50_3D_UNK1A40_1__SHIFT 4
+#define NV50_3D_UNK1A40_2__MASK 0x00000700
+#define NV50_3D_UNK1A40_2__SHIFT 8
+#define NV50_3D_UNK1A40_3__MASK 0x00007000
+#define NV50_3D_UNK1A40_3__SHIFT 12
+#define NV50_3D_UNK1A40_4__MASK 0x00070000
+#define NV50_3D_UNK1A40_4__SHIFT 16
+#define NV50_3D_UNK1A40_5__MASK 0x00700000
+#define NV50_3D_UNK1A40_5__SHIFT 20
+#define NV50_3D_UNK1A40_6__MASK 0x07000000
+#define NV50_3D_UNK1A40_6__SHIFT 24
+#define NV50_3D_UNK1A40_7__MASK 0x70000000
+#define NV50_3D_UNK1A40_7__SHIFT 28
+
+#define NV50_3D_STRMOUT_ADDRESS_HIGH(i0) (0x00001a80 + 0x10*(i0))
+#define NV50_3D_STRMOUT_ADDRESS_HIGH__ESIZE 0x00000010
+#define NV50_3D_STRMOUT_ADDRESS_HIGH__LEN 0x00000004
+
+#define NV50_3D_STRMOUT_ADDRESS_LOW(i0) (0x00001a84 + 0x10*(i0))
+#define NV50_3D_STRMOUT_ADDRESS_LOW__ESIZE 0x00000010
+#define NV50_3D_STRMOUT_ADDRESS_LOW__LEN 0x00000004
+
+#define NV50_3D_STRMOUT_NUM_ATTRIBS(i0) (0x00001a88 + 0x10*(i0))
+#define NV50_3D_STRMOUT_NUM_ATTRIBS__ESIZE 0x00000010
+#define NV50_3D_STRMOUT_NUM_ATTRIBS__LEN 0x00000004
+#define NV50_3D_STRMOUT_NUM_ATTRIBS__MAX 0x00000040
+
+#define NVA0_3D_STRMOUT_OFFSET_LIMIT(i0) (0x00001a8c + 0x10*(i0))
+#define NVA0_3D_STRMOUT_OFFSET_LIMIT__ESIZE 0x00000010
+#define NVA0_3D_STRMOUT_OFFSET_LIMIT__LEN 0x00000004
+
+#define NV50_3D_VERTEX_ARRAY_ATTRIB(i0) (0x00001ac0 + 0x4*(i0))
+#define NV50_3D_VERTEX_ARRAY_ATTRIB__ESIZE 0x00000004
+#define NV50_3D_VERTEX_ARRAY_ATTRIB__LEN 0x00000010
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_BUFFER__MASK 0x0000000f
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_BUFFER__SHIFT 0
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_CONST 0x00000010
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_OFFSET__MASK 0x0007ffe0
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_OFFSET__SHIFT 5
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT__MASK 0x01f80000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT__SHIFT 19
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 0x00080000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32 0x00100000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16 0x00180000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32 0x00200000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16 0x00280000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8 0x00500000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16 0x00780000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32 0x00900000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8 0x00980000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8 0x00c00000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16 0x00d80000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8 0x00e80000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_2_10_10_10 0x01800000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE__MASK 0x7e000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE__SHIFT 25
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT 0x7e000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_UNORM 0x24000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SNORM 0x12000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_USCALED 0x5a000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED 0x6c000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_UINT 0x48000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SINT 0x36000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_BGRA 0x80000000
+
+#define NV50_3D_QUERY_ADDRESS_HIGH 0x00001b00
+
+#define NV50_3D_QUERY_ADDRESS_LOW 0x00001b04
+
+#define NV50_3D_QUERY_SEQUENCE 0x00001b08
+
+#define NV50_3D_QUERY_GET 0x00001b0c
+#define NV50_3D_QUERY_GET_MODE__MASK 0x00000003
+#define NV50_3D_QUERY_GET_MODE__SHIFT 0
+#define NV50_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000
+#define NV50_3D_QUERY_GET_MODE_SYNC 0x00000001
+#define NV50_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002
+#define NV50_3D_QUERY_GET_UNK4 0x00000010
+#define NVA0_3D_QUERY_GET_INDEX__MASK 0x000000e0
+#define NVA0_3D_QUERY_GET_INDEX__SHIFT 5
+#define NV50_3D_QUERY_GET_UNK8 0x00000100
+#define NV50_3D_QUERY_GET_UNIT__MASK 0x0000f000
+#define NV50_3D_QUERY_GET_UNIT__SHIFT 12
+#define NV50_3D_QUERY_GET_UNIT_UNK00 0x00000000
+#define NV50_3D_QUERY_GET_UNIT_VFETCH 0x00001000
+#define NV50_3D_QUERY_GET_UNIT_VP 0x00002000
+#define NV50_3D_QUERY_GET_UNIT_RAST 0x00004000
+#define NV50_3D_QUERY_GET_UNIT_STRMOUT 0x00005000
+#define NV50_3D_QUERY_GET_UNIT_GP 0x00006000
+#define NV50_3D_QUERY_GET_UNIT_ZCULL 0x00007000
+#define NV50_3D_QUERY_GET_UNIT_TPROP 0x0000a000
+#define NV50_3D_QUERY_GET_UNIT_UNK0C 0x0000c000
+#define NV50_3D_QUERY_GET_UNIT_CROP 0x0000f000
+#define NV50_3D_QUERY_GET_SYNC_COND__MASK 0x00010000
+#define NV50_3D_QUERY_GET_SYNC_COND__SHIFT 16
+#define NV50_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000
+#define NV50_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000
+#define NV50_3D_QUERY_GET_INTR 0x00100000
+#define NV50_3D_QUERY_GET_TYPE__MASK 0x00800000
+#define NV50_3D_QUERY_GET_TYPE__SHIFT 23
+#define NV50_3D_QUERY_GET_TYPE_QUERY 0x00000000
+#define NV50_3D_QUERY_GET_TYPE_COUNTER 0x00800000
+#define NV50_3D_QUERY_GET_QUERY_SELECT__MASK 0x0f000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT__SHIFT 24
+#define NV50_3D_QUERY_GET_QUERY_SELECT_ZERO 0x00000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_SAMPLECNT 0x01000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_STRMOUT_NO_OVERFLOW 0x02000000
+#define NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_DROPPED_PRIMITIVES 0x03000000
+#define NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_VERTICES 0x04000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK0 0x05000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK1 0x06000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK2 0x07000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK3 0x08000000
+#define NVA0_3D_QUERY_GET_QUERY_SELECT_RT_UNK14 0x0c000000
+#define NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_OFFSET 0x0d000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT__MASK 0x0f000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT__SHIFT 24
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_VFETCH_VERTICES 0x00000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_VFETCH_PRIMITIVES 0x01000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_VP_LAUNCHES 0x02000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_GP_LAUNCHES 0x03000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_GP_PRIMITIVES_OUT 0x04000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_TRANSFORM_FEEDBACK 0x05000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_GENERATED_PRIMITIVES 0x06000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_RAST_PRIMITIVES_PRECLIP 0x07000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_RAST_PRIMITIVES_POSTCLIP 0x08000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_FP_PIXELS 0x09000000
+#define NV84_3D_QUERY_GET_COUNTER_SELECT_UNK0A 0x0a000000
+#define NVA0_3D_QUERY_GET_COUNTER_SELECT_UNK0C 0x0c000000
+#define NV50_3D_QUERY_GET_SHORT 0x10000000
+
+#define NVA3_3D_VP_RESULT_MAP_ALT(i0) (0x00001b3c + 0x4*(i0))
+#define NVA3_3D_VP_RESULT_MAP_ALT__ESIZE 0x00000004
+#define NVA3_3D_VP_RESULT_MAP_ALT__LEN 0x00000020
+#define NVA3_3D_VP_RESULT_MAP_ALT_0__MASK 0x000000ff
+#define NVA3_3D_VP_RESULT_MAP_ALT_0__SHIFT 0
+#define NVA3_3D_VP_RESULT_MAP_ALT_1__MASK 0x0000ff00
+#define NVA3_3D_VP_RESULT_MAP_ALT_1__SHIFT 8
+#define NVA3_3D_VP_RESULT_MAP_ALT_2__MASK 0x00ff0000
+#define NVA3_3D_VP_RESULT_MAP_ALT_2__SHIFT 16
+#define NVA3_3D_VP_RESULT_MAP_ALT_3__MASK 0xff000000
+#define NVA3_3D_VP_RESULT_MAP_ALT_3__SHIFT 24
+
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT(i0) (0x00001c00 + 0x10*(i0))
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT__ESIZE 0x00000010
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT__LEN 0x00000020
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_STRIDE__MASK 0x00000fff
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_STRIDE__SHIFT 0
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_ENABLE 0x20000000
+
+#define NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT(i0) (0x00001c04 + 0x10*(i0))
+#define NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT__ESIZE 0x00000010
+#define NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT__LEN 0x00000020
+
+#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT(i0) (0x00001c08 + 0x10*(i0))
+#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT__ESIZE 0x00000010
+#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT__LEN 0x00000020
+
+#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT(i0) (0x00001c0c + 0x10*(i0))
+#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT__ESIZE 0x00000010
+#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT__LEN 0x00000020
+
+#define NVA3_3D_IBLEND(i0) (0x00001e00 + 0x20*(i0))
+#define NVA3_3D_IBLEND__ESIZE 0x00000020
+#define NVA3_3D_IBLEND__LEN 0x00000008
+
+#define NVA3_3D_IBLEND_UNK00(i0) (0x00001e00 + 0x20*(i0))
+
+#define NVA3_3D_IBLEND_EQUATION_RGB(i0) (0x00001e04 + 0x20*(i0))
+#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NVA3_3D_IBLEND_EQUATION_RGB_MIN 0x00008007
+#define NVA3_3D_IBLEND_EQUATION_RGB_MAX 0x00008008
+#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVA3_3D_IBLEND_FUNC_SRC_RGB(i0) (0x00001e08 + 0x20*(i0))
+
+#define NVA3_3D_IBLEND_FUNC_DST_RGB(i0) (0x00001e0c + 0x20*(i0))
+
+#define NVA3_3D_IBLEND_EQUATION_ALPHA(i0) (0x00001e10 + 0x20*(i0))
+#define NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
+#define NVA3_3D_IBLEND_EQUATION_ALPHA_MIN 0x00008007
+#define NVA3_3D_IBLEND_EQUATION_ALPHA_MAX 0x00008008
+#define NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVA3_3D_IBLEND_FUNC_SRC_ALPHA(i0) (0x00001e14 + 0x20*(i0))
+
+#define NVA3_3D_IBLEND_FUNC_DST_ALPHA(i0) (0x00001e18 + 0x20*(i0))
+
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT(i0) (0x00001f00 + 0x8*(i0))
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT__ESIZE 0x00000008
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT__LEN 0x00000020
+
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT(i0) (0x00001f04 + 0x8*(i0))
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT__ESIZE 0x00000008
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT__LEN 0x00000020
+
+
+#endif /* NV50_3D_XML */
diff --git a/src/gallium/drivers/nv50/nv50_3ddefs.xml.h b/src/gallium/drivers/nv50/nv50_3ddefs.xml.h
new file mode 100644
index 0000000000..f26ac45da4
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_3ddefs.xml.h
@@ -0,0 +1,98 @@
+#ifndef NV_3DDEFS_XML
+#define NV_3DDEFS_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nv50_3d.xml ( 26312 bytes, from 2010-10-08 10:10:01)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+- nv_3ddefs.xml ( 16397 bytes, from 2010-10-08 13:30:38)
+- nv_object.xml ( 11249 bytes, from 2010-10-07 15:31:28)
+- nvchipsets.xml ( 2824 bytes, from 2010-07-07 13:41:20)
+- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define NV50_3D_BLEND_FACTOR_ZERO 0x00004000
+#define NV50_3D_BLEND_FACTOR_ONE 0x00004001
+#define NV50_3D_BLEND_FACTOR_SRC_COLOR 0x00004300
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x00004301
+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA 0x00004302
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NV50_3D_BLEND_FACTOR_DST_ALPHA 0x00004304
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x00004305
+#define NV50_3D_BLEND_FACTOR_DST_COLOR 0x00004306
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x00004307
+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE 0x00004308
+#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR 0x0000c001
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA 0x0000c003
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NV50_3D_BLEND_FACTOR_SRC1_COLOR 0x0000c900
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA 0x0000c902
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA 0x0000c903
+
+#endif /* NV_3DDEFS_XML */
diff --git a/src/gallium/drivers/nv50/nv50_buffer.c b/src/gallium/drivers/nv50/nv50_buffer.c
deleted file mode 100644
index 45356f9f63..0000000000
--- a/src/gallium/drivers/nv50/nv50_buffer.c
+++ /dev/null
@@ -1,151 +0,0 @@
-
-#include "util/u_inlines.h"
-#include "util/u_memory.h"
-#include "util/u_math.h"
-
-#include "nouveau/nouveau_screen.h"
-#include "nouveau/nouveau_winsys.h"
-#include "nv50_resource.h"
-
-
-
-static void nv50_buffer_destroy(struct pipe_screen *pscreen,
- struct pipe_resource *presource)
-{
- struct nv50_resource *buffer = nv50_resource(presource);
-
- nouveau_screen_bo_release(pscreen, buffer->bo);
- FREE(buffer);
-}
-
-
-
-
-/* Utility functions for transfer create/destroy are hooked in and
- * just record the arguments to those functions.
- */
-static void *
-nv50_buffer_transfer_map( struct pipe_context *pipe,
- struct pipe_transfer *transfer )
-{
- struct nv50_resource *buffer = nv50_resource(transfer->resource);
- uint8_t *map;
-
- map = nouveau_screen_bo_map_range( pipe->screen,
- buffer->bo,
- transfer->box.x,
- transfer->box.width,
- nouveau_screen_transfer_flags(transfer->usage) );
- if (map == NULL)
- return NULL;
-
- return map + transfer->box.x;
-}
-
-
-
-static void nv50_buffer_transfer_flush_region( struct pipe_context *pipe,
- struct pipe_transfer *transfer,
- const struct pipe_box *box)
-{
- struct nv50_resource *buffer = nv50_resource(transfer->resource);
-
- nouveau_screen_bo_map_flush_range(pipe->screen,
- buffer->bo,
- transfer->box.x + box->x,
- box->width);
-}
-
-static void nv50_buffer_transfer_unmap( struct pipe_context *pipe,
- struct pipe_transfer *transfer )
-{
- struct nv50_resource *buffer = nv50_resource(transfer->resource);
-
- nouveau_screen_bo_unmap(pipe->screen, buffer->bo);
-}
-
-
-
-
-const struct u_resource_vtbl nv50_buffer_vtbl =
-{
- u_default_resource_get_handle, /* get_handle */
- nv50_buffer_destroy, /* resource_destroy */
- NULL, /* is_resource_referenced */
- u_default_get_transfer, /* get_transfer */
- u_default_transfer_destroy, /* transfer_destroy */
- nv50_buffer_transfer_map, /* transfer_map */
- nv50_buffer_transfer_flush_region, /* transfer_flush_region */
- nv50_buffer_transfer_unmap, /* transfer_unmap */
- u_default_transfer_inline_write /* transfer_inline_write */
-};
-
-
-
-
-struct pipe_resource *
-nv50_buffer_create(struct pipe_screen *pscreen,
- const struct pipe_resource *template)
-{
- struct nv50_resource *buffer;
-
- buffer = CALLOC_STRUCT(nv50_resource);
- if (!buffer)
- return NULL;
-
- buffer->base = *template;
- buffer->vtbl = &nv50_buffer_vtbl;
- pipe_reference_init(&buffer->base.reference, 1);
- buffer->base.screen = pscreen;
-
- buffer->bo = nouveau_screen_bo_new(pscreen,
- 16,
- buffer->base.usage,
- buffer->base.bind,
- buffer->base.width0);
-
- if (buffer->bo == NULL)
- goto fail;
-
- return &buffer->base;
-
-fail:
- FREE(buffer);
- return NULL;
-}
-
-
-struct pipe_resource *
-nv50_user_buffer_create(struct pipe_screen *pscreen,
- void *ptr,
- unsigned bytes,
- unsigned bind)
-{
- struct nv50_resource *buffer;
-
- buffer = CALLOC_STRUCT(nv50_resource);
- if (!buffer)
- return NULL;
-
- pipe_reference_init(&buffer->base.reference, 1);
- buffer->vtbl = &nv50_buffer_vtbl;
- buffer->base.screen = pscreen;
- buffer->base.format = PIPE_FORMAT_R8_UNORM;
- buffer->base.usage = PIPE_USAGE_IMMUTABLE;
- buffer->base.bind = bind;
- buffer->base.width0 = bytes;
- buffer->base.height0 = 1;
- buffer->base.depth0 = 1;
- buffer->base.array_size = 1;
-
- buffer->bo = nouveau_screen_bo_user(pscreen, ptr, bytes);
- if (!buffer->bo)
- goto fail;
-
- return &buffer->base;
-
-fail:
- FREE(buffer);
- return NULL;
-}
-
diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c
deleted file mode 100644
index ee7cf281f4..0000000000
--- a/src/gallium/drivers/nv50/nv50_clear.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright 2008 Ben Skeggs
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
- * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_state.h"
-
-#include "nv50_context.h"
-
-void
-nv50_clear(struct pipe_context *pipe, unsigned buffers,
- const float *rgba, double depth, unsigned stencil)
-{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct pipe_framebuffer_state *fb = &nv50->framebuffer;
- unsigned mode = 0, i;
- const unsigned dirty = nv50->dirty;
-
- /* don't need NEW_BLEND, NV50TCL_COLOR_MASK doesn't affect CLEAR_BUFFERS */
- nv50->dirty &= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
- if (!nv50_state_validate(nv50, 64))
- return;
-
- if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
- BEGIN_RING(chan, tesla, NV50TCL_CLEAR_COLOR(0), 4);
- OUT_RING (chan, fui(rgba[0]));
- OUT_RING (chan, fui(rgba[1]));
- OUT_RING (chan, fui(rgba[2]));
- OUT_RING (chan, fui(rgba[3]));
- mode |= 0x3c;
- }
-
- if (buffers & PIPE_CLEAR_DEPTH) {
- BEGIN_RING(chan, tesla, NV50TCL_CLEAR_DEPTH, 1);
- OUT_RING (chan, fui(depth));
- mode |= NV50TCL_CLEAR_BUFFERS_Z;
- }
- if (buffers & PIPE_CLEAR_STENCIL) {
- BEGIN_RING(chan, tesla, NV50TCL_CLEAR_STENCIL, 1);
- OUT_RING (chan, stencil & 0xff);
- mode |= NV50TCL_CLEAR_BUFFERS_S;
- }
-
- BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1);
- OUT_RING (chan, mode);
-
- for (i = 1; i < fb->nr_cbufs; i++) {
- BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1);
- OUT_RING (chan, (i << 6) | 0x3c);
- }
- nv50->dirty = dirty;
-}
-
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index 0874cb5e4e..930cee7c1e 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Ben Skeggs
+ * Copyright 2010 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -27,76 +27,179 @@
#include "nv50_screen.h"
#include "nv50_resource.h"
+#include "nouveau/nouveau_reloc.h"
+
static void
-nv50_flush(struct pipe_context *pipe, unsigned flags,
- struct pipe_fence_handle **fence)
+nv50_flush(struct pipe_context *pipe,
+ struct pipe_fence_handle **fence)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nouveau_screen *screen = &nv50_context(pipe)->screen->base;
+
+ if (fence)
+ nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence);
- if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
- BEGIN_RING(chan, nv50->screen->tesla, 0x1338, 1);
- OUT_RING (chan, 0x20);
- }
+ /* Try to emit before firing to avoid having to flush again right after
+ * in case we have to wait on this fence.
+ */
+ nouveau_fence_emit(screen->fence.current);
- if (flags & PIPE_FLUSH_FRAME)
- FIRE_RING(chan);
+ FIRE_RING(screen->channel);
+}
+
+void
+nv50_default_flush_notify(struct nouveau_channel *chan)
+{
+ struct nv50_context *nv50 = chan->user_private;
+
+ if (!nv50)
+ return;
+
+ nouveau_fence_update(&nv50->screen->base, TRUE);
+ nouveau_fence_next(&nv50->screen->base);
}
static void
-nv50_destroy(struct pipe_context *pipe)
+nv50_context_unreference_resources(struct nv50_context *nv50)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- int i;
+ unsigned s, i;
+
+ for (i = 0; i < NV50_BUFCTX_COUNT; ++i)
+ nv50_bufctx_reset(nv50, i);
- for (i = 0; i < 64; i++) {
- if (!nv50->state.hw[i])
- continue;
- so_ref(NULL, &nv50->state.hw[i]);
- }
+ for (i = 0; i < nv50->num_vtxbufs; ++i)
+ pipe_resource_reference(&nv50->vtxbuf[i].buffer, NULL);
- draw_destroy(nv50->draw);
+ pipe_resource_reference(&nv50->idxbuf.buffer, NULL);
- if (nv50->screen->cur_ctx == nv50)
- nv50->screen->cur_ctx = NULL;
+ for (s = 0; s < 3; ++s) {
+ for (i = 0; i < nv50->num_textures[s]; ++i)
+ pipe_sampler_view_reference(&nv50->textures[s][i], NULL);
- FREE(nv50);
+ for (i = 0; i < 16; ++i)
+ pipe_resource_reference(&nv50->constbuf[s][i], NULL);
+ }
}
+static void
+nv50_destroy(struct pipe_context *pipe)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50_context_unreference_resources(nv50);
+
+ draw_destroy(nv50->draw);
+
+ if (nv50->screen->cur_ctx == nv50) {
+ nv50->screen->base.channel->user_private = NULL;
+ nv50->screen->cur_ctx = NULL;
+ }
+
+ FREE(nv50);
+}
struct pipe_context *
nv50_create(struct pipe_screen *pscreen, void *priv)
{
- struct pipe_winsys *pipe_winsys = pscreen->winsys;
- struct nv50_screen *screen = nv50_screen(pscreen);
- struct nv50_context *nv50;
+ struct pipe_winsys *pipe_winsys = pscreen->winsys;
+ struct nv50_screen *screen = nv50_screen(pscreen);
+ struct nv50_context *nv50;
+ struct pipe_context *pipe;
- nv50 = CALLOC_STRUCT(nv50_context);
- if (!nv50)
- return NULL;
- nv50->screen = screen;
+ nv50 = CALLOC_STRUCT(nv50_context);
+ if (!nv50)
+ return NULL;
+ pipe = &nv50->base.pipe;
- nv50->pipe.winsys = pipe_winsys;
- nv50->pipe.screen = pscreen;
- nv50->pipe.priv = priv;
+ nv50->screen = screen;
+ nv50->base.screen = &screen->base;
+ nv50->base.copy_data = nv50_m2mf_copy_linear;
+ nv50->base.push_data = nv50_sifc_linear_u8;
- nv50->pipe.destroy = nv50_destroy;
+ pipe->winsys = pipe_winsys;
+ pipe->screen = pscreen;
+ pipe->priv = priv;
- nv50->pipe.draw_vbo = nv50_draw_vbo;
- nv50->pipe.clear = nv50_clear;
+ pipe->destroy = nv50_destroy;
- nv50->pipe.flush = nv50_flush;
+ pipe->draw_vbo = nv50_draw_vbo;
+ pipe->clear = nv50_clear;
+
+ pipe->flush = nv50_flush;
+
+ if (!screen->cur_ctx)
+ screen->cur_ctx = nv50;
+ screen->base.channel->user_private = nv50;
+ screen->base.channel->flush_notify = nv50_default_flush_notify;
+
+ nv50_init_query_functions(nv50);
+ nv50_init_surface_functions(nv50);
+ nv50_init_state_functions(nv50);
+ nv50_init_resource_functions(pipe);
+
+ nv50->draw = draw_create(pipe);
+ assert(nv50->draw);
+ draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50));
+
+ return pipe;
+}
+
+struct resident {
+ struct nv04_resource *res;
+ uint32_t flags;
+};
+
+void
+nv50_bufctx_add_resident(struct nv50_context *nv50, int ctx,
+ struct nv04_resource *resource, uint32_t flags)
+{
+ struct resident rsd = { resource, flags };
+
+ if (!resource->bo)
+ return;
+
+ /* We don't need to reference the resource here, it will be referenced
+ * in the context/state, and bufctx will be reset when state changes.
+ */
+ util_dynarray_append(&nv50->residents[ctx], struct resident, rsd);
+}
+
+void
+nv50_bufctx_del_resident(struct nv50_context *nv50, int ctx,
+ struct nv04_resource *resource)
+{
+ struct resident *rsd, *top;
+ unsigned i;
+
+ for (i = 0; i < nv50->residents[ctx].size / sizeof(struct resident); ++i) {
+ rsd = util_dynarray_element(&nv50->residents[ctx], struct resident, i);
+
+ if (rsd->res == resource) {
+ top = util_dynarray_pop_ptr(&nv50->residents[ctx], struct resident);
+ if (rsd != top)
+ *rsd = *top;
+ break;
+ }
+ }
+}
+
+void
+nv50_bufctx_emit_relocs(struct nv50_context *nv50)
+{
+ struct resident *rsd;
+ struct util_dynarray *array;
+ unsigned ctx, i, n;
- screen->base.channel->user_private = nv50;
+ for (ctx = 0; ctx < NV50_BUFCTX_COUNT; ++ctx) {
+ array = &nv50->residents[ctx];
- nv50_init_surface_functions(nv50);
- nv50_init_state_functions(nv50);
- nv50_init_query_functions(nv50);
- nv50_init_resource_functions(&nv50->pipe);
+ n = array->size / sizeof(struct resident);
+ MARK_RING(nv50->screen->base.channel, n, n);
+ for (i = 0; i < n; ++i) {
+ rsd = util_dynarray_element(array, struct resident, i);
- nv50->draw = draw_create(&nv50->pipe);
- assert(nv50->draw);
- draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50));
+ nv50_resource_validate(rsd->res, rsd->flags);
+ }
+ }
- return &nv50->pipe;
+ nv50_screen_make_buffers_resident(nv50->screen);
}
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index b2b0b72fe2..46e6c2250a 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -5,265 +5,230 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
-#include "pipe/p_compiler.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_inlines.h"
+#include "util/u_dynarray.h"
#include "draw/draw_vertex.h"
-#include "nouveau/nouveau_winsys.h"
-#include "nouveau/nouveau_gldefs.h"
-#include "nouveau/nouveau_stateobj.h"
-#include "nv50_reg.h"
-
+#include "nv50_winsys.h"
+#include "nv50_stateobj.h"
#include "nv50_screen.h"
#include "nv50_program.h"
+#include "nv50_resource.h"
-#define NOUVEAU_ERR(fmt, args...) \
- fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args);
-#define NOUVEAU_MSG(fmt, args...) \
- fprintf(stderr, "nouveau: "fmt, ##args);
-
-#define nouveau_bo_tile_layout(nvbo) \
- ((nvbo)->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK)
-
-/* Constant buffer assignment */
-#define NV50_CB_PMISC 0
-#define NV50_CB_PVP 1
-#define NV50_CB_PFP 2
-#define NV50_CB_PGP 3
-#define NV50_CB_AUX 4
-
-#define NV50_NEW_BLEND (1 << 0)
-#define NV50_NEW_ZSA (1 << 1)
-#define NV50_NEW_BLEND_COLOUR (1 << 2)
-#define NV50_NEW_STIPPLE (1 << 3)
-#define NV50_NEW_SCISSOR (1 << 4)
-#define NV50_NEW_VIEWPORT (1 << 5)
-#define NV50_NEW_RASTERIZER (1 << 6)
-#define NV50_NEW_FRAMEBUFFER (1 << 7)
-#define NV50_NEW_VERTPROG (1 << 8)
-#define NV50_NEW_VERTPROG_CB (1 << 9)
-#define NV50_NEW_FRAGPROG (1 << 10)
-#define NV50_NEW_FRAGPROG_CB (1 << 11)
-#define NV50_NEW_GEOMPROG (1 << 12)
-#define NV50_NEW_GEOMPROG_CB (1 << 13)
-#define NV50_NEW_ARRAYS (1 << 14)
-#define NV50_NEW_SAMPLER (1 << 15)
-#define NV50_NEW_TEXTURE (1 << 16)
-#define NV50_NEW_STENCIL_REF (1 << 17)
-#define NV50_NEW_CLIP (1 << 18)
-
-struct nv50_blend_stateobj {
- struct pipe_blend_state pipe;
- struct nouveau_stateobj *so;
-};
-
-struct nv50_zsa_stateobj {
- struct pipe_depth_stencil_alpha_state pipe;
- struct nouveau_stateobj *so;
-};
+#include "nouveau/nouveau_context.h"
+#include "nouveau/nv_object.xml.h"
+#include "nouveau/nv_m2mf.xml.h"
+#include "nv50_3ddefs.xml.h"
+#include "nv50_3d.xml.h"
+#include "nv50_2d.xml.h"
-struct nv50_rasterizer_stateobj {
- struct pipe_rasterizer_state pipe;
- struct nouveau_stateobj *so;
-};
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args);
-struct nv50_sampler_stateobj {
- boolean normalized;
- unsigned tsc[8];
-};
+#ifdef NOUVEAU_DEBUG
+# define NOUVEAU_DBG(args...) printf(args);
+#else
+# define NOUVEAU_DBG(args...)
+#endif
-struct nv50_sampler_view {
- struct pipe_sampler_view pipe;
- uint32_t tic[8];
-};
+#define NV50_NEW_BLEND (1 << 0)
+#define NV50_NEW_RASTERIZER (1 << 1)
+#define NV50_NEW_ZSA (1 << 2)
+#define NV50_NEW_VERTPROG (1 << 3)
+#define NV50_NEW_GMTYPROG (1 << 6)
+#define NV50_NEW_FRAGPROG (1 << 7)
+#define NV50_NEW_BLEND_COLOUR (1 << 8)
+#define NV50_NEW_STENCIL_REF (1 << 9)
+#define NV50_NEW_CLIP (1 << 10)
+#define NV50_NEW_SAMPLE_MASK (1 << 11)
+#define NV50_NEW_FRAMEBUFFER (1 << 12)
+#define NV50_NEW_STIPPLE (1 << 13)
+#define NV50_NEW_SCISSOR (1 << 14)
+#define NV50_NEW_VIEWPORT (1 << 15)
+#define NV50_NEW_ARRAYS (1 << 16)
+#define NV50_NEW_VERTEX (1 << 17)
+#define NV50_NEW_CONSTBUF (1 << 18)
+#define NV50_NEW_TEXTURES (1 << 19)
+#define NV50_NEW_SAMPLERS (1 << 20)
+
+#define NV50_BUFCTX_CONSTANT 0
+#define NV50_BUFCTX_FRAME 1
+#define NV50_BUFCTX_VERTEX 2
+#define NV50_BUFCTX_TEXTURES 3
+#define NV50_BUFCTX_COUNT 4
+
+/* fixed constant buffer binding points - low indices for user's constbufs */
+#define NV50_CB_PVP 124
+#define NV50_CB_PGP 126
+#define NV50_CB_PFP 125
+#define NV50_CB_AUX 127
-struct nv50_vtxelt_stateobj {
- struct pipe_vertex_element pipe[16];
- unsigned num_elements;
- uint32_t hw[16];
+struct nv50_context {
+ struct nouveau_context base;
+
+ struct nv50_screen *screen;
+
+ struct util_dynarray residents[NV50_BUFCTX_COUNT];
+
+ uint32_t dirty;
+
+ struct {
+ uint32_t instance_elts; /* bitmask of per-instance elements */
+ uint32_t instance_base;
+ uint32_t interpolant_ctrl;
+ int32_t index_bias;
+ boolean prim_restart;
+ boolean point_sprite;
+ uint8_t num_vtxbufs;
+ uint8_t num_vtxelts;
+ uint8_t num_textures[3];
+ uint8_t num_samplers[3];
+ uint16_t scissor;
+ } state;
+
+ struct nv50_blend_stateobj *blend;
+ struct nv50_rasterizer_stateobj *rast;
+ struct nv50_zsa_stateobj *zsa;
+ struct nv50_vertex_stateobj *vertex;
+
+ struct nv50_program *vertprog;
+ struct nv50_program *gmtyprog;
+ struct nv50_program *fragprog;
+
+ struct pipe_resource *constbuf[3][16];
+ uint16_t constbuf_dirty[3];
+
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ unsigned num_vtxbufs;
+ struct pipe_index_buffer idxbuf;
+ uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */
+ uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
+ unsigned vbo_min_index; /* from pipe_draw_info, for vertex upload */
+ unsigned vbo_max_index;
+
+ struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS];
+ unsigned num_textures[3];
+ struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS];
+ unsigned num_samplers[3];
+
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_blend_color blend_colour;
+ struct pipe_stencil_ref stencil_ref;
+ struct pipe_poly_stipple stipple;
+ struct pipe_scissor_state scissor;
+ struct pipe_viewport_state viewport;
+ struct pipe_clip_state clip;
+
+ unsigned sample_mask;
+
+ boolean vbo_push_hint;
+
+ struct draw_context *draw;
};
-static INLINE struct nv50_sampler_view *
-nv50_sampler_view(struct pipe_sampler_view *view)
-{
- return (struct nv50_sampler_view *)view;
-}
-
-static INLINE unsigned
-get_tile_height(uint32_t tile_mode)
-{
- return 1 << ((tile_mode & 0xf) + 2);
-}
-
-static INLINE unsigned
-get_tile_depth(uint32_t tile_mode)
+static INLINE struct nv50_context *
+nv50_context(struct pipe_context *pipe)
{
- return 1 << (tile_mode >> 4);
+ return (struct nv50_context *)pipe;
}
-
struct nv50_surface {
- struct pipe_surface base;
- unsigned offset;
+ struct pipe_surface base;
+ uint32_t offset;
+ uint32_t width;
+ uint16_t height;
+ uint16_t depth;
};
static INLINE struct nv50_surface *
-nv50_surface(struct pipe_surface *pt)
+nv50_surface(struct pipe_surface *ps)
{
- return (struct nv50_surface *)pt;
+ return (struct nv50_surface *)ps;
}
-struct nv50_state {
- struct nouveau_stateobj *hw[64];
- uint64_t hw_dirty;
-
- unsigned sampler_view_nr[3];
- struct nouveau_stateobj *vtxbuf;
- struct nouveau_stateobj *vtxattr;
- unsigned vtxelt_nr;
-};
+/* nv50_context.c */
+struct pipe_context *nv50_create(struct pipe_screen *, void *);
-struct nv50_context {
- struct pipe_context pipe;
-
- struct nv50_screen *screen;
-
- struct draw_context *draw;
-
- struct nv50_state state;
-
- unsigned dirty;
- struct nv50_blend_stateobj *blend;
- struct nv50_zsa_stateobj *zsa;
- struct nv50_rasterizer_stateobj *rasterizer;
- struct pipe_blend_color blend_colour;
- struct pipe_stencil_ref stencil_ref;
- struct pipe_poly_stipple stipple;
- struct pipe_scissor_state scissor;
- struct pipe_viewport_state viewport;
- struct pipe_framebuffer_state framebuffer;
- struct pipe_clip_state clip;
- struct nv50_program *vertprog;
- struct nv50_program *fragprog;
- struct nv50_program *geomprog;
- struct pipe_resource *constbuf[PIPE_SHADER_TYPES];
- struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
- unsigned vtxbuf_nr;
- struct pipe_index_buffer idxbuf;
- struct nv50_vtxelt_stateobj *vtxelt;
- struct nv50_sampler_stateobj *sampler[3][PIPE_MAX_SAMPLERS];
- unsigned sampler_nr[3];
- struct pipe_sampler_view *sampler_views[3][PIPE_MAX_SAMPLERS];
- unsigned sampler_view_nr[3];
-
- unsigned vbo_fifo;
- unsigned req_lmem;
-};
+void nv50_default_flush_notify(struct nouveau_channel *);
-static INLINE struct nv50_context *
-nv50_context(struct pipe_context *pipe)
+void nv50_bufctx_emit_relocs(struct nv50_context *);
+void nv50_bufctx_add_resident(struct nv50_context *, int ctx,
+ struct nv04_resource *, uint32_t flags);
+void nv50_bufctx_del_resident(struct nv50_context *, int ctx,
+ struct nv04_resource *);
+static INLINE void
+nv50_bufctx_reset(struct nv50_context *nv50, int ctx)
{
- return (struct nv50_context *)pipe;
+ util_dynarray_resize(&nv50->residents[ctx], 0);
}
-extern void nv50_init_surface_functions(struct nv50_context *nv50);
-extern void nv50_init_state_functions(struct nv50_context *nv50);
-extern void nv50_init_query_functions(struct nv50_context *nv50);
-extern void nv50_init_transfer_functions(struct nv50_context *nv50);
-
-extern void nv50_screen_init_miptree_functions(struct pipe_screen *pscreen);
-
-extern int
-nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst,
- int dx, int dy, struct pipe_surface *src, int sx, int sy,
- int w, int h);
-
/* nv50_draw.c */
-extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50);
+extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
-/* nv50_vbo.c */
-extern void nv50_draw_vbo(struct pipe_context *pipe,
- const struct pipe_draw_info *info);
-extern void nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso);
-extern struct nouveau_stateobj *nv50_vbo_validate(struct nv50_context *nv50);
+/* nv50_program.c */
+boolean nv50_program_translate(struct nv50_program *);
+void nv50_program_destroy(struct nv50_context *, struct nv50_program *);
-/* nv50_push.c */
-extern void
-nv50_push_elements_instanced(struct pipe_context *, struct pipe_resource *,
- unsigned idxsize, int idxbias,
- unsigned mode, unsigned start,
- unsigned count, unsigned i_start,
- unsigned i_count);
+/* nv50_query.c */
+void nv50_init_query_functions(struct nv50_context *);
-/* nv50_clear.c */
-extern void nv50_clear(struct pipe_context *pipe, unsigned buffers,
- const float *rgba, double depth, unsigned stencil);
+/* nv50_shader_state.c */
+void nv50_vertprog_validate(struct nv50_context *);
+void nv50_gmtyprog_validate(struct nv50_context *);
+void nv50_fragprog_validate(struct nv50_context *);
+void nv50_fp_linkage_validate(struct nv50_context *);
+void nv50_gp_linkage_validate(struct nv50_context *);
+void nv50_constbufs_validate(struct nv50_context *);
+void nv50_sprite_coords_validate(struct nv50_context *);
-/* nv50_program.c */
-extern struct nouveau_stateobj *
-nv50_vertprog_validate(struct nv50_context *nv50);
-extern struct nouveau_stateobj *
-nv50_fragprog_validate(struct nv50_context *nv50);
-extern struct nouveau_stateobj *
-nv50_geomprog_validate(struct nv50_context *nv50);
-extern struct nouveau_stateobj *
-nv50_fp_linkage_validate(struct nv50_context *nv50);
-extern struct nouveau_stateobj *
-nv50_gp_linkage_validate(struct nv50_context *nv50);
-extern void nv50_program_destroy(struct nv50_context *nv50,
- struct nv50_program *p);
+/* nv50_state.c */
+extern void nv50_init_state_functions(struct nv50_context *);
/* nv50_state_validate.c */
-extern boolean nv50_state_validate(struct nv50_context *nv50, unsigned dwords);
+extern boolean nv50_state_validate(struct nv50_context *);
-extern void nv50_so_init_sifc(struct nv50_context *nv50,
- struct nouveau_stateobj *so,
- struct nouveau_bo *bo, unsigned reloc,
- unsigned offset, unsigned size);
+/* nv50_surface.c */
+extern void nv50_clear(struct pipe_context *, unsigned buffers,
+ const float *rgba, double depth, unsigned stencil);
+extern void nv50_init_surface_functions(struct nv50_context *);
/* nv50_tex.c */
-extern boolean nv50_tex_construct(struct nv50_sampler_view *view);
-extern void nv50_tex_relocs(struct nv50_context *);
-extern struct nouveau_stateobj *nv50_tex_validate(struct nv50_context *);
+void nv50_validate_textures(struct nv50_context *);
+void nv50_validate_samplers(struct nv50_context *);
+
+struct pipe_sampler_view *
+nv50_create_sampler_view(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_sampler_view *);
+
+/* nv50_transfer.c */
+void
+nv50_sifc_linear_u8(struct nouveau_context *pipe,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, void *data);
+void
+nv50_m2mf_copy_linear(struct nouveau_context *pipe,
+ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
+ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
+ unsigned size);
+
+/* nv50_vbo.c */
+void nv50_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
+void *
+nv50_vertex_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements);
+void
+nv50_vertex_state_delete(struct pipe_context *pipe, void *hwcso);
-/* nv50_context.c */
-struct pipe_context *
-nv50_create(struct pipe_screen *pscreen, void *priv);
+void nv50_vertex_arrays_validate(struct nv50_context *nv50);
-static INLINE unsigned
-nv50_prim(unsigned mode)
-{
- switch (mode) {
- case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
- case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
- case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
- case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
- case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
- case PIPE_PRIM_TRIANGLE_STRIP:
- return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
- case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
- case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
- case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
- case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
- case PIPE_PRIM_LINES_ADJACENCY:
- return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
- case PIPE_PRIM_LINE_STRIP_ADJACENCY:
- return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
- case PIPE_PRIM_TRIANGLES_ADJACENCY:
- return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
- case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
- return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
- default:
- break;
- }
-
- NOUVEAU_ERR("invalid primitive type %d\n", mode);
- return NV50TCL_VERTEX_BEGIN_POINTS;
-}
+/* nv50_push.c */
+void nv50_push_vbo(struct nv50_context *, const struct pipe_draw_info *);
#endif
diff --git a/src/gallium/drivers/nv50/nv50_defs.xml.h b/src/gallium/drivers/nv50/nv50_defs.xml.h
new file mode 100644
index 0000000000..1bf2f802b5
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_defs.xml.h
@@ -0,0 +1,142 @@
+#ifndef NV50_DEFS_XML
+#define NV50_DEFS_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT 0x000000c0
+#define NV50_SURFACE_FORMAT_R32G32B32A32_SINT 0x000000c1
+#define NV50_SURFACE_FORMAT_R32G32B32A32_UINT 0x000000c2
+#define NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT 0x000000c3
+#define NV50_SURFACE_FORMAT_R16G16B16A16_UNORM 0x000000c6
+#define NV50_SURFACE_FORMAT_R16G16B16A16_SNORM 0x000000c7
+#define NV50_SURFACE_FORMAT_R16G16B16A16_SINT 0x000000c8
+#define NV50_SURFACE_FORMAT_R16G16B16A16_UINT 0x000000c9
+#define NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT 0x000000ca
+#define NV50_SURFACE_FORMAT_R32G32_FLOAT 0x000000cb
+#define NV50_SURFACE_FORMAT_R32G32_SINT 0x000000cc
+#define NV50_SURFACE_FORMAT_R32G32_UINT 0x000000cd
+#define NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT 0x000000ce
+#define NV50_SURFACE_FORMAT_A8R8G8B8_UNORM 0x000000cf
+#define NV50_SURFACE_FORMAT_A8R8G8B8_SRGB 0x000000d0
+#define NV50_SURFACE_FORMAT_A2B10G10R10_UNORM 0x000000d1
+#define NV50_SURFACE_FORMAT_A2B10G10R10_UINT 0x000000d2
+#define NV50_SURFACE_FORMAT_A8B8G8R8_UNORM 0x000000d5
+#define NV50_SURFACE_FORMAT_A8B8G8R8_SRGB 0x000000d6
+#define NV50_SURFACE_FORMAT_A8B8G8R8_SNORM 0x000000d7
+#define NV50_SURFACE_FORMAT_A8B8G8R8_SINT 0x000000d8
+#define NV50_SURFACE_FORMAT_A8B8G8R8_UINT 0x000000d9
+#define NV50_SURFACE_FORMAT_R16G16_UNORM 0x000000da
+#define NV50_SURFACE_FORMAT_R16G16_SNORM 0x000000db
+#define NV50_SURFACE_FORMAT_R16G16_SINT 0x000000dc
+#define NV50_SURFACE_FORMAT_R16G16_UINT 0x000000dd
+#define NV50_SURFACE_FORMAT_R16G16_FLOAT 0x000000de
+#define NV50_SURFACE_FORMAT_A2R10G10B10_UNORM 0x000000df
+#define NV50_SURFACE_FORMAT_B10G11R11_FLOAT 0x000000e0
+#define NV50_SURFACE_FORMAT_R32_FLOAT 0x000000e5
+#define NV50_SURFACE_FORMAT_X8R8G8B8_UNORM 0x000000e6
+#define NV50_SURFACE_FORMAT_X8R8G8B8_SRGB 0x000000e7
+#define NV50_SURFACE_FORMAT_R5G6B5_UNORM 0x000000e8
+#define NV50_SURFACE_FORMAT_A1R5G5B5_UNORM 0x000000e9
+#define NV50_SURFACE_FORMAT_R8G8_UNORM 0x000000ea
+#define NV50_SURFACE_FORMAT_R8G8_SNORM 0x000000eb
+#define NV50_SURFACE_FORMAT_R8G8_SINT 0x000000ec
+#define NV50_SURFACE_FORMAT_R8G8_UINT 0x000000ed
+#define NV50_SURFACE_FORMAT_R16_UNORM 0x000000ee
+#define NV50_SURFACE_FORMAT_R16_SNORM 0x000000ef
+#define NV50_SURFACE_FORMAT_R16_SINT 0x000000f0
+#define NV50_SURFACE_FORMAT_R16_UINT 0x000000f1
+#define NV50_SURFACE_FORMAT_R16_FLOAT 0x000000f2
+#define NV50_SURFACE_FORMAT_R8_UNORM 0x000000f3
+#define NV50_SURFACE_FORMAT_R8_SNORM 0x000000f4
+#define NV50_SURFACE_FORMAT_R8_SINT 0x000000f5
+#define NV50_SURFACE_FORMAT_R8_UINT 0x000000f6
+#define NV50_SURFACE_FORMAT_A8_UNORM 0x000000f7
+#define NV50_SURFACE_FORMAT_X1R5G5B5_UNORM 0x000000f8
+#define NV50_SURFACE_FORMAT_X8B8G8R8_UNORM 0x000000f9
+#define NV50_SURFACE_FORMAT_X8B8G8R8_SRGB 0x000000fa
+#define NV50_ZETA_FORMAT_Z32_FLOAT 0x0000000a
+#define NV50_ZETA_FORMAT_Z16_UNORM 0x00000013
+#define NV50_ZETA_FORMAT_Z24S8_UNORM 0x00000014
+#define NV50_ZETA_FORMAT_X8Z24_UNORM 0x00000015
+#define NV50_ZETA_FORMAT_S8Z24_UNORM 0x00000016
+#define NV50_ZETA_FORMAT_UNK18 0x00000018
+#define NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM 0x00000019
+#define NV50_ZETA_FORMAT_UNK1D 0x0000001d
+#define NV50_ZETA_FORMAT_UNK1E 0x0000001e
+#define NV50_ZETA_FORMAT_UNK1F 0x0000001f
+#define NV50_QUERY__SIZE 0x00000010
+#define NV50_QUERY_COUNTER 0x00000000
+
+#define NV50_QUERY_RES 0x00000004
+
+#define NV50_QUERY_TIME 0x00000008
+
+
+#endif /* NV50_DEFS_XML */
diff --git a/src/gallium/drivers/nv50/nv50_draw.c b/src/gallium/drivers/nv50/nv50_draw.c
index 2f6f607261..1d8598829c 100644
--- a/src/gallium/drivers/nv50/nv50_draw.c
+++ b/src/gallium/drivers/nv50/nv50_draw.c
@@ -25,32 +25,32 @@
#include "nv50_context.h"
struct nv50_render_stage {
- struct draw_stage stage;
- struct nv50_context *nv50;
+ struct draw_stage stage;
+ struct nv50_context *nv50;
};
static INLINE struct nv50_render_stage *
nv50_render_stage(struct draw_stage *stage)
{
- return (struct nv50_render_stage *)stage;
+ return (struct nv50_render_stage *)stage;
}
static void
nv50_render_point(struct draw_stage *stage, struct prim_header *prim)
{
- NOUVEAU_ERR("\n");
+ NOUVEAU_ERR("\n");
}
static void
nv50_render_line(struct draw_stage *stage, struct prim_header *prim)
{
- NOUVEAU_ERR("\n");
+ NOUVEAU_ERR("\n");
}
static void
nv50_render_tri(struct draw_stage *stage, struct prim_header *prim)
{
- NOUVEAU_ERR("\n");
+ NOUVEAU_ERR("\n");
}
static void
@@ -61,29 +61,28 @@ nv50_render_flush(struct draw_stage *stage, unsigned flags)
static void
nv50_render_reset_stipple_counter(struct draw_stage *stage)
{
- NOUVEAU_ERR("\n");
+ NOUVEAU_ERR("\n");
}
static void
nv50_render_destroy(struct draw_stage *stage)
{
- FREE(stage);
+ FREE(stage);
}
struct draw_stage *
nv50_draw_render_stage(struct nv50_context *nv50)
{
- struct nv50_render_stage *rs = CALLOC_STRUCT(nv50_render_stage);
+ struct nv50_render_stage *rs = CALLOC_STRUCT(nv50_render_stage);
- rs->nv50 = nv50;
- rs->stage.draw = nv50->draw;
- rs->stage.destroy = nv50_render_destroy;
- rs->stage.point = nv50_render_point;
- rs->stage.line = nv50_render_line;
- rs->stage.tri = nv50_render_tri;
- rs->stage.flush = nv50_render_flush;
- rs->stage.reset_stipple_counter = nv50_render_reset_stipple_counter;
+ rs->nv50 = nv50;
+ rs->stage.draw = nv50->draw;
+ rs->stage.destroy = nv50_render_destroy;
+ rs->stage.point = nv50_render_point;
+ rs->stage.line = nv50_render_line;
+ rs->stage.tri = nv50_render_tri;
+ rs->stage.flush = nv50_render_flush;
+ rs->stage.reset_stipple_counter = nv50_render_reset_stipple_counter;
- return &rs->stage;
+ return &rs->stage;
}
-
diff --git a/src/gallium/drivers/nv50/nv50_formats.c b/src/gallium/drivers/nv50/nv50_formats.c
index 4282809454..7946117cf3 100644
--- a/src/gallium/drivers/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nv50/nv50_formats.c
@@ -21,26 +21,34 @@
*/
#include "nv50_screen.h"
-#include "nv50_texture.h"
-#include "nv50_reg.h"
+#include "nv50_texture.xml.h"
+#include "nv50_defs.xml.h"
+#include "nv50_3d.xml.h"
#include "pipe/p_defines.h"
-#define A_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \
- NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 | \
- NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 | \
- NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 | \
- NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 | \
- NV50TIC_0_0_FMT_##sz, \
- NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_##sz | \
- NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_##t0 | \
- (NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_##t0 << 3) | (r << 31)
-
-#define B_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \
- NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 | \
- NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 | \
- NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 | \
- NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 | \
- NV50TIC_0_0_FMT_##sz, 0
+#define A_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \
+ (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \
+ (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
+ (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \
+ (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
+ (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \
+ (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
+ (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \
+ (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
+ NV50_TIC_0_FMT_##sz, \
+ NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_##sz | \
+ NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_##t0 | (r << 31)
+
+#define B_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \
+ (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \
+ (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
+ (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \
+ (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
+ (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \
+ (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
+ (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \
+ (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
+ NV50_TIC_0_FMT_##sz, 0
#define VERTEX_BUFFER PIPE_BIND_VERTEX_BUFFER
#define SAMPLER_VIEW PIPE_BIND_SAMPLER_VIEW
@@ -49,98 +57,96 @@
#define SCANOUT PIPE_BIND_SCANOUT
/* for vertex buffers: */
-#define NV50TIC_0_0_FMT_8_8_8 NV50TIC_0_0_FMT_8_8_8_8
-#define NV50TIC_0_0_FMT_16_16_16 NV50TIC_0_0_FMT_16_16_16_16
-#define NV50TIC_0_0_FMT_32_32_32 NV50TIC_0_0_FMT_32_32_32_32
-
-/* NOTE: using NV50_2D_DST_FORMAT for substitute formats used with 2D engine */
+#define NV50_TIC_0_FMT_8_8_8 NV50_TIC_0_FMT_8_8_8_8
+#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16
+#define NV50_TIC_0_FMT_32_32_32 NV50_TIC_0_FMT_32_32_32_32
const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
{
/* COMMON FORMATS */
- [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50TCL_RT_FORMAT_A8R8G8B8_UNORM,
+ [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50_SURFACE_FORMAT_A8R8G8B8_UNORM,
A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
- [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50TCL_RT_FORMAT_X8R8G8B8_UNORM,
- A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+ [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50_SURFACE_FORMAT_X8R8G8B8_UNORM,
+ A_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
- [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50TCL_RT_FORMAT_A8R8G8B8_SRGB,
+ [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50_SURFACE_FORMAT_A8R8G8B8_SRGB,
A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50TCL_RT_FORMAT_X8R8G8B8_SRGB,
- A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+ [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50_SURFACE_FORMAT_X8R8G8B8_SRGB,
+ A_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_B5G6R5_UNORM] = { NV50TCL_RT_FORMAT_R5G6B5_UNORM,
- B_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1),
+ [PIPE_FORMAT_B5G6R5_UNORM] = { NV50_SURFACE_FORMAT_R5G6B5_UNORM,
+ B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1),
SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
- [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50TCL_RT_FORMAT_A1R5G5B5_UNORM,
+ [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50_SURFACE_FORMAT_A1R5G5B5_UNORM,
B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1),
SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
- [PIPE_FORMAT_B4G4R4A4_UNORM] = { NV50_2D_DST_FORMAT_R16_UNORM,
+ [PIPE_FORMAT_B4G4R4A4_UNORM] = { 0,
B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1),
SAMPLER_VIEW },
- [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50TCL_RT_FORMAT_A2B10G10R10_UNORM,
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_A2B10G10R10_UNORM,
A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0),
SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT },
- [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50TCL_RT_FORMAT_A2R10G10B10_UNORM,
+ [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50_SURFACE_FORMAT_A2R10G10B10_UNORM,
A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 1),
SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER },
/* DEPTH/STENCIL FORMATS */
- [PIPE_FORMAT_Z16_UNORM] = { NV50TCL_ZETA_FORMAT_Z16_UNORM,
- B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 16_DEPTH, 0),
+ [PIPE_FORMAT_Z16_UNORM] = { NV50_ZETA_FORMAT_Z16_UNORM,
+ B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z16, 0),
SAMPLER_VIEW | DEPTH_STENCIL },
- [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50TCL_ZETA_FORMAT_S8Z24_UNORM,
- B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
+ [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_S8Z24_UNORM,
+ B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, S8Z24, 0),
SAMPLER_VIEW | DEPTH_STENCIL },
- [PIPE_FORMAT_Z24X8_UNORM] = { NV50TCL_ZETA_FORMAT_X8Z24_UNORM,
- B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
+ [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_X8Z24_UNORM,
+ B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, X8Z24, 0),
SAMPLER_VIEW | DEPTH_STENCIL },
- [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50TCL_ZETA_FORMAT_S8Z24_UNORM,
- B_(C1, C1, C1, ONE, UINT, UNORM, UINT, UINT, 24_8, 0),
+ [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_Z24S8_UNORM,
+ B_(C1, C1, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, Z24S8, 0),
SAMPLER_VIEW | DEPTH_STENCIL },
- [PIPE_FORMAT_Z32_FLOAT] = { NV50TCL_ZETA_FORMAT_Z32_FLOAT,
- B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_DEPTH, 0),
+ [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT,
+ B_(C0, C0, C0, ONE_FLOAT, FLOAT, UINT, UINT, UINT, Z32, 0),
SAMPLER_VIEW | DEPTH_STENCIL },
[PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = {
- NV50TCL_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM,
- B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_8, 0),
+ NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM,
+ B_(C0, C0, C0, ONE_FLOAT, FLOAT, UINT, UINT, UINT, X24S8Z32, 0),
SAMPLER_VIEW | DEPTH_STENCIL },
/* LUMINANCE, ALPHA, INTENSITY */
- [PIPE_FORMAT_L8_UNORM] = { NV50_2D_DST_FORMAT_R8_UNORM,
- A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
- SAMPLER_VIEW },
+ [PIPE_FORMAT_L8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
+ A_(C0, C0, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_L8_SRGB] = { NV50_2D_DST_FORMAT_R8_UNORM,
- A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
- SAMPLER_VIEW },
+ [PIPE_FORMAT_L8_SRGB] = { NV50_SURFACE_FORMAT_R8_UNORM,
+ A_(C0, C0, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_I8_UNORM] = { NV50_2D_DST_FORMAT_R8_UNORM,
+ [PIPE_FORMAT_I8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
- SAMPLER_VIEW },
+ SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_A8_UNORM] = { NV50TCL_RT_FORMAT_A8_UNORM,
+ [PIPE_FORMAT_A8_UNORM] = { NV50_SURFACE_FORMAT_A8_UNORM,
A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_L8A8_UNORM] = { NV50_2D_DST_FORMAT_R16_UNORM,
+ [PIPE_FORMAT_L8A8_UNORM] = { 0,
A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
SAMPLER_VIEW },
@@ -151,7 +157,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
/* DXT, RGTC */
[PIPE_FORMAT_DXT1_RGB] = { 0,
- B_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
+ B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
SAMPLER_VIEW },
[PIPE_FORMAT_DXT1_RGBA] = { 0,
@@ -167,65 +173,65 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
SAMPLER_VIEW },
[PIPE_FORMAT_RGTC1_UNORM] = { 0,
- B_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC1, 0),
+ B_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, RGTC1, 0),
SAMPLER_VIEW },
[PIPE_FORMAT_RGTC1_SNORM] = { 0,
- B_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC1, 0),
+ B_(C0, ZERO, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, RGTC1, 0),
SAMPLER_VIEW },
[PIPE_FORMAT_RGTC2_UNORM] = { 0,
- B_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC2, 0),
+ B_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, RGTC2, 0),
SAMPLER_VIEW },
[PIPE_FORMAT_RGTC2_SNORM] = { 0,
- B_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC2, 0),
+ B_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, RGTC2, 0),
SAMPLER_VIEW },
/* FLOAT 16 */
- [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16B16A16_FLOAT,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT,
A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16B16X16_FLOAT,
- A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0),
+ [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT,
+ A_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_R16G16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16_FLOAT,
- A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0),
+ [PIPE_FORMAT_R16G16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16_FLOAT,
+ A_(C0, C1, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_R16_FLOAT] = { NV50TCL_RT_FORMAT_R16_FLOAT,
- A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0),
+ [PIPE_FORMAT_R16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
/* FLOAT 32 */
- [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32B32A32_FLOAT,
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT,
A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32B32X32_FLOAT,
- A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0),
+ [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT,
+ A_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_R32G32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32_FLOAT,
- A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0),
+ [PIPE_FORMAT_R32G32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32_FLOAT,
+ A_(C0, C1, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_R32_FLOAT] = { NV50TCL_RT_FORMAT_R32_FLOAT,
- A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0),
+ [PIPE_FORMAT_R32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
/* ODD FORMATS */
- [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50TCL_RT_FORMAT_B10G11R11_FLOAT,
- B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0),
+ [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50_SURFACE_FORMAT_B10G11R11_FLOAT,
+ B_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0),
SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R9G9B9E5_FLOAT] = { 0,
- B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 5_9_9_9, 0),
+ B_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, E5_9_9_9, 0),
SAMPLER_VIEW },
/* SNORM 32 */
@@ -235,15 +241,15 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32G32B32_SNORM] = { 0,
- A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0),
+ A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32G32_SNORM] = { 0,
- A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32_32, 0),
+ A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32_SNORM] = { 0,
- A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32, 0),
+ A_(C0, ZERO, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
/* UNORM 32 */
@@ -253,202 +259,202 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32G32B32_UNORM] = { 0,
- A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0),
+ A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32G32_UNORM] = { 0,
- A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32_32, 0),
+ A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32_UNORM] = { 0,
- A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32, 0),
+ A_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
/* SNORM 16 */
- [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50TCL_RT_FORMAT_R16G16B16A16_SNORM,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_SNORM,
A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16_16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R16G16B16_SNORM] = { 0,
- A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0),
+ A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
- [PIPE_FORMAT_R16G16_SNORM] = { NV50TCL_RT_FORMAT_R16G16_SNORM,
+ [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_R16G16_SNORM,
A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_R16_SNORM] = { NV50TCL_RT_FORMAT_R16_SNORM,
- A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16, 0),
+ [PIPE_FORMAT_R16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
/* UNORM 16 */
- [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50TCL_RT_FORMAT_R16G16B16A16_UNORM,
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_UNORM,
A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16_16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R16G16B16_UNORM] = { 0,
- A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0),
+ A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
- [PIPE_FORMAT_R16G16_UNORM] = { NV50TCL_RT_FORMAT_R16G16_UNORM,
+ [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_R16G16_UNORM,
A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_R16_UNORM] = { NV50TCL_RT_FORMAT_R16_UNORM,
- A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16, 0),
+ [PIPE_FORMAT_R16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
/* SNORM 8 */
- [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50TCL_RT_FORMAT_A8B8G8R8_SNORM,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_SNORM,
A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 8_8_8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R8G8B8_SNORM] = { 0,
- A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0),
+ A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
- [PIPE_FORMAT_R8G8_SNORM] = { NV50TCL_RT_FORMAT_R8G8_SNORM,
- A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8_8, 0),
+ [PIPE_FORMAT_R8G8_SNORM] = { NV50_SURFACE_FORMAT_R8G8_SNORM,
+ A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_R8_SNORM] = { NV50TCL_RT_FORMAT_R8_SNORM,
- A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8, 0),
+ [PIPE_FORMAT_R8_SNORM] = { NV50_SURFACE_FORMAT_R8_SNORM,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 8, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
/* UNORM 8 */
- [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50TCL_RT_FORMAT_A8B8G8R8_UNORM,
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_UNORM,
A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50TCL_RT_FORMAT_A8B8G8R8_SRGB,
+ [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50_SURFACE_FORMAT_A8B8G8R8_SRGB,
A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_R8G8B8_UNORM] = { NV50TCL_RT_FORMAT_X8B8G8R8_UNORM,
- A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
+ [PIPE_FORMAT_R8G8B8_UNORM] = { NV50_SURFACE_FORMAT_X8B8G8R8_UNORM,
+ A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_R8G8B8_SRGB] = { NV50TCL_RT_FORMAT_X8B8G8R8_SRGB,
- A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
+ [PIPE_FORMAT_R8G8B8_SRGB] = { NV50_SURFACE_FORMAT_X8B8G8R8_SRGB,
+ A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_R8G8_UNORM] = { NV50TCL_RT_FORMAT_R8G8_UNORM,
- A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
+ [PIPE_FORMAT_R8G8_UNORM] = { NV50_SURFACE_FORMAT_R8G8_UNORM,
+ A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_R8_UNORM] = { NV50TCL_RT_FORMAT_R8_UNORM,
- A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ [PIPE_FORMAT_R8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
- /* SSCALED 32 */
+ /* SSCALED 32 (not integer, data is converted to float !) */
[PIPE_FORMAT_R32G32B32A32_SSCALED] = { 0,
A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32_32, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ VERTEX_BUFFER },
[PIPE_FORMAT_R32G32B32_SSCALED] = { 0,
- A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, C1, C2, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32, 0),
+ VERTEX_BUFFER },
[PIPE_FORMAT_R32G32_SSCALED] = { 0,
- A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, C1, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 32_32, 0),
+ VERTEX_BUFFER },
[PIPE_FORMAT_R32_SSCALED] = { 0,
- A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, ZERO, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 32, 0),
+ VERTEX_BUFFER },
/* USCALED 32 */
[PIPE_FORMAT_R32G32B32A32_USCALED] = { 0,
A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 32_32_32_32, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ VERTEX_BUFFER },
[PIPE_FORMAT_R32G32B32_USCALED] = { 0,
- A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 32_32_32, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, C1, C2, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 32_32_32, 0),
+ VERTEX_BUFFER },
[PIPE_FORMAT_R32G32_USCALED] = { 0,
- A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32_32, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, C1, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 32_32, 0),
+ VERTEX_BUFFER },
[PIPE_FORMAT_R32_USCALED] = { 0,
- A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, ZERO, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 32, 0),
+ VERTEX_BUFFER },
/* SSCALED 16 */
[PIPE_FORMAT_R16G16B16A16_SSCALED] = { 0,
A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16_16, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ VERTEX_BUFFER },
[PIPE_FORMAT_R16G16B16_SSCALED] = { 0,
- A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, C1, C2, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16, 0),
+ VERTEX_BUFFER },
[PIPE_FORMAT_R16G16_SSCALED] = { 0,
- A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, C1, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 16_16, 0),
+ VERTEX_BUFFER },
[PIPE_FORMAT_R16_SSCALED] = { 0,
- A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, ZERO, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 16, 0),
+ VERTEX_BUFFER },
/* USCALED 16 */
[PIPE_FORMAT_R16G16B16A16_USCALED] = { 0,
A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 16_16_16_16, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ VERTEX_BUFFER },
[PIPE_FORMAT_R16G16B16_USCALED] = { 0,
- A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 16_16_16, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, C1, C2, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 16_16_16, 0),
+ VERTEX_BUFFER },
[PIPE_FORMAT_R16G16_USCALED] = { 0,
- A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16_16, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, C1, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 16_16, 0),
+ VERTEX_BUFFER },
[PIPE_FORMAT_R16_USCALED] = { 0,
- A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, ZERO, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 16, 0),
+ VERTEX_BUFFER },
/* SSCALED 8 */
[PIPE_FORMAT_R8G8B8A8_SSCALED] = { 0,
A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8_8, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ VERTEX_BUFFER },
[PIPE_FORMAT_R8G8B8_SSCALED] = { 0,
- A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, C1, C2, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8, 0),
+ VERTEX_BUFFER },
[PIPE_FORMAT_R8G8_SSCALED] = { 0,
- A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, C1, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 8_8, 0),
+ VERTEX_BUFFER },
[PIPE_FORMAT_R8_SSCALED] = { 0,
- A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, ZERO, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 8, 0),
+ VERTEX_BUFFER },
/* USCALED 8 */
[PIPE_FORMAT_R8G8B8A8_USCALED] = { 0,
A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 8_8_8_8, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ VERTEX_BUFFER },
[PIPE_FORMAT_R8G8B8_USCALED] = { 0,
- A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 8_8_8, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, C1, C2, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 8_8_8, 0),
+ VERTEX_BUFFER },
[PIPE_FORMAT_R8G8_USCALED] = { 0,
- A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8_8, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, C1, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 8_8, 0),
+ VERTEX_BUFFER },
[PIPE_FORMAT_R8_USCALED] = { 0,
- A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8, 0),
- VERTEX_BUFFER | SAMPLER_VIEW },
+ A_(C0, ZERO, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 8, 0),
+ VERTEX_BUFFER },
};
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 309b6503ca..9eeca05ada 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -29,300 +29,284 @@
#include "nv50_resource.h"
#include "nv50_transfer.h"
-/* The restrictions in tile mode selection probably aren't necessary. */
static INLINE uint32_t
-get_tile_mode(unsigned ny, unsigned d)
+get_tile_dims(unsigned nx, unsigned ny, unsigned nz)
{
- uint32_t tile_mode = 0x00;
-
- if (ny > 32) tile_mode = 0x04; /* height 64 tiles */
- else
- if (ny > 16) tile_mode = 0x03; /* height 32 tiles */
- else
- if (ny > 8) tile_mode = 0x02; /* height 16 tiles */
- else
- if (ny > 4) tile_mode = 0x01; /* height 8 tiles */
-
- if (d == 1)
- return tile_mode;
- else
- if (tile_mode > 0x02)
- tile_mode = 0x02;
-
- if (d > 16 && tile_mode < 0x02)
- return tile_mode | 0x50; /* depth 32 tiles */
- if (d > 8) return tile_mode | 0x40; /* depth 16 tiles */
- if (d > 4) return tile_mode | 0x30; /* depth 8 tiles */
- if (d > 2) return tile_mode | 0x20; /* depth 4 tiles */
-
- return tile_mode | 0x10;
+ uint32_t tile_mode = 0x00;
+
+ if (ny > 32) tile_mode = 0x04; /* height 128 tiles */
+ else
+ if (ny > 16) tile_mode = 0x03; /* height 64 tiles */
+ else
+ if (ny > 8) tile_mode = 0x02; /* height 32 tiles */
+ else
+ if (ny > 4) tile_mode = 0x01; /* height 16 tiles */
+
+ if (nz == 1)
+ return tile_mode;
+ else
+ if (tile_mode > 0x02)
+ tile_mode = 0x02;
+
+ if (nz > 16 && tile_mode < 0x02)
+ return tile_mode | 0x50; /* depth 32 tiles */
+ if (nz > 8) return tile_mode | 0x40; /* depth 16 tiles */
+ if (nz > 4) return tile_mode | 0x30; /* depth 8 tiles */
+ if (nz > 2) return tile_mode | 0x20; /* depth 4 tiles */
+
+ return tile_mode | 0x10;
}
static INLINE unsigned
-get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned nb_h)
+calc_zslice_offset(uint32_t tile_mode, unsigned z, unsigned pitch, unsigned nbh)
{
- unsigned tile_h = get_tile_height(tile_mode);
- unsigned tile_d = get_tile_depth(tile_mode);
+ unsigned tile_h = NV50_TILE_HEIGHT(tile_mode);
+ unsigned tile_d_shift = NV50_TILE_DIM_SHIFT(tile_mode, 1);
+ unsigned tile_d = 1 << tile_d_shift;
- /* pitch_2d == to next slice within this volume-tile */
- /* pitch_3d == size (in bytes) of a volume-tile */
- unsigned pitch_2d = tile_h * 64;
- unsigned pitch_3d = tile_d * align(nb_h, tile_h) * pitch;
+ /* stride_2d == to next slice within this volume tile */
+ /* stride_3d == size (in bytes) of a volume tile */
+ unsigned stride_2d = tile_h * NV50_TILE_PITCH(tile_mode);
+ unsigned stride_3d = tile_d * align(nbh, tile_h) * pitch;
- return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d;
+ return (z & (tile_d - 1)) * stride_2d + (z >> tile_d_shift) * stride_3d;
}
-
-
-
static void
-nv50_miptree_destroy(struct pipe_screen *pscreen,
- struct pipe_resource *pt)
+nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt)
{
- struct nv50_miptree *mt = nv50_miptree(pt);
- unsigned l;
+ struct nv50_miptree *mt = nv50_miptree(pt);
- for (l = 0; l <= pt->last_level; ++l)
- FREE(mt->level[l].image_offset);
+ nouveau_screen_bo_release(pscreen, mt->base.bo);
- nouveau_screen_bo_release(pscreen, mt->base.bo);
- FREE(mt);
+ FREE(mt);
}
static boolean
nv50_miptree_get_handle(struct pipe_screen *pscreen,
- struct pipe_resource *pt,
- struct winsys_handle *whandle)
+ struct pipe_resource *pt,
+ struct winsys_handle *whandle)
{
- struct nv50_miptree *mt = nv50_miptree(pt);
- unsigned stride;
+ struct nv50_miptree *mt = nv50_miptree(pt);
+ unsigned stride;
+ if (!mt || !mt->base.bo)
+ return FALSE;
- if (!mt || !mt->base.bo)
- return FALSE;
+ stride = util_format_get_stride(mt->base.base.format,
+ mt->base.base.width0);
- stride = util_format_get_stride(mt->base.base.format,
- mt->base.base.width0);
-
- return nouveau_screen_bo_get_handle(pscreen,
- mt->base.bo,
- stride,
- whandle);
+ return nouveau_screen_bo_get_handle(pscreen,
+ mt->base.bo,
+ stride,
+ whandle);
}
-
const struct u_resource_vtbl nv50_miptree_vtbl =
{
- nv50_miptree_get_handle, /* get_handle */
- nv50_miptree_destroy, /* resource_destroy */
- NULL, /* is_resource_referenced */
- nv50_miptree_transfer_new, /* get_transfer */
- nv50_miptree_transfer_del, /* transfer_destroy */
+ nv50_miptree_get_handle, /* get_handle */
+ nv50_miptree_destroy, /* resource_destroy */
+ nv50_miptree_transfer_new, /* get_transfer */
+ nv50_miptree_transfer_del, /* transfer_destroy */
nv50_miptree_transfer_map, /* transfer_map */
- u_default_transfer_flush_region, /* transfer_flush_region */
- nv50_miptree_transfer_unmap, /* transfer_unmap */
- u_default_transfer_inline_write /* transfer_inline_write */
+ u_default_transfer_flush_region, /* transfer_flush_region */
+ nv50_miptree_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
};
-
-
struct pipe_resource *
-nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *tmp)
+nv50_miptree_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ)
{
- struct nouveau_device *dev = nouveau_screen(pscreen)->device;
- struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
- struct pipe_resource *pt = &mt->base.base;
- unsigned width = tmp->width0, height = tmp->height0;
- unsigned depth = tmp->depth0, image_alignment;
- uint32_t tile_flags;
- int ret, i, l;
-
- if (!mt)
- return NULL;
-
- *pt = *tmp;
- mt->base.vtbl = &nv50_miptree_vtbl;
- pipe_reference_init(&pt->reference, 1);
- pt->screen = pscreen;
-
- switch (pt->format) {
- case PIPE_FORMAT_Z32_FLOAT:
- tile_flags = 0x4800;
- break;
- case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- tile_flags = 0x1800;
- break;
- case PIPE_FORMAT_Z16_UNORM:
- tile_flags = 0x6c00;
- break;
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- tile_flags = 0x2800;
- break;
- case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
- tile_flags = 0xe000;
- break;
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- case PIPE_FORMAT_R32G32B32_FLOAT:
- tile_flags = 0x7400;
- break;
- default:
- if ((pt->bind & PIPE_BIND_SCANOUT) &&
- util_format_get_blocksizebits(pt->format) == 32)
- tile_flags = 0x7a00;
- else
- tile_flags = 0x7000;
- break;
- }
-
- /* XXX: texture arrays */
- mt->image_nr = (pt->target == PIPE_TEXTURE_CUBE) ? 6 : 1;
-
- for (l = 0; l <= pt->last_level; l++) {
- struct nv50_miptree_level *lvl = &mt->level[l];
- unsigned nblocksy = util_format_get_nblocksy(pt->format, height);
-
- lvl->image_offset = CALLOC(mt->image_nr, sizeof(int));
- lvl->pitch = align(util_format_get_stride(pt->format, width), 64);
- lvl->tile_mode = get_tile_mode(nblocksy, depth);
-
- width = u_minify(width, 1);
- height = u_minify(height, 1);
- depth = u_minify(depth, 1);
- }
-
- image_alignment = get_tile_height(mt->level[0].tile_mode) * 64;
- image_alignment *= get_tile_depth(mt->level[0].tile_mode);
-
- /* NOTE the distinction between arrays of mip-mapped 2D textures and
- * mip-mapped 3D textures. We can't use image_nr == depth for 3D mip.
- */
- for (i = 0; i < mt->image_nr; i++) {
- for (l = 0; l <= pt->last_level; l++) {
- struct nv50_miptree_level *lvl = &mt->level[l];
- int size;
- unsigned tile_h = get_tile_height(lvl->tile_mode);
- unsigned tile_d = get_tile_depth(lvl->tile_mode);
-
- size = lvl->pitch;
- size *= align(util_format_get_nblocksy(pt->format, u_minify(pt->height0, l)), tile_h);
- size *= align(u_minify(pt->depth0, l), tile_d);
-
- lvl->image_offset[i] = mt->total_size;
-
- mt->total_size += size;
- }
- mt->total_size = align(mt->total_size, image_alignment);
- }
-
- ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, mt->total_size,
- mt->level[0].tile_mode, tile_flags,
- &mt->base.bo);
- if (ret) {
- for (l = 0; l <= pt->last_level; ++l)
- FREE(mt->level[l].image_offset);
- FREE(mt);
- return NULL;
- }
-
- return pt;
+ struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+ struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
+ struct pipe_resource *pt = &mt->base.base;
+ int ret;
+ unsigned w, h, d, l, alloc_size;
+ uint32_t tile_flags;
+
+ if (!mt)
+ return NULL;
+
+ mt->base.vtbl = &nv50_miptree_vtbl;
+ *pt = *templ;
+ pipe_reference_init(&pt->reference, 1);
+ pt->screen = pscreen;
+
+ mt->layout_3d = pt->target == PIPE_TEXTURE_3D;
+
+ w = pt->width0;
+ h = pt->height0;
+ d = mt->layout_3d ? pt->depth0 : 1;
+
+ switch (pt->format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ tile_flags = 0x6c00;
+ break;
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ tile_flags = 0x1800;
+ break;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ tile_flags = 0x2800;
+ break;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ tile_flags = 0x7400;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
+ tile_flags = 0x6000;
+ break;
+ default:
+ if ((pt->bind & PIPE_BIND_SCANOUT) &&
+ util_format_get_blocksizebits(pt->format) == 32)
+ tile_flags = 0x7a00;
+ else
+ tile_flags = 0x7000;
+ break;
+ }
+
+ /* For 3D textures, a mipmap is spanned by all the layers, for array
+ * textures and cube maps, each layer contains its own mipmaps.
+ */
+ for (l = 0; l <= pt->last_level; ++l) {
+ struct nv50_miptree_level *lvl = &mt->level[l];
+ unsigned nbx = util_format_get_nblocksx(pt->format, w);
+ unsigned nby = util_format_get_nblocksy(pt->format, h);
+ unsigned blocksize = util_format_get_blocksize(pt->format);
+
+ lvl->offset = mt->total_size;
+ lvl->tile_mode = get_tile_dims(nbx, nby, d);
+ lvl->pitch = align(nbx * blocksize, NV50_TILE_PITCH(lvl->tile_mode));
+
+ mt->total_size += lvl->pitch *
+ align(nby, NV50_TILE_HEIGHT(lvl->tile_mode)) *
+ align(d, NV50_TILE_DEPTH(lvl->tile_mode));
+
+ w = u_minify(w, 1);
+ h = u_minify(h, 1);
+ d = u_minify(d, 1);
+ }
+
+ if (pt->array_size > 1) {
+ mt->layer_stride = align(mt->total_size,
+ NV50_TILE_SIZE(mt->level[0].tile_mode));
+ mt->total_size = mt->layer_stride * pt->array_size;
+ }
+
+ alloc_size = mt->total_size;
+
+ ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, alloc_size,
+ mt->level[0].tile_mode, tile_flags,
+ &mt->base.bo);
+ if (ret) {
+ FREE(mt);
+ return NULL;
+ }
+ mt->base.domain = NOUVEAU_BO_VRAM;
+
+ return pt;
}
-
struct pipe_resource *
nv50_miptree_from_handle(struct pipe_screen *pscreen,
- const struct pipe_resource *template,
- struct winsys_handle *whandle)
+ const struct pipe_resource *templ,
+ struct winsys_handle *whandle)
{
- struct nv50_miptree *mt;
- unsigned stride;
-
- /* Only supports 2D, non-mipmapped textures for the moment */
- if ((template->target != PIPE_TEXTURE_2D &&
- template->target != PIPE_TEXTURE_RECT) ||
- template->last_level != 0 ||
- template->depth0 != 1)
- return NULL;
-
- mt = CALLOC_STRUCT(nv50_miptree);
- if (!mt)
- return NULL;
-
- mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride);
- if (mt->base.bo == NULL) {
- FREE(mt);
- return NULL;
- }
-
-
- mt->base.base = *template;
- mt->base.vtbl = &nv50_miptree_vtbl;
- pipe_reference_init(&mt->base.base.reference, 1);
- mt->base.base.screen = pscreen;
- mt->image_nr = 1;
- mt->level[0].pitch = stride;
- mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
- mt->level[0].tile_mode = mt->base.bo->tile_mode;
-
- /* XXX: Need to adjust bo refcount??
- */
- /* nouveau_bo_ref(bo, &mt->base.bo); */
- return &mt->base.base;
+ struct nv50_miptree *mt;
+ unsigned stride;
+
+ /* only supports 2D, non-mipmapped textures for the moment */
+ if ((templ->target != PIPE_TEXTURE_2D &&
+ templ->target != PIPE_TEXTURE_RECT) ||
+ templ->last_level != 0 ||
+ templ->depth0 != 1 ||
+ templ->array_size > 1)
+ return NULL;
+
+ mt = CALLOC_STRUCT(nv50_miptree);
+ if (!mt)
+ return NULL;
+
+ mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride);
+ if (mt->base.bo == NULL) {
+ FREE(mt);
+ return NULL;
+ }
+
+ mt->base.base = *templ;
+ mt->base.vtbl = &nv50_miptree_vtbl;
+ pipe_reference_init(&mt->base.base.reference, 1);
+ mt->base.base.screen = pscreen;
+ mt->level[0].pitch = stride;
+ mt->level[0].offset = 0;
+ mt->level[0].tile_mode = mt->base.bo->tile_mode;
+
+ /* no need to adjust bo reference count */
+ return &mt->base.base;
}
-
-/* Surface functions
+/* Surface functions.
*/
struct pipe_surface *
-nv50_miptree_surface_new(struct pipe_context *pipe, struct pipe_resource *pt,
- const struct pipe_surface *surf_tmpl)
+nv50_miptree_surface_new(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ const struct pipe_surface *templ)
{
- unsigned level = surf_tmpl->u.tex.level;
- struct nv50_miptree *mt = nv50_miptree(pt);
- struct nv50_miptree_level *lvl = &mt->level[level];
- struct nv50_surface *ns;
- unsigned img = 0, zslice = 0;
-
- assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
-
- /* XXX can't unify these here? */
- if (pt->target == PIPE_TEXTURE_CUBE)
- img = surf_tmpl->u.tex.first_layer;
- else if (pt->target == PIPE_TEXTURE_3D)
- zslice = surf_tmpl->u.tex.first_layer;
-
- ns = CALLOC_STRUCT(nv50_surface);
- if (!ns)
- return NULL;
- pipe_resource_reference(&ns->base.texture, pt);
- ns->base.context = pipe;
- ns->base.format = pt->format;
- ns->base.width = u_minify(pt->width0, level);
- ns->base.height = u_minify(pt->height0, level);
- ns->base.usage = surf_tmpl->usage;
- pipe_reference_init(&ns->base.reference, 1);
- ns->base.u.tex.level = level;
- ns->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer;
- ns->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
- ns->offset = lvl->image_offset[img];
-
- if (pt->target == PIPE_TEXTURE_3D) {
- unsigned nb_h = util_format_get_nblocksy(pt->format, ns->base.height);
- ns->offset += get_zslice_offset(lvl->tile_mode, zslice,
- lvl->pitch, nb_h);
- }
-
- return &ns->base;
+ struct nv50_miptree *mt = nv50_miptree(pt); /* guaranteed */
+ struct nv50_surface *ns;
+ struct pipe_surface *ps;
+ struct nv50_miptree_level *lvl = &mt->level[templ->u.tex.level];
+
+ ns = CALLOC_STRUCT(nv50_surface);
+ if (!ns)
+ return NULL;
+ ps = &ns->base;
+
+ pipe_reference_init(&ps->reference, 1);
+ pipe_resource_reference(&ps->texture, pt);
+ ps->context = pipe;
+ ps->format = templ->format;
+ ps->usage = templ->usage;
+ ps->u.tex.level = templ->u.tex.level;
+ ps->u.tex.first_layer = templ->u.tex.first_layer;
+ ps->u.tex.last_layer = templ->u.tex.last_layer;
+
+ ns->width = u_minify(pt->width0, ps->u.tex.level);
+ ns->height = u_minify(pt->height0, ps->u.tex.level);
+ ns->depth = ps->u.tex.last_layer - ps->u.tex.first_layer + 1;
+ ns->offset = lvl->offset;
+
+ /* comment says there are going to be removed, but they're used by the st */
+ ps->width = ns->width;
+ ps->height = ns->height;
+
+ if (mt->layout_3d) {
+ unsigned zslice = ps->u.tex.first_layer;
+
+ /* TODO: re-layout the texture to use only depth 1 tiles in this case: */
+ if (ns->depth > 1 && (zslice & (NV50_TILE_DEPTH(lvl->tile_mode) - 1)))
+ NOUVEAU_ERR("Creating unsupported 3D surface of slices [%u:%u].\n",
+ zslice, ps->u.tex.last_layer);
+
+ ns->offset += calc_zslice_offset(lvl->tile_mode, zslice, lvl->pitch,
+ util_format_get_nblocksy(pt->format,
+ ns->height));
+ } else {
+ ns->offset += mt->layer_stride * ps->u.tex.first_layer;
+ }
+
+ return ps;
}
void
-nv50_miptree_surface_del(struct pipe_context *pipe,
- struct pipe_surface *ps)
+nv50_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps)
{
- struct nv50_surface *s = nv50_surface(ps);
+ struct nv50_surface *s = nv50_surface(ps);
+
+ pipe_resource_reference(&ps->texture, NULL);
- pipe_resource_reference(&s->base.texture, NULL);
- FREE(s);
+ FREE(s);
}
diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c
index c88e7ba742..82f1b84652 100644
--- a/src/gallium/drivers/nv50/nv50_pc.c
+++ b/src/gallium/drivers/nv50/nv50_pc.c
@@ -307,7 +307,10 @@ nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv)
bb[p++] = b->out[j];
break;
case CFG_EDGE_LOOP_LEAVE:
- bbb[pp++] = b->out[j];
+ if (!b->out[j]->priv) {
+ bbb[pp++] = b->out[j];
+ b->out[j]->priv = 1;
+ }
break;
default:
assert(0);
diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h
index 2ead80430b..e6f3815baf 100644
--- a/src/gallium/drivers/nv50/nv50_pc.h
+++ b/src/gallium/drivers/nv50/nv50_pc.h
@@ -23,6 +23,8 @@
#ifndef __NV50_COMPILER_H__
#define __NV50_COMPILER_H__
+#define NV50PC_DEBUG
+
#ifdef NV50PC_DEBUG
# define NV50_DBGMSG(args...) debug_printf(args)
#else
diff --git a/src/gallium/drivers/nv50/nv50_pc_emit.c b/src/gallium/drivers/nv50/nv50_pc_emit.c
index f37dc51e6a..252c58dd8f 100644
--- a/src/gallium/drivers/nv50/nv50_pc_emit.c
+++ b/src/gallium/drivers/nv50/nv50_pc_emit.c
@@ -762,7 +762,8 @@ emit_flow(struct nv_pc *pc, struct nv_instruction *i, ubyte flow_op)
new_fixup(pc, NV50_FIXUP_CODE_RELOC, 0, pos, 0xffff << 11, 9);
new_fixup(pc, NV50_FIXUP_CODE_RELOC, 1, pos, 0x3f << 14, -4);
- pc->emit[0] |= (pos / 4) << 11;
+ pc->emit[0] |= ((pos >> 2) & 0xffff) << 11;
+ pc->emit[1] |= ((pos >> 18) & 0x003f) << 14;
}
}
diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c
index 27eb3817bf..281ccf7ac6 100644
--- a/src/gallium/drivers/nv50/nv50_pc_optimize.c
+++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c
@@ -145,8 +145,9 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
int j;
uint size, n32 = 0;
+ /* find first non-empty block emitted before b */
for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j);
- if (j >= 0) {
+ for (; j >= 0; --j) {
in = pc->bb_list[j];
/* check for no-op branches (BRA $PC+8) */
@@ -160,6 +161,9 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
nv_nvi_delete(in->exit);
}
b->bin_pos = in->bin_pos + in->bin_size;
+
+ if (in->bin_size) /* no more no-op branches to b */
+ break;
}
pc->bb_list[pc->num_blocks++] = b;
@@ -299,7 +303,7 @@ check_swap_src_0_1(struct nv_instruction *nvi)
}
if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0)
- nvi->set_cond = cc_swapped[nvi->set_cond];
+ nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7];
}
static int
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index db68176491..a63f9d8a6d 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -328,10 +328,15 @@ prog_decl(struct nv50_translation_info *ti,
}
break;
case TGSI_FILE_SYSTEM_VALUE:
+ /* For VP/GP inputs, they are put in s[] after the last normal input.
+ * Let sysval_map reflect the order of the sysvals in s[] and fixup later.
+ */
switch (decl->Semantic.Name) {
case TGSI_SEMANTIC_FACE:
break;
case TGSI_SEMANTIC_INSTANCEID:
+ ti->p->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
+ ti->sysval_map[first] = 2;
break;
case TGSI_SEMANTIC_PRIMID:
break;
@@ -392,6 +397,18 @@ nv50_vertprog_prepare(struct nv50_translation_info *ti)
}
}
+ for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) {
+ switch (ti->sysval_map[i]) {
+ case 2:
+ if (!(ti->p->vp.attrs[2] & NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID))
+ ti->sysval_map[i] = 1;
+ ti->sysval_map[i] = (ti->sysval_map[i] - 1) + num_inputs;
+ break;
+ default:
+ break;
+ }
+ }
+
if (p->vp.psiz < 0x40)
p->vp.psiz = p->out[p->vp.psiz].hw;
@@ -411,11 +428,11 @@ nv50_fragprog_prepare(struct nv50_translation_info *ti)
if (ti->scan.writes_z) {
p->fp.flags[1] = 0x11;
- p->fp.flags[0] |= NV50TCL_FP_CONTROL_EXPORTS_Z;
+ p->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
}
if (ti->scan.uses_kill)
- p->fp.flags[0] |= NV50TCL_FP_CONTROL_USES_KIL;
+ p->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
/* FP inputs */
@@ -490,13 +507,13 @@ nv50_fragprog_prepare(struct nv50_translation_info *ti)
if (n < m)
nvary -= p->in[n].hw;
- p->fp.interp |= nvary << NV50TCL_FP_INTERPOLANT_CTRL_COUNT_NONFLAT_SHIFT;
- p->fp.interp |= nintp << NV50TCL_FP_INTERPOLANT_CTRL_COUNT_SHIFT;
+ p->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT;
+ p->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT;
/* FP outputs */
if (p->out_nr > (1 + (ti->scan.writes_z ? 1 : 0)))
- p->fp.flags[0] |= NV50TCL_FP_CONTROL_MULTIPLE_RESULTS;
+ p->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS;
depr = p->out_nr;
for (i = 0; i < p->out_nr; ++i) {
@@ -608,7 +625,7 @@ nv50_prog_scan(struct nv50_translation_info *ti)
}
boolean
-nv50_program_tx(struct nv50_program *p)
+nv50_program_translate(struct nv50_program *p)
{
struct nv50_translation_info *ti;
int ret;
@@ -646,9 +663,8 @@ out:
void
nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
{
- nouveau_bo_ref(NULL, &p->bo);
-
- so_ref(NULL, &p->so);
+ if (p->res)
+ nouveau_resource_free(&p->res);
if (p->code)
FREE(p->code);
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 33c4c8ca6d..993e1691ab 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -47,12 +47,9 @@ struct nv50_program {
boolean translated;
boolean uses_lmem;
- struct nouveau_bo *bo;
- struct nouveau_stateobj *so;
-
uint32_t *code;
unsigned code_size;
- unsigned code_start; /* offset inside bo */
+ unsigned code_base;
uint32_t *immd;
unsigned immd_size;
unsigned parm_size; /* size limit of uniform buffer */
@@ -89,6 +86,8 @@ struct nv50_program {
/* relocation records */
void *fixups;
unsigned num_fixups;
+
+ struct nouveau_resource *res;
};
#define NV50_INTERP_LINEAR (1 << 0)
@@ -112,6 +111,7 @@ struct nv50_translation_info {
ubyte output_file;
ubyte input_map[PIPE_MAX_SHADER_INPUTS][4];
ubyte output_map[PIPE_MAX_SHADER_OUTPUTS][4];
+ ubyte sysval_map[TGSI_SEMANTIC_COUNT];
ubyte interp_mode[PIPE_MAX_SHADER_INPUTS];
int input_access[PIPE_MAX_SHADER_INPUTS][4];
int output_access[PIPE_MAX_SHADER_OUTPUTS][4];
diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c
index 380f69406a..e8ad1ddd38 100644
--- a/src/gallium/drivers/nv50/nv50_push.c
+++ b/src/gallium/drivers/nv50/nv50_push.c
@@ -1,362 +1,297 @@
+
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
-#include "util/u_split_prim.h"
+#include "translate/translate.h"
#include "nv50_context.h"
#include "nv50_resource.h"
-struct push_context {
- struct nv50_context *nv50;
+#include "nv50_3d.xml.h"
- unsigned vtx_size;
+struct push_context {
+ struct nouveau_channel *chan;
void *idxbuf;
- int32_t idxbias;
- unsigned idxsize;
float edgeflag;
int edgeflag_attr;
- struct {
- void *map;
- unsigned stride;
- unsigned divisor;
- unsigned step;
- void (*push)(struct nouveau_channel *, void *);
- } attr[16];
- unsigned attr_nr;
+ uint32_t vertex_words;
+ uint32_t packet_vertex_limit;
+
+ struct translate *translate;
+
+ boolean primitive_restart;
+ uint32_t prim;
+ uint32_t restart_index;
+ uint32_t instance_id;
};
-static void
-emit_b32_1(struct nouveau_channel *chan, void *data)
+static INLINE unsigned
+prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
{
- uint32_t *v = data;
-
- OUT_RING(chan, v[0]);
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
}
-static void
-emit_b32_2(struct nouveau_channel *chan, void *data)
+static INLINE unsigned
+prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
{
- uint32_t *v = data;
-
- OUT_RING(chan, v[0]);
- OUT_RING(chan, v[1]);
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
}
-static void
-emit_b32_3(struct nouveau_channel *chan, void *data)
+static INLINE unsigned
+prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
{
- uint32_t *v = data;
-
- OUT_RING(chan, v[0]);
- OUT_RING(chan, v[1]);
- OUT_RING(chan, v[2]);
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
}
static void
-emit_b32_4(struct nouveau_channel *chan, void *data)
+emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
{
- uint32_t *v = data;
+ uint8_t *elts = (uint8_t *)ctx->idxbuf + start;
- OUT_RING(chan, v[0]);
- OUT_RING(chan, v[1]);
- OUT_RING(chan, v[2]);
- OUT_RING(chan, v[3]);
-}
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
-static void
-emit_b16_1(struct nouveau_channel *chan, void *data)
-{
- uint16_t *v = data;
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i08(elts, push, ctx->restart_index);
- OUT_RING(chan, v[0]);
-}
+ size = ctx->vertex_words * nr;
-static void
-emit_b16_3(struct nouveau_channel *chan, void *data)
-{
- uint16_t *v = data;
+ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
- OUT_RING(chan, (v[1] << 16) | v[0]);
- OUT_RING(chan, v[2]);
-}
+ ctx->translate->run_elts8(ctx->translate, elts, nr, ctx->instance_id,
+ ctx->chan->cur);
-static void
-emit_b08_1(struct nouveau_channel *chan, void *data)
-{
- uint8_t *v = data;
+ ctx->chan->cur += size;
+ count -= nr;
+ elts += nr;
- OUT_RING(chan, v[0]);
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_RING(ctx->chan, RING_3D(VB_ELEMENT_U32), 1);
+ OUT_RING (ctx->chan, ctx->restart_index);
+ }
+ }
}
static void
-emit_b08_3(struct nouveau_channel *chan, void *data)
+emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
{
- uint8_t *v = data;
+ uint16_t *elts = (uint16_t *)ctx->idxbuf + start;
- OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
-}
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
-static INLINE void
-emit_vertex(struct push_context *ctx, unsigned n)
-{
- struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
- struct nouveau_channel *chan = tesla->channel;
- int i;
-
- if (ctx->edgeflag_attr < 16) {
- float *edgeflag = (float *)
- ((uint8_t *)ctx->attr[ctx->edgeflag_attr].map +
- ctx->attr[ctx->edgeflag_attr].stride * n);
-
- if (*edgeflag != ctx->edgeflag) {
- BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
- OUT_RING (chan, *edgeflag ? 1 : 0);
- ctx->edgeflag = *edgeflag;
- }
- }
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i16(elts, push, ctx->restart_index);
- BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, ctx->vtx_size);
- for (i = 0; i < ctx->attr_nr; i++)
- ctx->attr[i].push(chan,
- (uint8_t *)ctx->attr[i].map + ctx->attr[i].stride * n);
-}
+ size = ctx->vertex_words * nr;
-static void
-emit_edgeflag(void *priv, boolean enabled)
-{
- struct push_context *ctx = priv;
- struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
- struct nouveau_channel *chan = tesla->channel;
+ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
- BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
- OUT_RING (chan, enabled ? 1 : 0);
-}
+ ctx->translate->run_elts16(ctx->translate, elts, nr, ctx->instance_id,
+ ctx->chan->cur);
-static void
-emit_elt08(void *priv, unsigned start, unsigned count)
-{
- struct push_context *ctx = priv;
- uint8_t *idxbuf = ctx->idxbuf;
+ ctx->chan->cur += size;
+ count -= nr;
+ elts += nr;
- while (count--)
- emit_vertex(ctx, idxbuf[start++]);
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_RING(ctx->chan, RING_3D(VB_ELEMENT_U32), 1);
+ OUT_RING (ctx->chan, ctx->restart_index);
+ }
+ }
}
static void
-emit_elt08_biased(void *priv, unsigned start, unsigned count)
+emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
{
- struct push_context *ctx = priv;
- uint8_t *idxbuf = ctx->idxbuf;
+ uint32_t *elts = (uint32_t *)ctx->idxbuf + start;
- while (count--)
- emit_vertex(ctx, idxbuf[start++] + ctx->idxbias);
-}
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
-static void
-emit_elt16(void *priv, unsigned start, unsigned count)
-{
- struct push_context *ctx = priv;
- uint16_t *idxbuf = ctx->idxbuf;
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i32(elts, push, ctx->restart_index);
- while (count--)
- emit_vertex(ctx, idxbuf[start++]);
-}
+ size = ctx->vertex_words * nr;
-static void
-emit_elt16_biased(void *priv, unsigned start, unsigned count)
-{
- struct push_context *ctx = priv;
- uint16_t *idxbuf = ctx->idxbuf;
+ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
- while (count--)
- emit_vertex(ctx, idxbuf[start++] + ctx->idxbias);
-}
+ ctx->translate->run_elts(ctx->translate, elts, nr, ctx->instance_id,
+ ctx->chan->cur);
-static void
-emit_elt32(void *priv, unsigned start, unsigned count)
-{
- struct push_context *ctx = priv;
- uint32_t *idxbuf = ctx->idxbuf;
+ ctx->chan->cur += size;
+ count -= nr;
+ elts += nr;
- while (count--)
- emit_vertex(ctx, idxbuf[start++]);
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_RING(ctx->chan, RING_3D(VB_ELEMENT_U32), 1);
+ OUT_RING (ctx->chan, ctx->restart_index);
+ }
+ }
}
static void
-emit_elt32_biased(void *priv, unsigned start, unsigned count)
+emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
{
- struct push_context *ctx = priv;
- uint32_t *idxbuf = ctx->idxbuf;
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size = ctx->vertex_words * push;
+
+ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
- while (count--)
- emit_vertex(ctx, idxbuf[start++] + ctx->idxbias);
+ ctx->translate->run(ctx->translate, start, push, ctx->instance_id,
+ ctx->chan->cur);
+ ctx->chan->cur += size;
+ count -= push;
+ start += push;
+ }
}
-static void
-emit_verts(void *priv, unsigned start, unsigned count)
+
+#define NV50_PRIM_GL_CASE(n) \
+ case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+static INLINE unsigned
+nv50_prim_gl(unsigned prim)
{
- while (count--)
- emit_vertex(priv, start++);
+ switch (prim) {
+ NV50_PRIM_GL_CASE(POINTS);
+ NV50_PRIM_GL_CASE(LINES);
+ NV50_PRIM_GL_CASE(LINE_LOOP);
+ NV50_PRIM_GL_CASE(LINE_STRIP);
+ NV50_PRIM_GL_CASE(TRIANGLES);
+ NV50_PRIM_GL_CASE(TRIANGLE_STRIP);
+ NV50_PRIM_GL_CASE(TRIANGLE_FAN);
+ NV50_PRIM_GL_CASE(QUADS);
+ NV50_PRIM_GL_CASE(QUAD_STRIP);
+ NV50_PRIM_GL_CASE(POLYGON);
+ NV50_PRIM_GL_CASE(LINES_ADJACENCY);
+ NV50_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+ NV50_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+ NV50_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+ /*
+ NV50_PRIM_GL_CASE(PATCHES); */
+ default:
+ return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+ break;
+ }
}
void
-nv50_push_elements_instanced(struct pipe_context *pipe,
- struct pipe_resource *idxbuf,
- unsigned idxsize, int idxbias,
- unsigned mode, unsigned start, unsigned count,
- unsigned i_start, unsigned i_count)
+nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_channel *chan = tesla->channel;
struct push_context ctx;
- const unsigned p_overhead = 4 + /* begin/end */
- 4; /* potential edgeflag enable/disable */
- const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */
- 2; /* potential edgeflag modification */
- struct util_split_prim s;
- unsigned vtx_size;
- boolean nzi = FALSE;
- int i;
-
- ctx.nv50 = nv50;
- ctx.attr_nr = 0;
- ctx.idxbuf = NULL;
- ctx.vtx_size = 0;
- ctx.edgeflag = 0.5f;
- ctx.edgeflag_attr = nv50->vertprog->vp.edgeflag;
-
- /* map vertex buffers, determine vertex size */
- for (i = 0; i < nv50->vtxelt->num_elements; i++) {
- struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
- struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index];
- struct nouveau_bo *bo = nv50_resource(vb->buffer)->bo;
- unsigned size, nr_components, n;
-
- if (!(nv50->vbo_fifo & (1 << i)))
- continue;
- n = ctx.attr_nr++;
-
- if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
- assert(bo->map);
+ unsigned i, index_size;
+ unsigned inst = info->instance_count;
+ boolean apply_bias = info->indexed && info->index_bias;
+
+ ctx.chan = nv50->screen->base.channel;
+ ctx.translate = nv50->vertex->translate;
+ ctx.packet_vertex_limit = nv50->vertex->packet_vertex_limit;
+ ctx.vertex_words = nv50->vertex->vertex_size;
+
+ for (i = 0; i < nv50->num_vtxbufs; ++i) {
+ uint8_t *data;
+ struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
+ struct nv04_resource *res = nv04_resource(vb->buffer);
+
+ data = nouveau_resource_map_offset(&nv50->base, res,
+ vb->buffer_offset, NOUVEAU_BO_RD);
+
+ if (apply_bias && likely(!(nv50->vertex->instance_bufs & (1 << i))))
+ data += info->index_bias * vb->stride;
+
+ ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
+ }
+
+ if (info->indexed) {
+ ctx.idxbuf = nouveau_resource_map_offset(&nv50->base,
+ nv04_resource(nv50->idxbuf.buffer),
+ nv50->idxbuf.offset, NOUVEAU_BO_RD);
+ if (!ctx.idxbuf)
return;
- }
- ctx.attr[n].map = (uint8_t *)bo->map + vb->buffer_offset + ve->src_offset;
- nouveau_bo_unmap(bo);
-
- ctx.attr[n].stride = vb->stride;
- ctx.attr[n].divisor = ve->instance_divisor;
- if (ctx.attr[n].divisor) {
- ctx.attr[n].step = i_start % ve->instance_divisor;
- ctx.attr[n].map = (uint8_t *)ctx.attr[n].map + i_start * vb->stride;
- }
+ index_size = nv50->idxbuf.index_size;
+ ctx.primitive_restart = info->primitive_restart;
+ ctx.restart_index = info->restart_index;
+ } else {
+ ctx.idxbuf = NULL;
+ index_size = 0;
+ ctx.primitive_restart = FALSE;
+ ctx.restart_index = 0;
+ }
+
+ ctx.instance_id = info->start_instance;
+ ctx.prim = nv50_prim_gl(info->mode);
- size = util_format_get_component_bits(ve->src_format,
- UTIL_FORMAT_COLORSPACE_RGB, 0);
- nr_components = util_format_get_nr_components(ve->src_format);
- switch (size) {
- case 8:
- switch (nr_components) {
- case 1: ctx.attr[n].push = emit_b08_1; break;
- case 2: ctx.attr[n].push = emit_b16_1; break;
- case 3: ctx.attr[n].push = emit_b08_3; break;
- case 4: ctx.attr[n].push = emit_b32_1; break;
- }
- ctx.vtx_size++;
+ if (info->primitive_restart) {
+ BEGIN_RING(ctx.chan, RING_3D(PRIM_RESTART_ENABLE), 2);
+ OUT_RING (ctx.chan, 1);
+ OUT_RING (ctx.chan, info->restart_index);
+ } else
+ if (nv50->state.prim_restart) {
+ BEGIN_RING(ctx.chan, RING_3D(PRIM_RESTART_ENABLE), 1);
+ OUT_RING (ctx.chan, 0);
+ }
+ nv50->state.prim_restart = info->primitive_restart;
+
+ while (inst--) {
+ BEGIN_RING(ctx.chan, RING_3D(VERTEX_BEGIN_GL), 1);
+ OUT_RING (ctx.chan, ctx.prim);
+ switch (index_size) {
+ case 0:
+ emit_vertices_seq(&ctx, info->start, info->count);
+ break;
+ case 1:
+ emit_vertices_i08(&ctx, info->start, info->count);
break;
- case 16:
- switch (nr_components) {
- case 1: ctx.attr[n].push = emit_b16_1; break;
- case 2: ctx.attr[n].push = emit_b32_1; break;
- case 3: ctx.attr[n].push = emit_b16_3; break;
- case 4: ctx.attr[n].push = emit_b32_2; break;
- }
- ctx.vtx_size += (nr_components + 1) >> 1;
+ case 2:
+ emit_vertices_i16(&ctx, info->start, info->count);
break;
- case 32:
- switch (nr_components) {
- case 1: ctx.attr[n].push = emit_b32_1; break;
- case 2: ctx.attr[n].push = emit_b32_2; break;
- case 3: ctx.attr[n].push = emit_b32_3; break;
- case 4: ctx.attr[n].push = emit_b32_4; break;
- }
- ctx.vtx_size += nr_components;
+ case 4:
+ emit_vertices_i32(&ctx, info->start, info->count);
break;
default:
assert(0);
- return;
+ break;
}
- }
- vtx_size = ctx.vtx_size + v_overhead;
+ BEGIN_RING(ctx.chan, RING_3D(VERTEX_END_GL), 1);
+ OUT_RING (ctx.chan, 0);
- /* map index buffer, if present */
- if (idxbuf) {
- struct nouveau_bo *bo = nv50_resource(idxbuf)->bo;
-
- if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
- assert(bo->map);
- return;
- }
- ctx.idxbuf = bo->map;
- ctx.idxbias = idxbias;
- ctx.idxsize = idxsize;
- nouveau_bo_unmap(bo);
+ ctx.instance_id++;
+ ctx.prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
}
- s.priv = &ctx;
- s.edge = emit_edgeflag;
- if (idxbuf) {
- if (idxsize == 1)
- s.emit = idxbias ? emit_elt08_biased : emit_elt08;
- else
- if (idxsize == 2)
- s.emit = idxbias ? emit_elt16_biased : emit_elt16;
- else
- s.emit = idxbias ? emit_elt32_biased : emit_elt32;
- } else
- s.emit = emit_verts;
-
- /* per-instance loop */
- BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
- OUT_RING (chan, NV50_CB_AUX | (24 << 8));
- OUT_RING (chan, i_start);
- while (i_count--) {
- unsigned max_verts;
- boolean done;
-
- for (i = 0; i < ctx.attr_nr; i++) {
- if (!ctx.attr[i].divisor ||
- ctx.attr[i].divisor != ++ctx.attr[i].step)
- continue;
- ctx.attr[i].step = 0;
- ctx.attr[i].map = (uint8_t *)ctx.attr[i].map + ctx.attr[i].stride;
- }
+ if (info->indexed)
+ nouveau_resource_unmap(nv04_resource(nv50->idxbuf.buffer));
- util_split_prim_init(&s, mode, start, count);
- do {
- if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) {
- FIRE_RING(chan);
- if (!nv50_state_validate(nv50, p_overhead + (6 * vtx_size))) {
- assert(0);
- return;
- }
- }
-
- max_verts = AVAIL_RING(chan);
- max_verts -= p_overhead;
- max_verts /= vtx_size;
-
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
- OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0));
- done = util_split_prim_next(&s, max_verts);
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
- OUT_RING (chan, 0);
- } while (!done);
-
- nzi = TRUE;
- }
+ for (i = 0; i < nv50->num_vtxbufs; ++i)
+ nouveau_resource_unmap(nv04_resource(nv50->vtxbuf[i].buffer));
}
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
index 53f94820ce..2dce94a477 100644
--- a/src/gallium/drivers/nv50/nv50_query.c
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Ben Skeggs
+ * Copyright 2011 Nouveau Project
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -18,150 +18,320 @@
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
+ *
+ * Authors: Christoph Bumiller
*/
-#include "pipe/p_context.h"
-#include "util/u_inlines.h"
-
#include "nv50_context.h"
+#include "nouveau/nv_object.xml.h"
+
+/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
+ * (since we use only a single GPU channel per screen) will not work properly.
+ *
+ * The first is not that big of an issue because OpenGL does not allow nested
+ * queries anyway.
+ */
struct nv50_query {
- struct nouveau_bo *bo;
- unsigned type;
- boolean ready;
- uint64_t result;
+ uint32_t *data;
+ uint32_t type;
+ uint32_t sequence;
+ struct nouveau_bo *bo;
+ uint32_t base;
+ uint32_t offset; /* base + i * 16 */
+ boolean ready;
+ boolean is64bit;
+ struct nouveau_mm_allocation *mm;
};
+#define NV50_QUERY_ALLOC_SPACE 128
+
static INLINE struct nv50_query *
nv50_query(struct pipe_query *pipe)
{
- return (struct nv50_query *)pipe;
+ return (struct nv50_query *)pipe;
+}
+
+static boolean
+nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size)
+{
+ struct nv50_screen *screen = nv50->screen;
+ int ret;
+
+ if (q->bo) {
+ nouveau_bo_ref(NULL, &q->bo);
+ if (q->mm) {
+ if (q->ready)
+ nouveau_mm_free(q->mm);
+ else
+ nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, q->mm);
+ }
+ }
+ if (size) {
+ q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
+ if (!q->bo)
+ return FALSE;
+ q->offset = q->base;
+
+ ret = nouveau_bo_map_range(q->bo, q->base, size, NOUVEAU_BO_RD |
+ NOUVEAU_BO_NOSYNC);
+ if (ret) {
+ nv50_query_allocate(nv50, q, 0);
+ return FALSE;
+ }
+ q->data = q->bo->map;
+ nouveau_bo_unmap(q->bo);
+ }
+ return TRUE;
+}
+
+static void
+nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
+ FREE(nv50_query(pq));
}
static struct pipe_query *
nv50_query_create(struct pipe_context *pipe, unsigned type)
{
- struct nouveau_device *dev = nouveau_screen(pipe->screen)->device;
- struct nv50_query *q = CALLOC_STRUCT(nv50_query);
- int ret;
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_query *q;
+
+ q = CALLOC_STRUCT(nv50_query);
+ if (!q)
+ return NULL;
- assert (q->type == PIPE_QUERY_OCCLUSION_COUNTER);
- q->type = type;
+ if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) {
+ FREE(q);
+ return NULL;
+ }
- ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 256,
- 16, &q->bo);
- if (ret) {
- FREE(q);
- return NULL;
- }
+ q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
+ type == PIPE_QUERY_PRIMITIVES_EMITTED ||
+ type == PIPE_QUERY_SO_STATISTICS);
+ q->type = type;
- return (struct pipe_query *)q;
+ if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
+ q->offset -= 16;
+ q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */
+ }
+
+ return (struct pipe_query *)q;
}
static void
-nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+nv50_query_get(struct nouveau_channel *chan, struct nv50_query *q,
+ unsigned offset, uint32_t get)
{
- struct nv50_query *q = nv50_query(pq);
+ offset += q->offset;
- if (q) {
- nouveau_bo_ref(NULL, &q->bo);
- FREE(q);
- }
+ MARK_RING (chan, 5, 2);
+ BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
+ OUT_RELOCh(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ OUT_RING (chan, q->sequence);
+ OUT_RING (chan, get);
}
static void
nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nv50_query *q = nv50_query(pq);
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nv50_query *q = nv50_query(pq);
+
+ /* For occlusion queries we have to change the storage, because a previous
+ * query might set the initial render conition to FALSE even *after* we re-
+ * initialized it to TRUE.
+ */
+ if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
+ q->offset += 16;
+ q->data += 16 / sizeof(*q->data);
+ if (q->offset - q->base == NV50_QUERY_ALLOC_SPACE)
+ nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE);
- BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_RESET, 1);
- OUT_RING (chan, 1);
- BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_ENABLE, 1);
- OUT_RING (chan, 1);
+ /* XXX: can we do this with the GPU, and sync with respect to a previous
+ * query ?
+ */
+ q->data[1] = 1; /* initial render condition = TRUE */
+ }
+ if (!q->is64bit)
+ q->data[0] = q->sequence++; /* the previously used one */
- q->ready = FALSE;
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ BEGIN_RING(chan, RING_3D(COUNTER_RESET), 1);
+ OUT_RING (chan, NV50_3D_COUNTER_RESET_SAMPLECNT);
+ BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
+ OUT_RING (chan, 1);
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */
+ BEGIN_RING(chan, RING_3D(COUNTER_RESET), 1);
+ OUT_RING (chan, NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ BEGIN_RING(chan, RING_3D(COUNTER_RESET), 1);
+ OUT_RING (chan, NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK);
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ BEGIN_RING_NI(chan, RING_3D(COUNTER_RESET), 2);
+ OUT_RING (chan, NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK);
+ OUT_RING (chan, NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ case PIPE_QUERY_TIME_ELAPSED:
+ nv50_query_get(chan, q, 0x10, 0x00005002);
+ break;
+ default:
+ break;
+ }
+ q->ready = FALSE;
}
static void
nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nv50_query *q = nv50_query(pq);
-
- MARK_RING (chan, 5, 2); /* flush on lack of space or relocs */
- BEGIN_RING(chan, tesla, NV50TCL_QUERY_ADDRESS_HIGH, 4);
- OUT_RELOCh(chan, q->bo, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
- OUT_RELOCl(chan, q->bo, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
- OUT_RING (chan, 0x00000000);
- OUT_RING (chan, 0x0100f002);
-
- BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_ENABLE, 1);
- OUT_RING (chan, 0);
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nv50_query *q = nv50_query(pq);
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ nv50_query_get(chan, q, 0, 0x0100f002);
+ BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
+ OUT_RING (chan, 0);
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nv50_query_get(chan, q, 0, 0x06805002);
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ nv50_query_get(chan, q, 0, 0x05805002);
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ nv50_query_get(chan, q, 0x00, 0x05805002);
+ nv50_query_get(chan, q, 0x10, 0x06805002);
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ case PIPE_QUERY_TIME_ELAPSED:
+ nv50_query_get(chan, q, 0, 0x00005002);
+ break;
+ case PIPE_QUERY_GPU_FINISHED:
+ nv50_query_get(chan, q, 0, 0x1000f010);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static INLINE boolean
+nv50_query_ready(struct nv50_query *q)
+{
+ return q->ready || (!q->is64bit && (q->data[0] == q->sequence));
+}
+
+static INLINE boolean
+nv50_query_wait(struct nv50_query *q)
+{
+ int ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD);
+ if (ret)
+ return FALSE;
+ nouveau_bo_unmap(q->bo);
+ return TRUE;
}
static boolean
nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
- boolean wait, void *vresult)
+ boolean wait, void *result)
{
- uint64_t *result = (uint64_t*)vresult;
- struct nv50_query *q = nv50_query(pq);
- int ret;
-
- if (!q->ready) {
- ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD |
- (wait ? 0 : NOUVEAU_BO_NOWAIT));
- if (ret)
- return false;
- q->result = ((uint32_t *)q->bo->map)[1];
- q->ready = TRUE;
- nouveau_bo_unmap(q->bo);
- }
-
- *result = q->result;
- return q->ready;
+ struct nv50_query *q = nv50_query(pq);
+ uint64_t *res64 = result;
+ boolean *res8 = result;
+ uint64_t *data64 = (uint64_t *)q->data;
+
+ if (q->type == PIPE_QUERY_GPU_FINISHED) {
+ res8[0] = nv50_query_ready(q);
+ return TRUE;
+ }
+
+ if (!q->ready) /* update ? */
+ q->ready = nv50_query_ready(q);
+ if (!q->ready) {
+ struct nouveau_channel *chan = nv50_context(pipe)->screen->base.channel;
+ if (!wait) {
+ if (nouveau_bo_pending(q->bo) & NOUVEAU_BO_WR) /* for daft apps */
+ FIRE_RING(chan);
+ return FALSE;
+ }
+ if (!nv50_query_wait(q))
+ return FALSE;
+ }
+ q->ready = TRUE;
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
+ res64[0] = q->data[1];
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
+ case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
+ res64[0] = data64[0];
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ res64[0] = data64[0];
+ res64[1] = data64[1];
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
+ res64[0] = 1000000000;
+ res8[8] = (data64[0] == data64[2]) ? FALSE : TRUE;
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ res64[0] = data64[1] - data64[3];
+ break;
+ default:
+ return FALSE;
+ }
+
+ return TRUE;
}
static void
nv50_render_condition(struct pipe_context *pipe,
- struct pipe_query *pq, uint mode)
+ struct pipe_query *pq, uint mode)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nv50_query *q;
-
- if (!pq) {
- BEGIN_RING(chan, tesla, NV50TCL_COND_MODE, 1);
- OUT_RING (chan, NV50TCL_COND_MODE_ALWAYS);
- return;
- }
- q = nv50_query(pq);
-
- if (mode == PIPE_RENDER_COND_WAIT ||
- mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
- /* XXX: big fence, FIFO semaphore might be better */
- BEGIN_RING(chan, tesla, 0x0110, 1);
- OUT_RING (chan, 0);
- }
-
- BEGIN_RING(chan, tesla, NV50TCL_COND_ADDRESS_HIGH, 3);
- OUT_RELOCh(chan, q->bo, 0, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- OUT_RELOCl(chan, q->bo, 0, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- OUT_RING (chan, NV50TCL_COND_MODE_RES);
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nv50_query *q;
+
+ if (!pq) {
+ BEGIN_RING(chan, RING_3D(COND_MODE), 1);
+ OUT_RING (chan, NV50_3D_COND_MODE_ALWAYS);
+ return;
+ }
+ q = nv50_query(pq);
+
+ if (mode == PIPE_RENDER_COND_WAIT ||
+ mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
+ BEGIN_RING(chan, RING_3D_(NV50_GRAPH_WAIT_FOR_IDLE), 1);
+ OUT_RING (chan, 0);
+ }
+
+ MARK_RING (chan, 4, 2);
+ BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RING (chan, NV50_3D_COND_MODE_RES_NON_ZERO);
}
void
nv50_init_query_functions(struct nv50_context *nv50)
{
- nv50->pipe.create_query = nv50_query_create;
- nv50->pipe.destroy_query = nv50_query_destroy;
- nv50->pipe.begin_query = nv50_query_begin;
- nv50->pipe.end_query = nv50_query_end;
- nv50->pipe.get_query_result = nv50_query_result;
- nv50->pipe.render_condition = nv50_render_condition;
+ struct pipe_context *pipe = &nv50->base.pipe;
+
+ pipe->create_query = nv50_query_create;
+ pipe->destroy_query = nv50_query_destroy;
+ pipe->begin_query = nv50_query_begin;
+ pipe->end_query = nv50_query_end;
+ pipe->get_query_result = nv50_query_result;
+ pipe->render_condition = nv50_render_condition;
}
diff --git a/src/gallium/drivers/nv50/nv50_reg.h b/src/gallium/drivers/nv50/nv50_reg.h
deleted file mode 100644
index 949838b33f..0000000000
--- a/src/gallium/drivers/nv50/nv50_reg.h
+++ /dev/null
@@ -1,1827 +0,0 @@
-/*************************************************************************
-
- Autogenerated file, do not edit !
-
- This file was generated by renouveau-gen from renouveau.xml, the
- XML database of nvidia objects and methods. renouveau-gen and
- renouveau.xml can be found in CVS module renouveau of sourceforge.net
- project nouveau:
-
-cvs -z3 -d:pserver:anonymous@nouveau.cvs.sourceforge.net:/cvsroot/nouveau co -P renouveau
-
-**************************************************************************
-
- Copyright (C) 2006-2008 :
- Dmitry Baryshkov,
- Laurent Carlier,
- Matthieu Castet,
- Dawid Gajownik,
- Jeremy Kolb,
- Stephane Loeuillet,
- Patrice Mandin,
- Stephane Marchesin,
- Serge Martin,
- Sylvain Munaut,
- Simon Raffeiner,
- Ben Skeggs,
- Erik Waling,
- koala_br,
-
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*************************************************************************/
-
-
-#ifndef NOUVEAU_REG_H
-#define NOUVEAU_REG_H 1
-
-
-#define NV04_MEMORY_TO_MEMORY_FORMAT 0x00000039
-
-#define NV04_MEMORY_TO_MEMORY_FORMAT_NOP 0x00000100
-#define NV04_MEMORY_TO_MEMORY_FORMAT_NOTIFY 0x00000104
-#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY 0x00000180
-#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN 0x00000184
-#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_OUT 0x00000188
-#define NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN 0x0000030c
-#define NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT 0x00000310
-#define NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN 0x00000314
-#define NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT 0x00000318
-#define NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN 0x0000031c
-#define NV04_MEMORY_TO_MEMORY_FORMAT_LINE_COUNT 0x00000320
-#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT 0x00000324
-#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_INPUT_INC_SHIFT 0
-#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_INPUT_INC_MASK 0x000000ff
-#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_OUTPUT_INC_SHIFT 8
-#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_OUTPUT_INC_MASK 0x0000ff00
-#define NV04_MEMORY_TO_MEMORY_FORMAT_BUF_NOTIFY 0x00000328
-
-
-#define NV50_MEMORY_TO_MEMORY_FORMAT 0x00005039
-
-#define NV50_MEMORY_TO_MEMORY_FORMAT_SERIALIZE 0x00000110
-#define NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN 0x00000200
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_MODE_IN 0x00000204
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_PITCH_IN 0x00000208
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_HEIGHT_IN 0x0000020c
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_DEPTH_IN 0x00000210
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_Z 0x00000214
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN 0x00000218
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_X_SHIFT 0
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_X_MASK 0x0000ffff
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_Y_SHIFT 16
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_Y_MASK 0xffff0000
-#define NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT 0x0000021c
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_MODE_OUT 0x00000220
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_PITCH_OUT 0x00000224
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_HEIGHT_OUT 0x00000228
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_DEPTH_OUT 0x0000022c
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_Z 0x00000230
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT 0x00000234
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_X_SHIFT 0
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_X_MASK 0x0000ffff
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_Y_SHIFT 16
-#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_Y_MASK 0xffff0000
-#define NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH 0x00000238
-#define NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_HIGH 0x0000023c
-
-
-#define NV50_2D 0x0000502d
-
-#define NV50_2D_NOP 0x00000100
-#define NV50_2D_NOTIFY 0x00000104
-#define NV50_2D_SERIALIZE 0x00000110
-#define NV50_2D_DMA_NOTIFY 0x00000180
-#define NV50_2D_DMA_DST 0x00000184
-#define NV50_2D_DMA_SRC 0x00000188
-#define NV50_2D_DMA_COND 0x0000018c
-#define NV50_2D_DST_FORMAT 0x00000200
-#define NV50_2D_DST_FORMAT_R32G32B32A32_FLOAT 0x000000c0
-#define NV50_2D_DST_FORMAT_R32G32B32A32_SINT 0x000000c1
-#define NV50_2D_DST_FORMAT_R32G32B32A32_UINT 0x000000c2
-#define NV50_2D_DST_FORMAT_R32G32B32X32_FLOAT 0x000000c3
-#define NV50_2D_DST_FORMAT_R16G16B16A16_UNORM 0x000000c6
-#define NV50_2D_DST_FORMAT_R16G16B16A16_SNORM 0x000000c7
-#define NV50_2D_DST_FORMAT_R16G16B16A16_SINT 0x000000c8
-#define NV50_2D_DST_FORMAT_R16G16B16A16_UINT 0x000000c9
-#define NV50_2D_DST_FORMAT_R16G16B16A16_FLOAT 0x000000ca
-#define NV50_2D_DST_FORMAT_R32G32_FLOAT 0x000000cb
-#define NV50_2D_DST_FORMAT_R32G32_SINT 0x000000cc
-#define NV50_2D_DST_FORMAT_R32G32_UINT 0x000000cd
-#define NV50_2D_DST_FORMAT_R16G16B16X16_FLOAT 0x000000ce
-#define NV50_2D_DST_FORMAT_A8R8G8B8_UNORM 0x000000cf
-#define NV50_2D_DST_FORMAT_A8R8G8B8_SRGB 0x000000d0
-#define NV50_2D_DST_FORMAT_A2B10G10R10_UNORM 0x000000d1
-#define NV50_2D_DST_FORMAT_A2B10G10R10_UINT 0x000000d2
-#define NV50_2D_DST_FORMAT_A8B8G8R8_UNORM 0x000000d5
-#define NV50_2D_DST_FORMAT_A8B8G8R8_SRGB 0x000000d6
-#define NV50_2D_DST_FORMAT_A8B8G8R8_SNORM 0x000000d7
-#define NV50_2D_DST_FORMAT_A8B8G8R8_SINT 0x000000d8
-#define NV50_2D_DST_FORMAT_A8B8G8R8_UINT 0x000000d9
-#define NV50_2D_DST_FORMAT_R16G16_UNORM 0x000000da
-#define NV50_2D_DST_FORMAT_R16G16_SNORM 0x000000db
-#define NV50_2D_DST_FORMAT_R16G16_SINT 0x000000dc
-#define NV50_2D_DST_FORMAT_R16G16_UINT 0x000000dd
-#define NV50_2D_DST_FORMAT_R16G16_FLOAT 0x000000de
-#define NV50_2D_DST_FORMAT_A2R10G10B10_UNORM 0x000000df
-#define NV50_2D_DST_FORMAT_B10G11R11_FLOAT 0x000000e0
-#define NV50_2D_DST_FORMAT_R32_FLOAT 0x000000e5
-#define NV50_2D_DST_FORMAT_X8R8G8B8_UNORM 0x000000e6
-#define NV50_2D_DST_FORMAT_X8R8G8B8_SRGB 0x000000e7
-#define NV50_2D_DST_FORMAT_R5G6B5_UNORM 0x000000e8
-#define NV50_2D_DST_FORMAT_A1R5G5B5_UNORM 0x000000e9
-#define NV50_2D_DST_FORMAT_R8G8_UNORM 0x000000ea
-#define NV50_2D_DST_FORMAT_R8G8_SNORM 0x000000eb
-#define NV50_2D_DST_FORMAT_R8G8_SINT 0x000000ec
-#define NV50_2D_DST_FORMAT_R8G8_UINT 0x000000ed
-#define NV50_2D_DST_FORMAT_R16_UNORM 0x000000ee
-#define NV50_2D_DST_FORMAT_R16_SNORM 0x000000ef
-#define NV50_2D_DST_FORMAT_R16_SINT 0x000000f0
-#define NV50_2D_DST_FORMAT_R16_UINT 0x000000f1
-#define NV50_2D_DST_FORMAT_R16_FLOAT 0x000000f2
-#define NV50_2D_DST_FORMAT_R8_UNORM 0x000000f3
-#define NV50_2D_DST_FORMAT_R8_SNORM 0x000000f4
-#define NV50_2D_DST_FORMAT_R8_SINT 0x000000f5
-#define NV50_2D_DST_FORMAT_R8_UINT 0x000000f6
-#define NV50_2D_DST_FORMAT_A8_UNORM 0x000000f7
-#define NV50_2D_DST_FORMAT_X1R5G5B5_UNORM 0x000000f8
-#define NV50_2D_DST_FORMAT_X8B8G8R8_UNORM 0x000000f9
-#define NV50_2D_DST_FORMAT_X8B8G8R8_SRGB 0x000000fa
-#define NV50_2D_DST_LINEAR 0x00000204
-#define NV50_2D_DST_TILE_MODE 0x00000208
-#define NV50_2D_DST_DEPTH 0x0000020c
-#define NV50_2D_DST_LAYER 0x00000210
-#define NV50_2D_DST_PITCH 0x00000214
-#define NV50_2D_DST_WIDTH 0x00000218
-#define NV50_2D_DST_HEIGHT 0x0000021c
-#define NV50_2D_DST_ADDRESS_HIGH 0x00000220
-#define NV50_2D_DST_ADDRESS_LOW 0x00000224
-#define NV50_2D_SRC_FORMAT 0x00000230
-#define NV50_2D_SRC_FORMAT_R32G32B32A32_FLOAT 0x000000c0
-#define NV50_2D_SRC_FORMAT_R32G32B32A32_SINT 0x000000c1
-#define NV50_2D_SRC_FORMAT_R32G32B32A32_UINT 0x000000c2
-#define NV50_2D_SRC_FORMAT_R32G32B32X32_FLOAT 0x000000c3
-#define NV50_2D_SRC_FORMAT_R16G16B16A16_UNORM 0x000000c6
-#define NV50_2D_SRC_FORMAT_R16G16B16A16_SNORM 0x000000c7
-#define NV50_2D_SRC_FORMAT_R16G16B16A16_SINT 0x000000c8
-#define NV50_2D_SRC_FORMAT_R16G16B16A16_UINT 0x000000c9
-#define NV50_2D_SRC_FORMAT_R16G16B16A16_FLOAT 0x000000ca
-#define NV50_2D_SRC_FORMAT_R32G32_FLOAT 0x000000cb
-#define NV50_2D_SRC_FORMAT_R32G32_SINT 0x000000cc
-#define NV50_2D_SRC_FORMAT_R32G32_UINT 0x000000cd
-#define NV50_2D_SRC_FORMAT_R16G16B16X16_FLOAT 0x000000ce
-#define NV50_2D_SRC_FORMAT_A8R8G8B8_UNORM 0x000000cf
-#define NV50_2D_SRC_FORMAT_A8R8G8B8_SRGB 0x000000d0
-#define NV50_2D_SRC_FORMAT_A2B10G10R10_UNORM 0x000000d1
-#define NV50_2D_SRC_FORMAT_A2B10G10R10_UINT 0x000000d2
-#define NV50_2D_SRC_FORMAT_A8B8G8R8_UNORM 0x000000d5
-#define NV50_2D_SRC_FORMAT_A8B8G8R8_SRGB 0x000000d6
-#define NV50_2D_SRC_FORMAT_A8B8G8R8_SNORM 0x000000d7
-#define NV50_2D_SRC_FORMAT_A8B8G8R8_SINT 0x000000d8
-#define NV50_2D_SRC_FORMAT_A8B8G8R8_UINT 0x000000d9
-#define NV50_2D_SRC_FORMAT_R16G16_UNORM 0x000000da
-#define NV50_2D_SRC_FORMAT_R16G16_SNORM 0x000000db
-#define NV50_2D_SRC_FORMAT_R16G16_SINT 0x000000dc
-#define NV50_2D_SRC_FORMAT_R16G16_UINT 0x000000dd
-#define NV50_2D_SRC_FORMAT_R16G16_FLOAT 0x000000de
-#define NV50_2D_SRC_FORMAT_A2R10G10B10_UNORM 0x000000df
-#define NV50_2D_SRC_FORMAT_B10G11R11_FLOAT 0x000000e0
-#define NV50_2D_SRC_FORMAT_R32_FLOAT 0x000000e5
-#define NV50_2D_SRC_FORMAT_X8R8G8B8_UNORM 0x000000e6
-#define NV50_2D_SRC_FORMAT_X8R8G8B8_SRGB 0x000000e7
-#define NV50_2D_SRC_FORMAT_R5G6B5_UNORM 0x000000e8
-#define NV50_2D_SRC_FORMAT_A1R5G5B5_UNORM 0x000000e9
-#define NV50_2D_SRC_FORMAT_R8G8_UNORM 0x000000ea
-#define NV50_2D_SRC_FORMAT_R8G8_SNORM 0x000000eb
-#define NV50_2D_SRC_FORMAT_R8G8_SINT 0x000000ec
-#define NV50_2D_SRC_FORMAT_R8G8_UINT 0x000000ed
-#define NV50_2D_SRC_FORMAT_R16_UNORM 0x000000ee
-#define NV50_2D_SRC_FORMAT_R16_SNORM 0x000000ef
-#define NV50_2D_SRC_FORMAT_R16_SINT 0x000000f0
-#define NV50_2D_SRC_FORMAT_R16_UINT 0x000000f1
-#define NV50_2D_SRC_FORMAT_R16_FLOAT 0x000000f2
-#define NV50_2D_SRC_FORMAT_R8_UNORM 0x000000f3
-#define NV50_2D_SRC_FORMAT_R8_SNORM 0x000000f4
-#define NV50_2D_SRC_FORMAT_R8_SINT 0x000000f5
-#define NV50_2D_SRC_FORMAT_R8_UINT 0x000000f6
-#define NV50_2D_SRC_FORMAT_A8_UNORM 0x000000f7
-#define NV50_2D_SRC_FORMAT_X1R5G5B5_UNORM 0x000000f8
-#define NV50_2D_SRC_FORMAT_X8B8G8R8_UNORM 0x000000f9
-#define NV50_2D_SRC_FORMAT_X8B8G8R8_SRGB 0x000000fa
-#define NV50_2D_SRC_LINEAR 0x00000234
-#define NV50_2D_SRC_TILE_MODE 0x00000238
-#define NV50_2D_SRC_DEPTH 0x0000023c
-#define NV50_2D_SRC_LAYER 0x00000240
-#define NV50_2D_SRC_PITCH 0x00000244
-#define NV50_2D_SRC_WIDTH 0x00000248
-#define NV50_2D_SRC_HEIGHT 0x0000024c
-#define NV50_2D_SRC_ADDRESS_HIGH 0x00000250
-#define NV50_2D_SRC_ADDRESS_LOW 0x00000254
-#define NV50_2D_COND_ADDRESS_HIGH 0x00000264
-#define NV50_2D_COND_ADDRESS_LOW 0x00000268
-#define NV50_2D_COND_MODE 0x0000026c
-#define NV50_2D_COND_MODE_NEVER 0x00000000
-#define NV50_2D_COND_MODE_ALWAYS 0x00000001
-#define NV50_2D_COND_MODE_RES 0x00000002
-#define NV50_2D_COND_MODE_NOT_RES_AND_NOT_ID 0x00000003
-#define NV50_2D_COND_MODE_RES_OR_ID 0x00000004
-#define NV50_2D_CLIP_X 0x00000280
-#define NV50_2D_CLIP_Y 0x00000284
-#define NV50_2D_CLIP_W 0x00000288
-#define NV50_2D_CLIP_H 0x0000028c
-#define NV50_2D_CLIP_ENABLE 0x00000290
-#define NV50_2D_COLOR_KEY_FORMAT 0x00000294
-#define NV50_2D_COLOR_KEY_FORMAT_16BPP 0x00000000
-#define NV50_2D_COLOR_KEY_FORMAT_15BPP 0x00000001
-#define NV50_2D_COLOR_KEY_FORMAT_24BPP 0x00000002
-#define NV50_2D_COLOR_KEY_FORMAT_30BPP 0x00000003
-#define NV50_2D_COLOR_KEY_FORMAT_8BPP 0x00000004
-#define NV50_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005
-#define NV50_2D_COLOR_KEY_FORMAT_32BPP 0x00000006
-#define NV50_2D_COLOR_KEY 0x00000298
-#define NV50_2D_COLOR_KEY_ENABLE 0x0000029c
-#define NV50_2D_ROP 0x000002a0
-#define NV50_2D_OPERATION 0x000002ac
-#define NV50_2D_OPERATION_SRCCOPY_AND 0x00000000
-#define NV50_2D_OPERATION_ROP_AND 0x00000001
-#define NV50_2D_OPERATION_BLEND_AND 0x00000002
-#define NV50_2D_OPERATION_SRCCOPY 0x00000003
-#define NV50_2D_OPERATION_SRCCOPY_PREMULT 0x00000004
-#define NV50_2D_OPERATION_BLEND_PREMULT 0x00000005
-#define NV50_2D_PATTERN_FORMAT 0x000002e8
-#define NV50_2D_PATTERN_FORMAT_16BPP 0x00000000
-#define NV50_2D_PATTERN_FORMAT_15BPP 0x00000001
-#define NV50_2D_PATTERN_FORMAT_32BPP 0x00000002
-#define NV50_2D_PATTERN_FORMAT_8BPP 0x00000003
-#define NV50_2D_PATTERN_COLOR(x) (0x000002f0+((x)*4))
-#define NV50_2D_PATTERN_COLOR__SIZE 0x00000002
-#define NV50_2D_PATTERN_BITMAP(x) (0x000002f8+((x)*4))
-#define NV50_2D_PATTERN_BITMAP__SIZE 0x00000002
-#define NV50_2D_DRAW_SHAPE 0x00000580
-#define NV50_2D_DRAW_SHAPE_POINTS 0x00000000
-#define NV50_2D_DRAW_SHAPE_LINES 0x00000001
-#define NV50_2D_DRAW_SHAPE_LINE_STRIP 0x00000002
-#define NV50_2D_DRAW_SHAPE_TRIANGLES 0x00000003
-#define NV50_2D_DRAW_SHAPE_RECTANGLES 0x00000004
-#define NV50_2D_DRAW_COLOR_FORMAT 0x00000584
-#define NV50_2D_DRAW_COLOR_FORMAT_R32G32B32A32_FLOAT 0x000000c0
-#define NV50_2D_DRAW_COLOR_FORMAT_R32G32B32A32_SINT 0x000000c1
-#define NV50_2D_DRAW_COLOR_FORMAT_R32G32B32A32_UINT 0x000000c2
-#define NV50_2D_DRAW_COLOR_FORMAT_R32G32B32X32_FLOAT 0x000000c3
-#define NV50_2D_DRAW_COLOR_FORMAT_R16G16B16A16_UNORM 0x000000c6
-#define NV50_2D_DRAW_COLOR_FORMAT_R16G16B16A16_SNORM 0x000000c7
-#define NV50_2D_DRAW_COLOR_FORMAT_R16G16B16A16_SINT 0x000000c8
-#define NV50_2D_DRAW_COLOR_FORMAT_R16G16B16A16_UINT 0x000000c9
-#define NV50_2D_DRAW_COLOR_FORMAT_R16G16B16A16_FLOAT 0x000000ca
-#define NV50_2D_DRAW_COLOR_FORMAT_R32G32_FLOAT 0x000000cb
-#define NV50_2D_DRAW_COLOR_FORMAT_R32G32_SINT 0x000000cc
-#define NV50_2D_DRAW_COLOR_FORMAT_R32G32_UINT 0x000000cd
-#define NV50_2D_DRAW_COLOR_FORMAT_R16G16B16X16_FLOAT 0x000000ce
-#define NV50_2D_DRAW_COLOR_FORMAT_A8R8G8B8_UNORM 0x000000cf
-#define NV50_2D_DRAW_COLOR_FORMAT_A8R8G8B8_SRGB 0x000000d0
-#define NV50_2D_DRAW_COLOR_FORMAT_A2B10G10R10_UNORM 0x000000d1
-#define NV50_2D_DRAW_COLOR_FORMAT_A2B10G10R10_UINT 0x000000d2
-#define NV50_2D_DRAW_COLOR_FORMAT_A8B8G8R8_UNORM 0x000000d5
-#define NV50_2D_DRAW_COLOR_FORMAT_A8B8G8R8_SRGB 0x000000d6
-#define NV50_2D_DRAW_COLOR_FORMAT_A8B8G8R8_SNORM 0x000000d7
-#define NV50_2D_DRAW_COLOR_FORMAT_A8B8G8R8_SINT 0x000000d8
-#define NV50_2D_DRAW_COLOR_FORMAT_A8B8G8R8_UINT 0x000000d9
-#define NV50_2D_DRAW_COLOR_FORMAT_R16G16_UNORM 0x000000da
-#define NV50_2D_DRAW_COLOR_FORMAT_R16G16_SNORM 0x000000db
-#define NV50_2D_DRAW_COLOR_FORMAT_R16G16_SINT 0x000000dc
-#define NV50_2D_DRAW_COLOR_FORMAT_R16G16_UINT 0x000000dd
-#define NV50_2D_DRAW_COLOR_FORMAT_R16G16_FLOAT 0x000000de
-#define NV50_2D_DRAW_COLOR_FORMAT_A2R10G10B10_UNORM 0x000000df
-#define NV50_2D_DRAW_COLOR_FORMAT_B10G11R11_FLOAT 0x000000e0
-#define NV50_2D_DRAW_COLOR_FORMAT_R32_FLOAT 0x000000e5
-#define NV50_2D_DRAW_COLOR_FORMAT_X8R8G8B8_UNORM 0x000000e6
-#define NV50_2D_DRAW_COLOR_FORMAT_X8R8G8B8_SRGB 0x000000e7
-#define NV50_2D_DRAW_COLOR_FORMAT_R5G6B5_UNORM 0x000000e8
-#define NV50_2D_DRAW_COLOR_FORMAT_A1R5G5B5_UNORM 0x000000e9
-#define NV50_2D_DRAW_COLOR_FORMAT_R8G8_UNORM 0x000000ea
-#define NV50_2D_DRAW_COLOR_FORMAT_R8G8_SNORM 0x000000eb
-#define NV50_2D_DRAW_COLOR_FORMAT_R8G8_SINT 0x000000ec
-#define NV50_2D_DRAW_COLOR_FORMAT_R8G8_UINT 0x000000ed
-#define NV50_2D_DRAW_COLOR_FORMAT_R16_UNORM 0x000000ee
-#define NV50_2D_DRAW_COLOR_FORMAT_R16_SNORM 0x000000ef
-#define NV50_2D_DRAW_COLOR_FORMAT_R16_SINT 0x000000f0
-#define NV50_2D_DRAW_COLOR_FORMAT_R16_UINT 0x000000f1
-#define NV50_2D_DRAW_COLOR_FORMAT_R16_FLOAT 0x000000f2
-#define NV50_2D_DRAW_COLOR_FORMAT_R8_UNORM 0x000000f3
-#define NV50_2D_DRAW_COLOR_FORMAT_R8_SNORM 0x000000f4
-#define NV50_2D_DRAW_COLOR_FORMAT_R8_SINT 0x000000f5
-#define NV50_2D_DRAW_COLOR_FORMAT_R8_UINT 0x000000f6
-#define NV50_2D_DRAW_COLOR_FORMAT_A8_UNORM 0x000000f7
-#define NV50_2D_DRAW_COLOR_FORMAT_X1R5G5B5_UNORM 0x000000f8
-#define NV50_2D_DRAW_COLOR_FORMAT_X8B8G8R8_UNORM 0x000000f9
-#define NV50_2D_DRAW_COLOR_FORMAT_X8B8G8R8_SRGB 0x000000fa
-#define NV50_2D_DRAW_COLOR 0x00000588
-#define NV50_2D_DRAW_POINT16 0x000005e0
-#define NV50_2D_DRAW_POINT16_X_SHIFT 0
-#define NV50_2D_DRAW_POINT16_X_MASK 0x0000ffff
-#define NV50_2D_DRAW_POINT16_Y_SHIFT 16
-#define NV50_2D_DRAW_POINT16_Y_MASK 0xffff0000
-#define NV50_2D_DRAW_POINT32_X(x) (0x00000600+((x)*8))
-#define NV50_2D_DRAW_POINT32_X__SIZE 0x00000040
-#define NV50_2D_DRAW_POINT32_Y(x) (0x00000604+((x)*8))
-#define NV50_2D_DRAW_POINT32_Y__SIZE 0x00000040
-#define NV50_2D_SIFC_BITMAP_ENABLE 0x00000800
-#define NV50_2D_SIFC_FORMAT 0x00000804
-#define NV50_2D_SIFC_FORMAT_R32G32B32A32_FLOAT 0x000000c0
-#define NV50_2D_SIFC_FORMAT_R32G32B32A32_SINT 0x000000c1
-#define NV50_2D_SIFC_FORMAT_R32G32B32A32_UINT 0x000000c2
-#define NV50_2D_SIFC_FORMAT_R32G32B32X32_FLOAT 0x000000c3
-#define NV50_2D_SIFC_FORMAT_R16G16B16A16_UNORM 0x000000c6
-#define NV50_2D_SIFC_FORMAT_R16G16B16A16_SNORM 0x000000c7
-#define NV50_2D_SIFC_FORMAT_R16G16B16A16_SINT 0x000000c8
-#define NV50_2D_SIFC_FORMAT_R16G16B16A16_UINT 0x000000c9
-#define NV50_2D_SIFC_FORMAT_R16G16B16A16_FLOAT 0x000000ca
-#define NV50_2D_SIFC_FORMAT_R32G32_FLOAT 0x000000cb
-#define NV50_2D_SIFC_FORMAT_R32G32_SINT 0x000000cc
-#define NV50_2D_SIFC_FORMAT_R32G32_UINT 0x000000cd
-#define NV50_2D_SIFC_FORMAT_R16G16B16X16_FLOAT 0x000000ce
-#define NV50_2D_SIFC_FORMAT_A8R8G8B8_UNORM 0x000000cf
-#define NV50_2D_SIFC_FORMAT_A8R8G8B8_SRGB 0x000000d0
-#define NV50_2D_SIFC_FORMAT_A2B10G10R10_UNORM 0x000000d1
-#define NV50_2D_SIFC_FORMAT_A2B10G10R10_UINT 0x000000d2
-#define NV50_2D_SIFC_FORMAT_A8B8G8R8_UNORM 0x000000d5
-#define NV50_2D_SIFC_FORMAT_A8B8G8R8_SRGB 0x000000d6
-#define NV50_2D_SIFC_FORMAT_A8B8G8R8_SNORM 0x000000d7
-#define NV50_2D_SIFC_FORMAT_A8B8G8R8_SINT 0x000000d8
-#define NV50_2D_SIFC_FORMAT_A8B8G8R8_UINT 0x000000d9
-#define NV50_2D_SIFC_FORMAT_R16G16_UNORM 0x000000da
-#define NV50_2D_SIFC_FORMAT_R16G16_SNORM 0x000000db
-#define NV50_2D_SIFC_FORMAT_R16G16_SINT 0x000000dc
-#define NV50_2D_SIFC_FORMAT_R16G16_UINT 0x000000dd
-#define NV50_2D_SIFC_FORMAT_R16G16_FLOAT 0x000000de
-#define NV50_2D_SIFC_FORMAT_A2R10G10B10_UNORM 0x000000df
-#define NV50_2D_SIFC_FORMAT_B10G11R11_FLOAT 0x000000e0
-#define NV50_2D_SIFC_FORMAT_R32_FLOAT 0x000000e5
-#define NV50_2D_SIFC_FORMAT_X8R8G8B8_UNORM 0x000000e6
-#define NV50_2D_SIFC_FORMAT_X8R8G8B8_SRGB 0x000000e7
-#define NV50_2D_SIFC_FORMAT_R5G6B5_UNORM 0x000000e8
-#define NV50_2D_SIFC_FORMAT_A1R5G5B5_UNORM 0x000000e9
-#define NV50_2D_SIFC_FORMAT_R8G8_UNORM 0x000000ea
-#define NV50_2D_SIFC_FORMAT_R8G8_SNORM 0x000000eb
-#define NV50_2D_SIFC_FORMAT_R8G8_SINT 0x000000ec
-#define NV50_2D_SIFC_FORMAT_R8G8_UINT 0x000000ed
-#define NV50_2D_SIFC_FORMAT_R16_UNORM 0x000000ee
-#define NV50_2D_SIFC_FORMAT_R16_SNORM 0x000000ef
-#define NV50_2D_SIFC_FORMAT_R16_SINT 0x000000f0
-#define NV50_2D_SIFC_FORMAT_R16_UINT 0x000000f1
-#define NV50_2D_SIFC_FORMAT_R16_FLOAT 0x000000f2
-#define NV50_2D_SIFC_FORMAT_R8_UNORM 0x000000f3
-#define NV50_2D_SIFC_FORMAT_R8_SNORM 0x000000f4
-#define NV50_2D_SIFC_FORMAT_R8_SINT 0x000000f5
-#define NV50_2D_SIFC_FORMAT_R8_UINT 0x000000f6
-#define NV50_2D_SIFC_FORMAT_A8_UNORM 0x000000f7
-#define NV50_2D_SIFC_FORMAT_X1R5G5B5_UNORM 0x000000f8
-#define NV50_2D_SIFC_FORMAT_X8B8G8R8_UNORM 0x000000f9
-#define NV50_2D_SIFC_FORMAT_X8B8G8R8_SRGB 0x000000fa
-#define NV50_2D_SIFC_BITMAP_UNK808 0x00000808
-#define NV50_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c
-#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810
-#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000
-#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001
-#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002
-#define NV50_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814
-#define NV50_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818
-#define NV50_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c
-#define NV50_2D_SIFC_WIDTH 0x00000838
-#define NV50_2D_SIFC_HEIGHT 0x0000083c
-#define NV50_2D_SIFC_DX_DU_FRACT 0x00000840
-#define NV50_2D_SIFC_DX_DU_INT 0x00000844
-#define NV50_2D_SIFC_DY_DV_FRACT 0x00000848
-#define NV50_2D_SIFC_DY_DV_INT 0x0000084c
-#define NV50_2D_SIFC_DST_X_FRACT 0x00000850
-#define NV50_2D_SIFC_DST_X_INT 0x00000854
-#define NV50_2D_SIFC_DST_Y_FRACT 0x00000858
-#define NV50_2D_SIFC_DST_Y_INT 0x0000085c
-#define NV50_2D_SIFC_DATA 0x00000860
-#define NV50_2D_BLIT_DST_X 0x000008b0
-#define NV50_2D_BLIT_DST_Y 0x000008b4
-#define NV50_2D_BLIT_DST_W 0x000008b8
-#define NV50_2D_BLIT_DST_H 0x000008bc
-#define NV50_2D_BLIT_DU_DX_FRACT 0x000008c0
-#define NV50_2D_BLIT_DU_DX_INT 0x000008c4
-#define NV50_2D_BLIT_DV_DY_FRACT 0x000008c8
-#define NV50_2D_BLIT_DV_DY_INT 0x000008cc
-#define NV50_2D_BLIT_SRC_X_FRACT 0x000008d0
-#define NV50_2D_BLIT_SRC_X_INT 0x000008d4
-#define NV50_2D_BLIT_SRC_Y_FRACT 0x000008d8
-#define NV50_2D_BLIT_SRC_Y_INT 0x000008dc
-
-
-#define NV50TCL 0x00005097
-
-#define NV50TCL_NOP 0x00000100
-#define NV50TCL_NOTIFY 0x00000104
-#define NV50TCL_SERIALIZE 0x00000110
-#define NV50TCL_DMA_NOTIFY 0x00000180
-#define NV50TCL_DMA_ZETA 0x00000184
-#define NV50TCL_DMA_QUERY 0x00000188
-#define NV50TCL_DMA_VTXBUF0 0x0000018c
-#define NV50TCL_DMA_LOCAL 0x00000190
-#define NV50TCL_DMA_STACK 0x00000194
-#define NV50TCL_DMA_CODE_CB 0x00000198
-#define NV50TCL_DMA_TSC 0x0000019c
-#define NV50TCL_DMA_TIC 0x000001a0
-#define NV50TCL_DMA_TEXTURE 0x000001a4
-#define NV50TCL_DMA_STRMOUT 0x000001a8
-#define NV50TCL_DMA_CLIPID 0x000001ac
-#define NV50TCL_DMA_COLOR(x) (0x000001c0+((x)*4))
-#define NV50TCL_DMA_COLOR__SIZE 0x00000008
-#define NV50TCL_RT_ADDRESS_HIGH(x) (0x00000200+((x)*32))
-#define NV50TCL_RT_ADDRESS_HIGH__SIZE 0x00000008
-#define NV50TCL_RT_ADDRESS_LOW(x) (0x00000204+((x)*32))
-#define NV50TCL_RT_ADDRESS_LOW__SIZE 0x00000008
-#define NV50TCL_RT_FORMAT(x) (0x00000208+((x)*32))
-#define NV50TCL_RT_FORMAT__SIZE 0x00000008
-#define NV50TCL_RT_FORMAT_R32G32B32A32_FLOAT 0x000000c0
-#define NV50TCL_RT_FORMAT_R32G32B32A32_SINT 0x000000c1
-#define NV50TCL_RT_FORMAT_R32G32B32A32_UINT 0x000000c2
-#define NV50TCL_RT_FORMAT_R32G32B32X32_FLOAT 0x000000c3
-#define NV50TCL_RT_FORMAT_R16G16B16A16_UNORM 0x000000c6
-#define NV50TCL_RT_FORMAT_R16G16B16A16_SNORM 0x000000c7
-#define NV50TCL_RT_FORMAT_R16G16B16A16_SINT 0x000000c8
-#define NV50TCL_RT_FORMAT_R16G16B16A16_UINT 0x000000c9
-#define NV50TCL_RT_FORMAT_R16G16B16A16_FLOAT 0x000000ca
-#define NV50TCL_RT_FORMAT_R32G32_FLOAT 0x000000cb
-#define NV50TCL_RT_FORMAT_R32G32_SINT 0x000000cc
-#define NV50TCL_RT_FORMAT_R32G32_UINT 0x000000cd
-#define NV50TCL_RT_FORMAT_R16G16B16X16_FLOAT 0x000000ce
-#define NV50TCL_RT_FORMAT_A8R8G8B8_UNORM 0x000000cf
-#define NV50TCL_RT_FORMAT_A8R8G8B8_SRGB 0x000000d0
-#define NV50TCL_RT_FORMAT_A2B10G10R10_UNORM 0x000000d1
-#define NV50TCL_RT_FORMAT_A2B10G10R10_UINT 0x000000d2
-#define NV50TCL_RT_FORMAT_A8B8G8R8_UNORM 0x000000d5
-#define NV50TCL_RT_FORMAT_A8B8G8R8_SRGB 0x000000d6
-#define NV50TCL_RT_FORMAT_A8B8G8R8_SNORM 0x000000d7
-#define NV50TCL_RT_FORMAT_A8B8G8R8_SINT 0x000000d8
-#define NV50TCL_RT_FORMAT_A8B8G8R8_UINT 0x000000d9
-#define NV50TCL_RT_FORMAT_R16G16_UNORM 0x000000da
-#define NV50TCL_RT_FORMAT_R16G16_SNORM 0x000000db
-#define NV50TCL_RT_FORMAT_R16G16_SINT 0x000000dc
-#define NV50TCL_RT_FORMAT_R16G16_UINT 0x000000dd
-#define NV50TCL_RT_FORMAT_R16G16_FLOAT 0x000000de
-#define NV50TCL_RT_FORMAT_A2R10G10B10_UNORM 0x000000df
-#define NV50TCL_RT_FORMAT_B10G11R11_FLOAT 0x000000e0
-#define NV50TCL_RT_FORMAT_R32_FLOAT 0x000000e5
-#define NV50TCL_RT_FORMAT_X8R8G8B8_UNORM 0x000000e6
-#define NV50TCL_RT_FORMAT_X8R8G8B8_SRGB 0x000000e7
-#define NV50TCL_RT_FORMAT_R5G6B5_UNORM 0x000000e8
-#define NV50TCL_RT_FORMAT_A1R5G5B5_UNORM 0x000000e9
-#define NV50TCL_RT_FORMAT_R8G8_UNORM 0x000000ea
-#define NV50TCL_RT_FORMAT_R8G8_SNORM 0x000000eb
-#define NV50TCL_RT_FORMAT_R8G8_SINT 0x000000ec
-#define NV50TCL_RT_FORMAT_R8G8_UINT 0x000000ed
-#define NV50TCL_RT_FORMAT_R16_UNORM 0x000000ee
-#define NV50TCL_RT_FORMAT_R16_SNORM 0x000000ef
-#define NV50TCL_RT_FORMAT_R16_SINT 0x000000f0
-#define NV50TCL_RT_FORMAT_R16_UINT 0x000000f1
-#define NV50TCL_RT_FORMAT_R16_FLOAT 0x000000f2
-#define NV50TCL_RT_FORMAT_R8_UNORM 0x000000f3
-#define NV50TCL_RT_FORMAT_R8_SNORM 0x000000f4
-#define NV50TCL_RT_FORMAT_R8_SINT 0x000000f5
-#define NV50TCL_RT_FORMAT_R8_UINT 0x000000f6
-#define NV50TCL_RT_FORMAT_A8_UNORM 0x000000f7
-#define NV50TCL_RT_FORMAT_X1R5G5B5_UNORM 0x000000f8
-#define NV50TCL_RT_FORMAT_X8B8G8R8_UNORM 0x000000f9
-#define NV50TCL_RT_FORMAT_X8B8G8R8_SRGB 0x000000fa
-#define NV50TCL_RT_TILE_MODE(x) (0x0000020c+((x)*32))
-#define NV50TCL_RT_TILE_MODE__SIZE 0x00000008
-#define NV50TCL_RT_LAYER_STRIDE(x) (0x00000210+((x)*32))
-#define NV50TCL_RT_LAYER_STRIDE__SIZE 0x00000008
-#define NV50TCL_VTX_ATTR_1F(x) (0x00000300+((x)*4))
-#define NV50TCL_VTX_ATTR_1F__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_2H(x) (0x00000340+((x)*4))
-#define NV50TCL_VTX_ATTR_2H__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_2H_X_SHIFT 0
-#define NV50TCL_VTX_ATTR_2H_X_MASK 0x0000ffff
-#define NV50TCL_VTX_ATTR_2H_Y_SHIFT 16
-#define NV50TCL_VTX_ATTR_2H_Y_MASK 0xffff0000
-#define NV50TCL_VTX_ATTR_2F_X(x) (0x00000380+((x)*8))
-#define NV50TCL_VTX_ATTR_2F_X__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_2F_Y(x) (0x00000384+((x)*8))
-#define NV50TCL_VTX_ATTR_2F_Y__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_3F_X(x) (0x00000400+((x)*16))
-#define NV50TCL_VTX_ATTR_3F_X__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_3F_Y(x) (0x00000404+((x)*16))
-#define NV50TCL_VTX_ATTR_3F_Y__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_3F_Z(x) (0x00000408+((x)*16))
-#define NV50TCL_VTX_ATTR_3F_Z__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_4F_X(x) (0x00000500+((x)*16))
-#define NV50TCL_VTX_ATTR_4F_X__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_4F_Y(x) (0x00000504+((x)*16))
-#define NV50TCL_VTX_ATTR_4F_Y__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_4F_Z(x) (0x00000508+((x)*16))
-#define NV50TCL_VTX_ATTR_4F_Z__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_4F_W(x) (0x0000050c+((x)*16))
-#define NV50TCL_VTX_ATTR_4F_W__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_4H_0(x) (0x00000600+((x)*8))
-#define NV50TCL_VTX_ATTR_4H_0__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_4H_0_X_SHIFT 0
-#define NV50TCL_VTX_ATTR_4H_0_X_MASK 0x0000ffff
-#define NV50TCL_VTX_ATTR_4H_0_Y_SHIFT 16
-#define NV50TCL_VTX_ATTR_4H_0_Y_MASK 0xffff0000
-#define NV50TCL_VTX_ATTR_4H_1(x) (0x00000604+((x)*8))
-#define NV50TCL_VTX_ATTR_4H_1__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_4H_1_Z_SHIFT 0
-#define NV50TCL_VTX_ATTR_4H_1_Z_MASK 0x0000ffff
-#define NV50TCL_VTX_ATTR_4H_1_W_SHIFT 16
-#define NV50TCL_VTX_ATTR_4H_1_W_MASK 0xffff0000
-#define NV50TCL_VTX_ATTR_2I(x) (0x00000680+((x)*4))
-#define NV50TCL_VTX_ATTR_2I__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_2I_X_SHIFT 0
-#define NV50TCL_VTX_ATTR_2I_X_MASK 0x0000ffff
-#define NV50TCL_VTX_ATTR_2I_Y_SHIFT 16
-#define NV50TCL_VTX_ATTR_2I_Y_MASK 0xffff0000
-#define NV50TCL_VTX_ATTR_2NI(x) (0x000006c0+((x)*4))
-#define NV50TCL_VTX_ATTR_2NI__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_2NI_X_SHIFT 0
-#define NV50TCL_VTX_ATTR_2NI_X_MASK 0x0000ffff
-#define NV50TCL_VTX_ATTR_2NI_Y_SHIFT 16
-#define NV50TCL_VTX_ATTR_2NI_Y_MASK 0xffff0000
-#define NV50TCL_VTX_ATTR_4I_0(x) (0x00000700+((x)*8))
-#define NV50TCL_VTX_ATTR_4I_0__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_4I_0_X_SHIFT 0
-#define NV50TCL_VTX_ATTR_4I_0_X_MASK 0x0000ffff
-#define NV50TCL_VTX_ATTR_4I_0_Y_SHIFT 16
-#define NV50TCL_VTX_ATTR_4I_0_Y_MASK 0xffff0000
-#define NV50TCL_VTX_ATTR_4I_1(x) (0x00000704+((x)*8))
-#define NV50TCL_VTX_ATTR_4I_1__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_4I_1_Z_SHIFT 0
-#define NV50TCL_VTX_ATTR_4I_1_Z_MASK 0x0000ffff
-#define NV50TCL_VTX_ATTR_4I_1_W_SHIFT 16
-#define NV50TCL_VTX_ATTR_4I_1_W_MASK 0xffff0000
-#define NV50TCL_VTX_ATTR_4NI_0(x) (0x00000780+((x)*8))
-#define NV50TCL_VTX_ATTR_4NI_0__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_4NI_0_X_SHIFT 0
-#define NV50TCL_VTX_ATTR_4NI_0_X_MASK 0x0000ffff
-#define NV50TCL_VTX_ATTR_4NI_0_Y_SHIFT 16
-#define NV50TCL_VTX_ATTR_4NI_0_Y_MASK 0xffff0000
-#define NV50TCL_VTX_ATTR_4NI_1(x) (0x00000784+((x)*8))
-#define NV50TCL_VTX_ATTR_4NI_1__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_4NI_1_Z_SHIFT 0
-#define NV50TCL_VTX_ATTR_4NI_1_Z_MASK 0x0000ffff
-#define NV50TCL_VTX_ATTR_4NI_1_W_SHIFT 16
-#define NV50TCL_VTX_ATTR_4NI_1_W_MASK 0xffff0000
-#define NV50TCL_VTX_ATTR_4UB(x) (0x00000800+((x)*4))
-#define NV50TCL_VTX_ATTR_4UB__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_4UB_X_SHIFT 0
-#define NV50TCL_VTX_ATTR_4UB_X_MASK 0x000000ff
-#define NV50TCL_VTX_ATTR_4UB_Y_SHIFT 8
-#define NV50TCL_VTX_ATTR_4UB_Y_MASK 0x0000ff00
-#define NV50TCL_VTX_ATTR_4UB_Z_SHIFT 16
-#define NV50TCL_VTX_ATTR_4UB_Z_MASK 0x00ff0000
-#define NV50TCL_VTX_ATTR_4UB_W_SHIFT 24
-#define NV50TCL_VTX_ATTR_4UB_W_MASK 0xff000000
-#define NV50TCL_VTX_ATTR_4B(x) (0x00000840+((x)*4))
-#define NV50TCL_VTX_ATTR_4B__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_4B_X_SHIFT 0
-#define NV50TCL_VTX_ATTR_4B_X_MASK 0x000000ff
-#define NV50TCL_VTX_ATTR_4B_Y_SHIFT 8
-#define NV50TCL_VTX_ATTR_4B_Y_MASK 0x0000ff00
-#define NV50TCL_VTX_ATTR_4B_Z_SHIFT 16
-#define NV50TCL_VTX_ATTR_4B_Z_MASK 0x00ff0000
-#define NV50TCL_VTX_ATTR_4B_W_SHIFT 24
-#define NV50TCL_VTX_ATTR_4B_W_MASK 0xff000000
-#define NV50TCL_VTX_ATTR_4NUB(x) (0x00000880+((x)*4))
-#define NV50TCL_VTX_ATTR_4NUB__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_4NUB_X_SHIFT 0
-#define NV50TCL_VTX_ATTR_4NUB_X_MASK 0x000000ff
-#define NV50TCL_VTX_ATTR_4NUB_Y_SHIFT 8
-#define NV50TCL_VTX_ATTR_4NUB_Y_MASK 0x0000ff00
-#define NV50TCL_VTX_ATTR_4NUB_Z_SHIFT 16
-#define NV50TCL_VTX_ATTR_4NUB_Z_MASK 0x00ff0000
-#define NV50TCL_VTX_ATTR_4NUB_W_SHIFT 24
-#define NV50TCL_VTX_ATTR_4NUB_W_MASK 0xff000000
-#define NV50TCL_VTX_ATTR_4NB(x) (0x000008c0+((x)*4))
-#define NV50TCL_VTX_ATTR_4NB__SIZE 0x00000010
-#define NV50TCL_VTX_ATTR_4NB_X_SHIFT 0
-#define NV50TCL_VTX_ATTR_4NB_X_MASK 0x000000ff
-#define NV50TCL_VTX_ATTR_4NB_Y_SHIFT 8
-#define NV50TCL_VTX_ATTR_4NB_Y_MASK 0x0000ff00
-#define NV50TCL_VTX_ATTR_4NB_Z_SHIFT 16
-#define NV50TCL_VTX_ATTR_4NB_Z_MASK 0x00ff0000
-#define NV50TCL_VTX_ATTR_4NB_W_SHIFT 24
-#define NV50TCL_VTX_ATTR_4NB_W_MASK 0xff000000
-#define NV50TCL_VERTEX_ARRAY_FORMAT(x) (0x00000900+((x)*16))
-#define NV50TCL_VERTEX_ARRAY_FORMAT__SIZE 0x00000010
-#define NV50TCL_VERTEX_ARRAY_FORMAT_STRIDE_SHIFT 0
-#define NV50TCL_VERTEX_ARRAY_FORMAT_STRIDE_MASK 0x00000fff
-#define NV50TCL_VERTEX_ARRAY_FORMAT_ENABLE (1 << 29)
-#define NV50TCL_VERTEX_ARRAY_START_HIGH(x) (0x00000904+((x)*16))
-#define NV50TCL_VERTEX_ARRAY_START_HIGH__SIZE 0x00000010
-#define NV50TCL_VERTEX_ARRAY_START_LOW(x) (0x00000908+((x)*16))
-#define NV50TCL_VERTEX_ARRAY_START_LOW__SIZE 0x00000010
-#define NV50TCL_VIEWPORT_SCALE_X(x) (0x00000a00+((x)*32))
-#define NV50TCL_VIEWPORT_SCALE_X__SIZE 0x00000010
-#define NV50TCL_VIEWPORT_SCALE_Y(x) (0x00000a04+((x)*32))
-#define NV50TCL_VIEWPORT_SCALE_Y__SIZE 0x00000010
-#define NV50TCL_VIEWPORT_SCALE_Z(x) (0x00000a08+((x)*32))
-#define NV50TCL_VIEWPORT_SCALE_Z__SIZE 0x00000010
-#define NV50TCL_VIEWPORT_TRANSLATE_X(x) (0x00000a0c+((x)*32))
-#define NV50TCL_VIEWPORT_TRANSLATE_X__SIZE 0x00000010
-#define NV50TCL_VIEWPORT_TRANSLATE_Y(x) (0x00000a10+((x)*32))
-#define NV50TCL_VIEWPORT_TRANSLATE_Y__SIZE 0x00000010
-#define NV50TCL_VIEWPORT_TRANSLATE_Z(x) (0x00000a14+((x)*32))
-#define NV50TCL_VIEWPORT_TRANSLATE_Z__SIZE 0x00000010
-#define NV50TCL_VIEWPORT_HORIZ(x) (0x00000c00+((x)*16))
-#define NV50TCL_VIEWPORT_HORIZ__SIZE 0x00000010
-#define NV50TCL_VIEWPORT_HORIZ_X_SHIFT 0
-#define NV50TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff
-#define NV50TCL_VIEWPORT_HORIZ_W_SHIFT 16
-#define NV50TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000
-#define NV50TCL_VIEWPORT_VERT(x) (0x00000c04+((x)*16))
-#define NV50TCL_VIEWPORT_VERT__SIZE 0x00000010
-#define NV50TCL_VIEWPORT_VERT_Y_SHIFT 0
-#define NV50TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff
-#define NV50TCL_VIEWPORT_VERT_H_SHIFT 16
-#define NV50TCL_VIEWPORT_VERT_H_MASK 0xffff0000
-#define NV50TCL_DEPTH_RANGE_NEAR(x) (0x00000c08+((x)*16))
-#define NV50TCL_DEPTH_RANGE_NEAR__SIZE 0x00000010
-#define NV50TCL_DEPTH_RANGE_FAR(x) (0x00000c0c+((x)*16))
-#define NV50TCL_DEPTH_RANGE_FAR__SIZE 0x00000010
-#define NV50TCL_VIEWPORT_CLIP_HORIZ(x) (0x00000d00+((x)*8))
-#define NV50TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008
-#define NV50TCL_VIEWPORT_CLIP_HORIZ_MIN_SHIFT 0
-#define NV50TCL_VIEWPORT_CLIP_HORIZ_MIN_MASK 0x0000ffff
-#define NV50TCL_VIEWPORT_CLIP_HORIZ_MAX_SHIFT 16
-#define NV50TCL_VIEWPORT_CLIP_HORIZ_MAX_MASK 0xffff0000
-#define NV50TCL_VIEWPORT_CLIP_VERT(x) (0x00000d04+((x)*8))
-#define NV50TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008
-#define NV50TCL_VIEWPORT_CLIP_VERT_MIN_SHIFT 0
-#define NV50TCL_VIEWPORT_CLIP_VERT_MIN_MASK 0x0000ffff
-#define NV50TCL_VIEWPORT_CLIP_VERT_MAX_SHIFT 16
-#define NV50TCL_VIEWPORT_CLIP_VERT_MAX_MASK 0xffff0000
-#define NV50TCL_CLIPID_REGION_HORIZ(x) (0x00000d40+((x)*8))
-#define NV50TCL_CLIPID_REGION_HORIZ__SIZE 0x00000004
-#define NV50TCL_CLIPID_REGION_VERT(x) (0x00000d44+((x)*8))
-#define NV50TCL_CLIPID_REGION_VERT__SIZE 0x00000004
-#define NV50TCL_VERTEX_BUFFER_FIRST 0x00000d74
-#define NV50TCL_VERTEX_BUFFER_COUNT 0x00000d78
-#define NV50TCL_CLEAR_COLOR(x) (0x00000d80+((x)*4))
-#define NV50TCL_CLEAR_COLOR__SIZE 0x00000004
-#define NV50TCL_CLEAR_DEPTH 0x00000d90
-#define NV50TCL_STACK_ADDRESS_HIGH 0x00000d94
-#define NV50TCL_STACK_ADDRESS_LOW 0x00000d98
-#define NV50TCL_STACK_SIZE_LOG 0x00000d9c
-#define NV50TCL_CLEAR_STENCIL 0x00000da0
-#define NV50TCL_STRMOUT_PRIMITIVE_COUNT 0x00000da8
-#define NV50TCL_POLYGON_MODE_FRONT 0x00000dac
-#define NV50TCL_POLYGON_MODE_FRONT_POINT 0x00001b00
-#define NV50TCL_POLYGON_MODE_FRONT_LINE 0x00001b01
-#define NV50TCL_POLYGON_MODE_FRONT_FILL 0x00001b02
-#define NV50TCL_POLYGON_MODE_BACK 0x00000db0
-#define NV50TCL_POLYGON_MODE_BACK_POINT 0x00001b00
-#define NV50TCL_POLYGON_MODE_BACK_LINE 0x00001b01
-#define NV50TCL_POLYGON_MODE_BACK_FILL 0x00001b02
-#define NV50TCL_POLYGON_SMOOTH_ENABLE 0x00000db4
-#define NV50TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0
-#define NV50TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4
-#define NV50TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8
-#define NV50TCL_WATCHDOG_TIMER 0x00000de4
-#define NV50TCL_WINDOW_OFFSET_X 0x00000df8
-#define NV50TCL_WINDOW_OFFSET_Y 0x00000dfc
-#define NV50TCL_SCISSOR_ENABLE(x) (0x00000e00+((x)*16))
-#define NV50TCL_SCISSOR_ENABLE__SIZE 0x00000010
-#define NV50TCL_SCISSOR_HORIZ(x) (0x00000e04+((x)*16))
-#define NV50TCL_SCISSOR_HORIZ__SIZE 0x00000010
-#define NV50TCL_SCISSOR_HORIZ_MIN_SHIFT 0
-#define NV50TCL_SCISSOR_HORIZ_MIN_MASK 0x0000ffff
-#define NV50TCL_SCISSOR_HORIZ_MAX_SHIFT 16
-#define NV50TCL_SCISSOR_HORIZ_MAX_MASK 0xffff0000
-#define NV50TCL_SCISSOR_VERT(x) (0x00000e08+((x)*16))
-#define NV50TCL_SCISSOR_VERT__SIZE 0x00000010
-#define NV50TCL_SCISSOR_VERT_MIN_SHIFT 0
-#define NV50TCL_SCISSOR_VERT_MIN_MASK 0x0000ffff
-#define NV50TCL_SCISSOR_VERT_MAX_SHIFT 16
-#define NV50TCL_SCISSOR_VERT_MAX_MASK 0xffff0000
-#define NV50TCL_CB_ADDR 0x00000f00
-#define NV50TCL_CB_ADDR_ID_SHIFT 8
-#define NV50TCL_CB_ADDR_ID_MASK 0x003fff00
-#define NV50TCL_CB_ADDR_BUFFER_SHIFT 0
-#define NV50TCL_CB_ADDR_BUFFER_MASK 0x0000007f
-#define NV50TCL_CB_DATA(x) (0x00000f04+((x)*4))
-#define NV50TCL_CB_DATA__SIZE 0x00000010
-#define NV50TCL_LOCAL_WARPS_LOG_ALLOC 0x00000f44
-#define NV50TCL_LOCAL_WARPS_NO_CLAMP 0x00000f48
-#define NV50TCL_STACK_WARPS_LOG_ALLOC 0x00000f4c
-#define NV50TCL_STACK_WARPS_NO_CLAMP 0x00000f50
-#define NV50TCL_STENCIL_BACK_FUNC_REF 0x00000f54
-#define NV50TCL_STENCIL_BACK_MASK 0x00000f58
-#define NV50TCL_STENCIL_BACK_FUNC_MASK 0x00000f5c
-#define NV50TCL_GP_ADDRESS_HIGH 0x00000f70
-#define NV50TCL_GP_ADDRESS_LOW 0x00000f74
-#define NV50TCL_VP_ADDRESS_HIGH 0x00000f7c
-#define NV50TCL_VP_ADDRESS_LOW 0x00000f80
-#define NV50TCL_VERTEX_RUNOUT_HIGH 0x00000f84
-#define NV50TCL_VERTEX_RUNOUT_LOW 0x00000f88
-#define NV50TCL_DEPTH_BOUNDS(x) (0x00000f9c+((x)*4))
-#define NV50TCL_DEPTH_BOUNDS__SIZE 0x00000002
-#define NV50TCL_FP_ADDRESS_HIGH 0x00000fa4
-#define NV50TCL_FP_ADDRESS_LOW 0x00000fa8
-#define NV50TCL_MSAA_MASK(x) (0x00000fbc+((x)*4))
-#define NV50TCL_MSAA_MASK__SIZE 0x00000004
-#define NV50TCL_CLIPID_ADDRESS_HIGH 0x00000fcc
-#define NV50TCL_CLIPID_ADDRESS_LOW 0x00000fd0
-#define NV50TCL_ZETA_ADDRESS_HIGH 0x00000fe0
-#define NV50TCL_ZETA_ADDRESS_LOW 0x00000fe4
-#define NV50TCL_ZETA_FORMAT 0x00000fe8
-#define NV50TCL_ZETA_FORMAT_Z32_FLOAT 0x0000000a
-#define NV50TCL_ZETA_FORMAT_Z16_UNORM 0x00000013
-#define NV50TCL_ZETA_FORMAT_Z24S8_UNORM 0x00000014
-#define NV50TCL_ZETA_FORMAT_X8Z24_UNORM 0x00000015
-#define NV50TCL_ZETA_FORMAT_S8Z24_UNORM 0x00000016
-#define NV50TCL_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM 0x00000019
-#define NV50TCL_ZETA_TILE_MODE 0x00000fec
-#define NV50TCL_ZETA_LAYER_STRIDE 0x00000ff0
-#define NV50TCL_SCREEN_SCISSOR_HORIZ 0x00000ff4
-#define NV50TCL_SCREEN_SCISSOR_HORIZ_W_SHIFT 16
-#define NV50TCL_SCREEN_SCISSOR_HORIZ_W_MASK 0xffff0000
-#define NV50TCL_SCREEN_SCISSOR_HORIZ_X_SHIFT 0
-#define NV50TCL_SCREEN_SCISSOR_HORIZ_X_MASK 0x0000ffff
-#define NV50TCL_SCREEN_SCISSOR_VERT 0x00000ff8
-#define NV50TCL_SCREEN_SCISSOR_VERT_H_SHIFT 16
-#define NV50TCL_SCREEN_SCISSOR_VERT_H_MASK 0xffff0000
-#define NV50TCL_SCREEN_SCISSOR_VERT_Y_SHIFT 0
-#define NV50TCL_SCREEN_SCISSOR_VERT_Y_MASK 0x0000ffff
-#define NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(x) (0x00001080+((x)*8))
-#define NV50TCL_VERTEX_ARRAY_LIMIT_HIGH__SIZE 0x00000010
-#define NV50TCL_VERTEX_ARRAY_LIMIT_LOW(x) (0x00001084+((x)*8))
-#define NV50TCL_VERTEX_ARRAY_LIMIT_LOW__SIZE 0x00000010
-#define NV50TCL_RT_CONTROL 0x0000121c
-#define NV50TCL_RT_CONTROL_COUNT_SHIFT 0
-#define NV50TCL_RT_CONTROL_COUNT_MASK 0x0000000f
-#define NV50TCL_RT_CONTROL_MAP0_SHIFT 4
-#define NV50TCL_RT_CONTROL_MAP0_MASK 0x00000070
-#define NV50TCL_RT_CONTROL_MAP1_SHIFT 7
-#define NV50TCL_RT_CONTROL_MAP1_MASK 0x00000380
-#define NV50TCL_RT_CONTROL_MAP2_SHIFT 10
-#define NV50TCL_RT_CONTROL_MAP2_MASK 0x00001c00
-#define NV50TCL_RT_CONTROL_MAP3_SHIFT 13
-#define NV50TCL_RT_CONTROL_MAP3_MASK 0x0000e000
-#define NV50TCL_RT_CONTROL_MAP4_SHIFT 16
-#define NV50TCL_RT_CONTROL_MAP4_MASK 0x00070000
-#define NV50TCL_RT_CONTROL_MAP5_SHIFT 19
-#define NV50TCL_RT_CONTROL_MAP5_MASK 0x00380000
-#define NV50TCL_RT_CONTROL_MAP6_SHIFT 22
-#define NV50TCL_RT_CONTROL_MAP6_MASK 0x01c00000
-#define NV50TCL_RT_CONTROL_MAP7_SHIFT 25
-#define NV50TCL_RT_CONTROL_MAP7_MASK 0x0e000000
-#define NV50TCL_RT_ARRAY_MODE 0x00001224
-#define NV50TCL_RT_ARRAY_MODE_LAYERS_SHIFT 0
-#define NV50TCL_RT_ARRAY_MODE_LAYERS_MASK 0x0000ffff
-#define NV50TCL_RT_ARRAY_MODE_VOLUME (1 << 16)
-#define NV50TCL_ZETA_HORIZ 0x00001228
-#define NV50TCL_ZETA_VERT 0x0000122c
-#define NV50TCL_ZETA_ARRAY_MODE 0x00001230
-#define NV50TCL_ZETA_ARRAY_MODE_LAYERS_SHIFT 0
-#define NV50TCL_ZETA_ARRAY_MODE_LAYERS_MASK 0x0000ffff
-#define NV50TCL_ZETA_ARRAY_MODE_UNK (1 << 16)
-#define NV50TCL_LINKED_TSC 0x00001234
-#define NV50TCL_RT_HORIZ(x) (0x00001240+((x)*8))
-#define NV50TCL_RT_HORIZ__SIZE 0x00000008
-#define NV50TCL_RT_VERT(x) (0x00001244+((x)*8))
-#define NV50TCL_RT_VERT__SIZE 0x00000008
-#define NV50TCL_CB_DEF_ADDRESS_HIGH 0x00001280
-#define NV50TCL_CB_DEF_ADDRESS_LOW 0x00001284
-#define NV50TCL_CB_DEF_SET 0x00001288
-#define NV50TCL_CB_DEF_SET_SIZE_SHIFT 0
-#define NV50TCL_CB_DEF_SET_SIZE_MASK 0x0000ffff
-#define NV50TCL_CB_DEF_SET_BUFFER_SHIFT 16
-#define NV50TCL_CB_DEF_SET_BUFFER_MASK 0x007f0000
-#define NV50TCL_STRMOUT_BUFFERS_CTRL 0x00001294
-#define NV50TCL_STRMOUT_BUFFERS_CTRL_INTERLEAVED (1 << 0)
-#define NV50TCL_STRMOUT_BUFFERS_CTRL_SEPARATE_SHIFT 4
-#define NV50TCL_STRMOUT_BUFFERS_CTRL_SEPARATE_MASK 0x000000f0
-#define NV50TCL_STRMOUT_BUFFERS_CTRL_STRIDE_SHIFT 8
-#define NV50TCL_STRMOUT_BUFFERS_CTRL_STRIDE_MASK 0x0000ff00
-#define NV50TCL_FP_RESULT_COUNT 0x00001298
-#define NV50TCL_DEPTH_TEST_ENABLE 0x000012cc
-#define NV50TCL_SHADE_MODEL 0x000012d4
-#define NV50TCL_SHADE_MODEL_FLAT 0x00001d00
-#define NV50TCL_SHADE_MODEL_SMOOTH 0x00001d01
-#define NV50TCL_LOCAL_ADDRESS_HIGH 0x000012d8
-#define NV50TCL_LOCAL_ADDRESS_LOW 0x000012dc
-#define NV50TCL_LOCAL_SIZE_LOG 0x000012e0
-#define NV50TCL_DEPTH_WRITE_ENABLE 0x000012e8
-#define NV50TCL_ALPHA_TEST_ENABLE 0x000012ec
-#define NV50TCL_PM_SET(x) (0x000012f0+((x)*4))
-#define NV50TCL_PM_SET__SIZE 0x00000004
-#define NV50TCL_VB_ELEMENT_U8_SETUP 0x00001300
-#define NV50TCL_VB_ELEMENT_U8_SETUP_OFFSET_SHIFT 30
-#define NV50TCL_VB_ELEMENT_U8_SETUP_OFFSET_MASK 0xc0000000
-#define NV50TCL_VB_ELEMENT_U8_SETUP_COUNT_SHIFT 0
-#define NV50TCL_VB_ELEMENT_U8_SETUP_COUNT_MASK 0x3fffffff
-#define NV50TCL_VB_ELEMENT_U8 0x00001304
-#define NV50TCL_VB_ELEMENT_U8_I0_SHIFT 0
-#define NV50TCL_VB_ELEMENT_U8_I0_MASK 0x000000ff
-#define NV50TCL_VB_ELEMENT_U8_I1_SHIFT 8
-#define NV50TCL_VB_ELEMENT_U8_I1_MASK 0x0000ff00
-#define NV50TCL_VB_ELEMENT_U8_I2_SHIFT 16
-#define NV50TCL_VB_ELEMENT_U8_I2_MASK 0x00ff0000
-#define NV50TCL_VB_ELEMENT_U8_I3_SHIFT 24
-#define NV50TCL_VB_ELEMENT_U8_I3_MASK 0xff000000
-#define NV50TCL_DEPTH_TEST_FUNC 0x0000130c
-#define NV50TCL_DEPTH_TEST_FUNC_NEVER 0x00000200
-#define NV50TCL_DEPTH_TEST_FUNC_LESS 0x00000201
-#define NV50TCL_DEPTH_TEST_FUNC_EQUAL 0x00000202
-#define NV50TCL_DEPTH_TEST_FUNC_LEQUAL 0x00000203
-#define NV50TCL_DEPTH_TEST_FUNC_GREATER 0x00000204
-#define NV50TCL_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205
-#define NV50TCL_DEPTH_TEST_FUNC_GEQUAL 0x00000206
-#define NV50TCL_DEPTH_TEST_FUNC_ALWAYS 0x00000207
-#define NV50TCL_ALPHA_TEST_REF 0x00001310
-#define NV50TCL_ALPHA_TEST_FUNC 0x00001314
-#define NV50TCL_ALPHA_TEST_FUNC_NEVER 0x00000200
-#define NV50TCL_ALPHA_TEST_FUNC_LESS 0x00000201
-#define NV50TCL_ALPHA_TEST_FUNC_EQUAL 0x00000202
-#define NV50TCL_ALPHA_TEST_FUNC_LEQUAL 0x00000203
-#define NV50TCL_ALPHA_TEST_FUNC_GREATER 0x00000204
-#define NV50TCL_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205
-#define NV50TCL_ALPHA_TEST_FUNC_GEQUAL 0x00000206
-#define NV50TCL_ALPHA_TEST_FUNC_ALWAYS 0x00000207
-#define NV50TCL_BLEND_COLOR(x) (0x0000131c+((x)*4))
-#define NV50TCL_BLEND_COLOR__SIZE 0x00000004
-#define NV50TCL_TIC_FLUSH 0x00001330
-#define NV50TCL_TSC_FLUSH 0x00001334
-#define NV50TCL_TEX_CACHE_CTL 0x00001338
-#define NV50TCL_BLEND_EQUATION_RGB 0x00001340
-#define NV50TCL_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006
-#define NV50TCL_BLEND_EQUATION_RGB_MIN 0x00008007
-#define NV50TCL_BLEND_EQUATION_RGB_MAX 0x00008008
-#define NV50TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
-#define NV50TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
-#define NV50TCL_BLEND_FUNC_SRC_RGB 0x00001344
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00004000
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE 0x00004001
-#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00004300
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00004301
-#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00004302
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00004303
-#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00004304
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00004305
-#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00004306
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00004307
-#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00004308
-#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x0000c001
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x0000c002
-#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x0000c003
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
-#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_COLOR 0x0000c900
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_COLOR 0x0000c901
-#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_ALPHA 0x0000c902
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_ALPHA 0x0000c903
-#define NV50TCL_BLEND_FUNC_DST_RGB 0x00001348
-#define NV50TCL_BLEND_FUNC_DST_RGB_ZERO 0x00004000
-#define NV50TCL_BLEND_FUNC_DST_RGB_ONE 0x00004001
-#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00004300
-#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00004301
-#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00004302
-#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00004303
-#define NV50TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00004304
-#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00004305
-#define NV50TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00004306
-#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00004307
-#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00004308
-#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x0000c001
-#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x0000c002
-#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x0000c003
-#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
-#define NV50TCL_BLEND_FUNC_DST_RGB_SRC1_COLOR 0x0000c900
-#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC1_COLOR 0x0000c901
-#define NV50TCL_BLEND_FUNC_DST_RGB_SRC1_ALPHA 0x0000c902
-#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC1_ALPHA 0x0000c903
-#define NV50TCL_BLEND_EQUATION_ALPHA 0x0000134c
-#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
-#define NV50TCL_BLEND_EQUATION_ALPHA_MIN 0x00008007
-#define NV50TCL_BLEND_EQUATION_ALPHA_MAX 0x00008008
-#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
-#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA 0x00001350
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00004000
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00004001
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x00004300
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x00004301
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x00004302
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x00004303
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x00004304
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x00004305
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x00004306
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x00004307
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x00004308
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x0000c001
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x0000c002
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x0000c003
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC1_COLOR 0x0000c900
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC1_COLOR 0x0000c901
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC1_ALPHA 0x0000c902
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC1_ALPHA 0x0000c903
-#define NV50TCL_BLEND_FUNC_DST_ALPHA 0x00001358
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00004000
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00004001
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x00004300
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x00004301
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x00004302
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x00004303
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x00004304
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x00004305
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x00004306
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x00004307
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x00004308
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x0000c001
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x0000c002
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x0000c003
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC1_COLOR 0x0000c900
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_COLOR 0x0000c901
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC1_ALPHA 0x0000c902
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_ALPHA 0x0000c903
-#define NV50TCL_BLEND_ENABLE(x) (0x00001360+((x)*4))
-#define NV50TCL_BLEND_ENABLE__SIZE 0x00000008
-#define NV50TCL_STENCIL_FRONT_ENABLE 0x00001380
-#define NV50TCL_STENCIL_FRONT_OP_FAIL 0x00001384
-#define NV50TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
-#define NV50TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a
-#define NV50TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00
-#define NV50TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01
-#define NV50TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02
-#define NV50TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03
-#define NV50TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507
-#define NV50TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508
-#define NV50TCL_STENCIL_FRONT_OP_ZFAIL 0x00001388
-#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000
-#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a
-#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00
-#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01
-#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02
-#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03
-#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507
-#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508
-#define NV50TCL_STENCIL_FRONT_OP_ZPASS 0x0000138c
-#define NV50TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000
-#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a
-#define NV50TCL_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00
-#define NV50TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01
-#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02
-#define NV50TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03
-#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507
-#define NV50TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508
-#define NV50TCL_STENCIL_FRONT_FUNC_FUNC 0x00001390
-#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200
-#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201
-#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202
-#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203
-#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
-#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205
-#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206
-#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207
-#define NV50TCL_STENCIL_FRONT_FUNC_REF 0x00001394
-#define NV50TCL_STENCIL_FRONT_MASK 0x00001398
-#define NV50TCL_STENCIL_FRONT_FUNC_MASK 0x0000139c
-#define NV50TCL_FRAG_COLOR_CLAMP_EN 0x000013a8
-#define NV50TCL_Y_ORIGIN_BOTTOM 0x000013ac
-#define NV50TCL_LINE_WIDTH 0x000013b0
-#define NV50TCL_TEX_LIMITS(x) (0x000013b4+((x)*4))
-#define NV50TCL_TEX_LIMITS__SIZE 0x00000003
-#define NV50TCL_TEX_LIMITS_SAMPLERS_LOG2_SHIFT 0
-#define NV50TCL_TEX_LIMITS_SAMPLERS_LOG2_MASK 0x0000000f
-#define NV50TCL_TEX_LIMITS_TEXTURES_LOG2_SHIFT 4
-#define NV50TCL_TEX_LIMITS_TEXTURES_LOG2_MASK 0x000000f0
-#define NV50TCL_POINT_COORD_REPLACE_MAP(x) (0x000013c0+((x)*4))
-#define NV50TCL_POINT_COORD_REPLACE_MAP__SIZE 0x00000008
-#define NV50TCL_VP_START_ID 0x0000140c
-#define NV50TCL_GP_START_ID 0x00001410
-#define NV50TCL_FP_START_ID 0x00001414
-#define NV50TCL_GP_VERTEX_OUTPUT_COUNT 0x00001420
-#define NV50TCL_VB_ELEMENT_BASE 0x00001434
-#define NV50TCL_CLEAR_FLAGS 0x0000143c
-#define NV50TCL_CLEAR_FLAGS_OGL (1 << 0)
-#define NV50TCL_CLEAR_FLAGS_D3D (1 << 4)
-#define NV50TCL_INSTANCE_BASE 0x00001438
-#define NV50TCL_CODE_CB_FLUSH 0x00001440
-#define NV50TCL_BIND_TSC(x) (0x00001444+((x)*8))
-#define NV50TCL_BIND_TSC__SIZE 0x00000003
-#define NV50TCL_BIND_TSC_VALID (1 << 0)
-#define NV50TCL_BIND_TSC_SAMPLER_SHIFT 4
-#define NV50TCL_BIND_TSC_SAMPLER_MASK 0x000000f0
-#define NV50TCL_BIND_TSC_TSC_SHIFT 12
-#define NV50TCL_BIND_TSC_TSC_MASK 0x001ff000
-#define NV50TCL_BIND_TIC(x) (0x00001448+((x)*8))
-#define NV50TCL_BIND_TIC__SIZE 0x00000003
-#define NV50TCL_BIND_TIC_VALID (1 << 0)
-#define NV50TCL_BIND_TIC_TEXTURE_SHIFT 1
-#define NV50TCL_BIND_TIC_TEXTURE_MASK 0x000001fe
-#define NV50TCL_BIND_TIC_TIC_SHIFT 9
-#define NV50TCL_BIND_TIC_TIC_MASK 0x7ffffe00
-#define NV50TCL_STRMOUT_MAP(x) (0x00001480+((x)*4))
-#define NV50TCL_STRMOUT_MAP__SIZE 0x00000020
-#define NV50TCL_CLIPID_HEIGHT 0x00001504
-#define NV50TCL_VP_CLIP_DISTANCE_ENABLE 0x00001510
-#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_0 (1 << 0)
-#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_1 (1 << 1)
-#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_2 (1 << 2)
-#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_3 (1 << 3)
-#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_4 (1 << 4)
-#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_5 (1 << 5)
-#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_6 (1 << 6)
-#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_7 (1 << 7)
-#define NV50TCL_SAMPLECNT_ENABLE 0x00001514
-#define NV50TCL_POINT_SIZE 0x00001518
-#define NV50TCL_POINT_SPRITE_ENABLE 0x00001520
-#define NV50TCL_SAMPLECNT_RESET 0x00001530
-#define NV50TCL_ZETA_ENABLE 0x00001538
-#define NV50TCL_MULTISAMPLE_CTRL 0x0000153c
-#define NV50TCL_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE (1 << 0)
-#define NV50TCL_MULTISAMPLE_CTRL_ALPHA_TO_ONE (1 << 4)
-#define NV50TCL_NOPERSPECTIVE_BITMAP(x) (0x00001540+((x)*4))
-#define NV50TCL_NOPERSPECTIVE_BITMAP__SIZE 0x00000004
-#define NV50TCL_COND_ADDRESS_HIGH 0x00001550
-#define NV50TCL_COND_ADDRESS_LOW 0x00001554
-#define NV50TCL_COND_MODE 0x00001558
-#define NV50TCL_COND_MODE_NEVER 0x00000000
-#define NV50TCL_COND_MODE_ALWAYS 0x00000001
-#define NV50TCL_COND_MODE_RES 0x00000002
-#define NV50TCL_COND_MODE_NOT_RES_AND_NOT_ID 0x00000003
-#define NV50TCL_COND_MODE_RES_OR_ID 0x00000004
-#define NV50TCL_TSC_ADDRESS_HIGH 0x0000155c
-#define NV50TCL_TSC_ADDRESS_LOW 0x00001560
-#define NV50TCL_TSC_LIMIT 0x00001564
-#define NV50TCL_POLYGON_OFFSET_FACTOR 0x0000156c
-#define NV50TCL_LINE_SMOOTH_ENABLE 0x00001570
-#define NV50TCL_TIC_ADDRESS_HIGH 0x00001574
-#define NV50TCL_TIC_ADDRESS_LOW 0x00001578
-#define NV50TCL_TIC_LIMIT 0x0000157c
-#define NV50TCL_PM_CONTROL(x) (0x00001580+((x)*4))
-#define NV50TCL_PM_CONTROL__SIZE 0x00000004
-#define NV50TCL_PM_CONTROL_UNK0 (1 << 0)
-#define NV50TCL_PM_CONTROL_UNK1_SHIFT 4
-#define NV50TCL_PM_CONTROL_UNK1_MASK 0x00000070
-#define NV50TCL_PM_CONTROL_UNK2_SHIFT 8
-#define NV50TCL_PM_CONTROL_UNK2_MASK 0xffffff00
-#define NV50TCL_STENCIL_BACK_ENABLE 0x00001594
-#define NV50TCL_STENCIL_BACK_OP_FAIL 0x00001598
-#define NV50TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000
-#define NV50TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a
-#define NV50TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00
-#define NV50TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01
-#define NV50TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02
-#define NV50TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03
-#define NV50TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507
-#define NV50TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508
-#define NV50TCL_STENCIL_BACK_OP_ZFAIL 0x0000159c
-#define NV50TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000
-#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a
-#define NV50TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00
-#define NV50TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01
-#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02
-#define NV50TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03
-#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507
-#define NV50TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508
-#define NV50TCL_STENCIL_BACK_OP_ZPASS 0x000015a0
-#define NV50TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000
-#define NV50TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a
-#define NV50TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00
-#define NV50TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01
-#define NV50TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02
-#define NV50TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03
-#define NV50TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507
-#define NV50TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508
-#define NV50TCL_STENCIL_BACK_FUNC_FUNC 0x000015a4
-#define NV50TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200
-#define NV50TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201
-#define NV50TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202
-#define NV50TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203
-#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
-#define NV50TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205
-#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206
-#define NV50TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207
-#define NV50TCL_FRAMEBUFFER_SRGB 0x000015b8
-#define NV50TCL_POLYGON_OFFSET_UNITS 0x000015bc
-#define NV50TCL_GP_BUILTIN_RESULT_EN 0x000015cc
-#define NV50TCL_GP_BUILTIN_RESULT_EN_VPORT_IDX (1 << 0)
-#define NV50TCL_GP_BUILTIN_RESULT_EN_LAYER_IDX (1 << 16)
-#define NV50TCL_MULTISAMPLE_MODE 0x000015d0
-#define NV50TCL_MULTISAMPLE_MODE_1X 0x00000000
-#define NV50TCL_MULTISAMPLE_MODE_2XMS 0x00000001
-#define NV50TCL_MULTISAMPLE_MODE_4XMS 0x00000002
-#define NV50TCL_MULTISAMPLE_MODE_8XMS 0x00000004
-#define NV50TCL_MULTISAMPLE_MODE_4XMS_4XCS 0x00000008
-#define NV50TCL_MULTISAMPLE_MODE_4XMS_12XCS 0x00000009
-#define NV50TCL_MULTISAMPLE_MODE_8XMS_8XCS 0x0000000a
-#define NV50TCL_VERTEX_BEGIN 0x000015dc
-#define NV50TCL_VERTEX_BEGIN_POINTS 0x00000000
-#define NV50TCL_VERTEX_BEGIN_LINES 0x00000001
-#define NV50TCL_VERTEX_BEGIN_LINE_LOOP 0x00000002
-#define NV50TCL_VERTEX_BEGIN_LINE_STRIP 0x00000003
-#define NV50TCL_VERTEX_BEGIN_TRIANGLES 0x00000004
-#define NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP 0x00000005
-#define NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN 0x00000006
-#define NV50TCL_VERTEX_BEGIN_QUADS 0x00000007
-#define NV50TCL_VERTEX_BEGIN_QUAD_STRIP 0x00000008
-#define NV50TCL_VERTEX_BEGIN_POLYGON 0x00000009
-#define NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY 0x0000000a
-#define NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY 0x0000000b
-#define NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY 0x0000000c
-#define NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY 0x0000000d
-#define NV50TCL_VERTEX_BEGIN_PATCHES 0x0000000e
-#define NV50TCL_VERTEX_END 0x000015e0
-#define NV50TCL_EDGEFLAG_ENABLE 0x000015e4
-#define NV50TCL_VB_ELEMENT_U32 0x000015e8
-#define NV50TCL_VB_ELEMENT_U16_SETUP 0x000015ec
-#define NV50TCL_VB_ELEMENT_U16_SETUP_OFFSET_SHIFT 30
-#define NV50TCL_VB_ELEMENT_U16_SETUP_OFFSET_MASK 0xc0000000
-#define NV50TCL_VB_ELEMENT_U16_SETUP_COUNT_SHIFT 0
-#define NV50TCL_VB_ELEMENT_U16_SETUP_COUNT_MASK 0x3fffffff
-#define NV50TCL_VB_ELEMENT_U16 0x000015f0
-#define NV50TCL_VB_ELEMENT_U16_I0_SHIFT 0
-#define NV50TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff
-#define NV50TCL_VB_ELEMENT_U16_I1_SHIFT 16
-#define NV50TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000
-#define NV50TCL_VERTEX_BASE_HIGH 0x000015f4
-#define NV50TCL_VERTEX_BASE_LOW 0x000015f8
-#define NV50TCL_VERTEX_DATA 0x00001640
-#define NV50TCL_PRIM_RESTART_ENABLE 0x00001644
-#define NV50TCL_PRIM_RESTART_INDEX 0x00001648
-#define NV50TCL_VP_GP_BUILTIN_ATTR_EN 0x0000164c
-#define NV50TCL_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID (1 << 0)
-#define NV50TCL_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID (1 << 4)
-#define NV50TCL_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID (1 << 8)
-#define NV50TCL_VP_GP_BUILTIN_ATTR_EN_UNK12 (1 << 12)
-#define NV50TCL_VP_ATTR_EN_0 0x00001650
-#define NV50TCL_VP_ATTR_EN_0_7_SHIFT 28
-#define NV50TCL_VP_ATTR_EN_0_7_MASK 0xf0000000
-#define NV50TCL_VP_ATTR_EN_0_7_NONE 0x00000000
-#define NV50TCL_VP_ATTR_EN_0_7_XNNN 0x10000000
-#define NV50TCL_VP_ATTR_EN_0_7_NYNN 0x20000000
-#define NV50TCL_VP_ATTR_EN_0_7_XYNN 0x30000000
-#define NV50TCL_VP_ATTR_EN_0_7_NNZN 0x40000000
-#define NV50TCL_VP_ATTR_EN_0_7_XNZN 0x50000000
-#define NV50TCL_VP_ATTR_EN_0_7_NYZN 0x60000000
-#define NV50TCL_VP_ATTR_EN_0_7_XYZN 0x70000000
-#define NV50TCL_VP_ATTR_EN_0_7_NNNW 0x80000000
-#define NV50TCL_VP_ATTR_EN_0_7_XNNW 0x90000000
-#define NV50TCL_VP_ATTR_EN_0_7_NYNW 0xa0000000
-#define NV50TCL_VP_ATTR_EN_0_7_XYNW 0xb0000000
-#define NV50TCL_VP_ATTR_EN_0_7_NNZW 0xc0000000
-#define NV50TCL_VP_ATTR_EN_0_7_XNZW 0xd0000000
-#define NV50TCL_VP_ATTR_EN_0_7_NYZW 0xe0000000
-#define NV50TCL_VP_ATTR_EN_0_7_XYZW 0xf0000000
-#define NV50TCL_VP_ATTR_EN_0_6_SHIFT 24
-#define NV50TCL_VP_ATTR_EN_0_6_MASK 0x0f000000
-#define NV50TCL_VP_ATTR_EN_0_6_NONE 0x00000000
-#define NV50TCL_VP_ATTR_EN_0_6_XNNN 0x01000000
-#define NV50TCL_VP_ATTR_EN_0_6_NYNN 0x02000000
-#define NV50TCL_VP_ATTR_EN_0_6_XYNN 0x03000000
-#define NV50TCL_VP_ATTR_EN_0_6_NNZN 0x04000000
-#define NV50TCL_VP_ATTR_EN_0_6_XNZN 0x05000000
-#define NV50TCL_VP_ATTR_EN_0_6_NYZN 0x06000000
-#define NV50TCL_VP_ATTR_EN_0_6_XYZN 0x07000000
-#define NV50TCL_VP_ATTR_EN_0_6_NNNW 0x08000000
-#define NV50TCL_VP_ATTR_EN_0_6_XNNW 0x09000000
-#define NV50TCL_VP_ATTR_EN_0_6_NYNW 0x0a000000
-#define NV50TCL_VP_ATTR_EN_0_6_XYNW 0x0b000000
-#define NV50TCL_VP_ATTR_EN_0_6_NNZW 0x0c000000
-#define NV50TCL_VP_ATTR_EN_0_6_XNZW 0x0d000000
-#define NV50TCL_VP_ATTR_EN_0_6_NYZW 0x0e000000
-#define NV50TCL_VP_ATTR_EN_0_6_XYZW 0x0f000000
-#define NV50TCL_VP_ATTR_EN_0_5_SHIFT 20
-#define NV50TCL_VP_ATTR_EN_0_5_MASK 0x00f00000
-#define NV50TCL_VP_ATTR_EN_0_5_NONE 0x00000000
-#define NV50TCL_VP_ATTR_EN_0_5_XNNN 0x00100000
-#define NV50TCL_VP_ATTR_EN_0_5_NYNN 0x00200000
-#define NV50TCL_VP_ATTR_EN_0_5_XYNN 0x00300000
-#define NV50TCL_VP_ATTR_EN_0_5_NNZN 0x00400000
-#define NV50TCL_VP_ATTR_EN_0_5_XNZN 0x00500000
-#define NV50TCL_VP_ATTR_EN_0_5_NYZN 0x00600000
-#define NV50TCL_VP_ATTR_EN_0_5_XYZN 0x00700000
-#define NV50TCL_VP_ATTR_EN_0_5_NNNW 0x00800000
-#define NV50TCL_VP_ATTR_EN_0_5_XNNW 0x00900000
-#define NV50TCL_VP_ATTR_EN_0_5_NYNW 0x00a00000
-#define NV50TCL_VP_ATTR_EN_0_5_XYNW 0x00b00000
-#define NV50TCL_VP_ATTR_EN_0_5_NNZW 0x00c00000
-#define NV50TCL_VP_ATTR_EN_0_5_XNZW 0x00d00000
-#define NV50TCL_VP_ATTR_EN_0_5_NYZW 0x00e00000
-#define NV50TCL_VP_ATTR_EN_0_5_XYZW 0x00f00000
-#define NV50TCL_VP_ATTR_EN_0_4_SHIFT 16
-#define NV50TCL_VP_ATTR_EN_0_4_MASK 0x000f0000
-#define NV50TCL_VP_ATTR_EN_0_4_NONE 0x00000000
-#define NV50TCL_VP_ATTR_EN_0_4_XNNN 0x00010000
-#define NV50TCL_VP_ATTR_EN_0_4_NYNN 0x00020000
-#define NV50TCL_VP_ATTR_EN_0_4_XYNN 0x00030000
-#define NV50TCL_VP_ATTR_EN_0_4_NNZN 0x00040000
-#define NV50TCL_VP_ATTR_EN_0_4_XNZN 0x00050000
-#define NV50TCL_VP_ATTR_EN_0_4_NYZN 0x00060000
-#define NV50TCL_VP_ATTR_EN_0_4_XYZN 0x00070000
-#define NV50TCL_VP_ATTR_EN_0_4_NNNW 0x00080000
-#define NV50TCL_VP_ATTR_EN_0_4_XNNW 0x00090000
-#define NV50TCL_VP_ATTR_EN_0_4_NYNW 0x000a0000
-#define NV50TCL_VP_ATTR_EN_0_4_XYNW 0x000b0000
-#define NV50TCL_VP_ATTR_EN_0_4_NNZW 0x000c0000
-#define NV50TCL_VP_ATTR_EN_0_4_XNZW 0x000d0000
-#define NV50TCL_VP_ATTR_EN_0_4_NYZW 0x000e0000
-#define NV50TCL_VP_ATTR_EN_0_4_XYZW 0x000f0000
-#define NV50TCL_VP_ATTR_EN_0_3_SHIFT 12
-#define NV50TCL_VP_ATTR_EN_0_3_MASK 0x0000f000
-#define NV50TCL_VP_ATTR_EN_0_3_NONE 0x00000000
-#define NV50TCL_VP_ATTR_EN_0_3_XNNN 0x00001000
-#define NV50TCL_VP_ATTR_EN_0_3_NYNN 0x00002000
-#define NV50TCL_VP_ATTR_EN_0_3_XYNN 0x00003000
-#define NV50TCL_VP_ATTR_EN_0_3_NNZN 0x00004000
-#define NV50TCL_VP_ATTR_EN_0_3_XNZN 0x00005000
-#define NV50TCL_VP_ATTR_EN_0_3_NYZN 0x00006000
-#define NV50TCL_VP_ATTR_EN_0_3_XYZN 0x00007000
-#define NV50TCL_VP_ATTR_EN_0_3_NNNW 0x00008000
-#define NV50TCL_VP_ATTR_EN_0_3_XNNW 0x00009000
-#define NV50TCL_VP_ATTR_EN_0_3_NYNW 0x0000a000
-#define NV50TCL_VP_ATTR_EN_0_3_XYNW 0x0000b000
-#define NV50TCL_VP_ATTR_EN_0_3_NNZW 0x0000c000
-#define NV50TCL_VP_ATTR_EN_0_3_XNZW 0x0000d000
-#define NV50TCL_VP_ATTR_EN_0_3_NYZW 0x0000e000
-#define NV50TCL_VP_ATTR_EN_0_3_XYZW 0x0000f000
-#define NV50TCL_VP_ATTR_EN_0_2_SHIFT 8
-#define NV50TCL_VP_ATTR_EN_0_2_MASK 0x00000f00
-#define NV50TCL_VP_ATTR_EN_0_2_NONE 0x00000000
-#define NV50TCL_VP_ATTR_EN_0_2_XNNN 0x00000100
-#define NV50TCL_VP_ATTR_EN_0_2_NYNN 0x00000200
-#define NV50TCL_VP_ATTR_EN_0_2_XYNN 0x00000300
-#define NV50TCL_VP_ATTR_EN_0_2_NNZN 0x00000400
-#define NV50TCL_VP_ATTR_EN_0_2_XNZN 0x00000500
-#define NV50TCL_VP_ATTR_EN_0_2_NYZN 0x00000600
-#define NV50TCL_VP_ATTR_EN_0_2_XYZN 0x00000700
-#define NV50TCL_VP_ATTR_EN_0_2_NNNW 0x00000800
-#define NV50TCL_VP_ATTR_EN_0_2_XNNW 0x00000900
-#define NV50TCL_VP_ATTR_EN_0_2_NYNW 0x00000a00
-#define NV50TCL_VP_ATTR_EN_0_2_XYNW 0x00000b00
-#define NV50TCL_VP_ATTR_EN_0_2_NNZW 0x00000c00
-#define NV50TCL_VP_ATTR_EN_0_2_XNZW 0x00000d00
-#define NV50TCL_VP_ATTR_EN_0_2_NYZW 0x00000e00
-#define NV50TCL_VP_ATTR_EN_0_2_XYZW 0x00000f00
-#define NV50TCL_VP_ATTR_EN_0_1_SHIFT 4
-#define NV50TCL_VP_ATTR_EN_0_1_MASK 0x000000f0
-#define NV50TCL_VP_ATTR_EN_0_1_NONE 0x00000000
-#define NV50TCL_VP_ATTR_EN_0_1_XNNN 0x00000010
-#define NV50TCL_VP_ATTR_EN_0_1_NYNN 0x00000020
-#define NV50TCL_VP_ATTR_EN_0_1_XYNN 0x00000030
-#define NV50TCL_VP_ATTR_EN_0_1_NNZN 0x00000040
-#define NV50TCL_VP_ATTR_EN_0_1_XNZN 0x00000050
-#define NV50TCL_VP_ATTR_EN_0_1_NYZN 0x00000060
-#define NV50TCL_VP_ATTR_EN_0_1_XYZN 0x00000070
-#define NV50TCL_VP_ATTR_EN_0_1_NNNW 0x00000080
-#define NV50TCL_VP_ATTR_EN_0_1_XNNW 0x00000090
-#define NV50TCL_VP_ATTR_EN_0_1_NYNW 0x000000a0
-#define NV50TCL_VP_ATTR_EN_0_1_XYNW 0x000000b0
-#define NV50TCL_VP_ATTR_EN_0_1_NNZW 0x000000c0
-#define NV50TCL_VP_ATTR_EN_0_1_XNZW 0x000000d0
-#define NV50TCL_VP_ATTR_EN_0_1_NYZW 0x000000e0
-#define NV50TCL_VP_ATTR_EN_0_1_XYZW 0x000000f0
-#define NV50TCL_VP_ATTR_EN_0_0_SHIFT 0
-#define NV50TCL_VP_ATTR_EN_0_0_MASK 0x0000000f
-#define NV50TCL_VP_ATTR_EN_0_0_NONE 0x00000000
-#define NV50TCL_VP_ATTR_EN_0_0_XNNN 0x00000001
-#define NV50TCL_VP_ATTR_EN_0_0_NYNN 0x00000002
-#define NV50TCL_VP_ATTR_EN_0_0_XYNN 0x00000003
-#define NV50TCL_VP_ATTR_EN_0_0_NNZN 0x00000004
-#define NV50TCL_VP_ATTR_EN_0_0_XNZN 0x00000005
-#define NV50TCL_VP_ATTR_EN_0_0_NYZN 0x00000006
-#define NV50TCL_VP_ATTR_EN_0_0_XYZN 0x00000007
-#define NV50TCL_VP_ATTR_EN_0_0_NNNW 0x00000008
-#define NV50TCL_VP_ATTR_EN_0_0_XNNW 0x00000009
-#define NV50TCL_VP_ATTR_EN_0_0_NYNW 0x0000000a
-#define NV50TCL_VP_ATTR_EN_0_0_XYNW 0x0000000b
-#define NV50TCL_VP_ATTR_EN_0_0_NNZW 0x0000000c
-#define NV50TCL_VP_ATTR_EN_0_0_XNZW 0x0000000d
-#define NV50TCL_VP_ATTR_EN_0_0_NYZW 0x0000000e
-#define NV50TCL_VP_ATTR_EN_0_0_XYZW 0x0000000f
-#define NV50TCL_VP_ATTR_EN_1 0x00001654
-#define NV50TCL_VP_ATTR_EN_1_15_SHIFT 28
-#define NV50TCL_VP_ATTR_EN_1_15_MASK 0xf0000000
-#define NV50TCL_VP_ATTR_EN_1_15_NONE 0x00000000
-#define NV50TCL_VP_ATTR_EN_1_15_XNNN 0x10000000
-#define NV50TCL_VP_ATTR_EN_1_15_NYNN 0x20000000
-#define NV50TCL_VP_ATTR_EN_1_15_XYNN 0x30000000
-#define NV50TCL_VP_ATTR_EN_1_15_NNZN 0x40000000
-#define NV50TCL_VP_ATTR_EN_1_15_XNZN 0x50000000
-#define NV50TCL_VP_ATTR_EN_1_15_NYZN 0x60000000
-#define NV50TCL_VP_ATTR_EN_1_15_XYZN 0x70000000
-#define NV50TCL_VP_ATTR_EN_1_15_NNNW 0x80000000
-#define NV50TCL_VP_ATTR_EN_1_15_XNNW 0x90000000
-#define NV50TCL_VP_ATTR_EN_1_15_NYNW 0xa0000000
-#define NV50TCL_VP_ATTR_EN_1_15_XYNW 0xb0000000
-#define NV50TCL_VP_ATTR_EN_1_15_NNZW 0xc0000000
-#define NV50TCL_VP_ATTR_EN_1_15_XNZW 0xd0000000
-#define NV50TCL_VP_ATTR_EN_1_15_NYZW 0xe0000000
-#define NV50TCL_VP_ATTR_EN_1_15_XYZW 0xf0000000
-#define NV50TCL_VP_ATTR_EN_1_14_SHIFT 24
-#define NV50TCL_VP_ATTR_EN_1_14_MASK 0x0f000000
-#define NV50TCL_VP_ATTR_EN_1_14_NONE 0x00000000
-#define NV50TCL_VP_ATTR_EN_1_14_XNNN 0x01000000
-#define NV50TCL_VP_ATTR_EN_1_14_NYNN 0x02000000
-#define NV50TCL_VP_ATTR_EN_1_14_XYNN 0x03000000
-#define NV50TCL_VP_ATTR_EN_1_14_NNZN 0x04000000
-#define NV50TCL_VP_ATTR_EN_1_14_XNZN 0x05000000
-#define NV50TCL_VP_ATTR_EN_1_14_NYZN 0x06000000
-#define NV50TCL_VP_ATTR_EN_1_14_XYZN 0x07000000
-#define NV50TCL_VP_ATTR_EN_1_14_NNNW 0x08000000
-#define NV50TCL_VP_ATTR_EN_1_14_XNNW 0x09000000
-#define NV50TCL_VP_ATTR_EN_1_14_NYNW 0x0a000000
-#define NV50TCL_VP_ATTR_EN_1_14_XYNW 0x0b000000
-#define NV50TCL_VP_ATTR_EN_1_14_NNZW 0x0c000000
-#define NV50TCL_VP_ATTR_EN_1_14_XNZW 0x0d000000
-#define NV50TCL_VP_ATTR_EN_1_14_NYZW 0x0e000000
-#define NV50TCL_VP_ATTR_EN_1_14_XYZW 0x0f000000
-#define NV50TCL_VP_ATTR_EN_1_13_SHIFT 20
-#define NV50TCL_VP_ATTR_EN_1_13_MASK 0x00f00000
-#define NV50TCL_VP_ATTR_EN_1_13_NONE 0x00000000
-#define NV50TCL_VP_ATTR_EN_1_13_XNNN 0x00100000
-#define NV50TCL_VP_ATTR_EN_1_13_NYNN 0x00200000
-#define NV50TCL_VP_ATTR_EN_1_13_XYNN 0x00300000
-#define NV50TCL_VP_ATTR_EN_1_13_NNZN 0x00400000
-#define NV50TCL_VP_ATTR_EN_1_13_XNZN 0x00500000
-#define NV50TCL_VP_ATTR_EN_1_13_NYZN 0x00600000
-#define NV50TCL_VP_ATTR_EN_1_13_XYZN 0x00700000
-#define NV50TCL_VP_ATTR_EN_1_13_NNNW 0x00800000
-#define NV50TCL_VP_ATTR_EN_1_13_XNNW 0x00900000
-#define NV50TCL_VP_ATTR_EN_1_13_NYNW 0x00a00000
-#define NV50TCL_VP_ATTR_EN_1_13_XYNW 0x00b00000
-#define NV50TCL_VP_ATTR_EN_1_13_NNZW 0x00c00000
-#define NV50TCL_VP_ATTR_EN_1_13_XNZW 0x00d00000
-#define NV50TCL_VP_ATTR_EN_1_13_NYZW 0x00e00000
-#define NV50TCL_VP_ATTR_EN_1_13_XYZW 0x00f00000
-#define NV50TCL_VP_ATTR_EN_1_12_SHIFT 16
-#define NV50TCL_VP_ATTR_EN_1_12_MASK 0x000f0000
-#define NV50TCL_VP_ATTR_EN_1_12_NONE 0x00000000
-#define NV50TCL_VP_ATTR_EN_1_12_XNNN 0x00010000
-#define NV50TCL_VP_ATTR_EN_1_12_NYNN 0x00020000
-#define NV50TCL_VP_ATTR_EN_1_12_XYNN 0x00030000
-#define NV50TCL_VP_ATTR_EN_1_12_NNZN 0x00040000
-#define NV50TCL_VP_ATTR_EN_1_12_XNZN 0x00050000
-#define NV50TCL_VP_ATTR_EN_1_12_NYZN 0x00060000
-#define NV50TCL_VP_ATTR_EN_1_12_XYZN 0x00070000
-#define NV50TCL_VP_ATTR_EN_1_12_NNNW 0x00080000
-#define NV50TCL_VP_ATTR_EN_1_12_XNNW 0x00090000
-#define NV50TCL_VP_ATTR_EN_1_12_NYNW 0x000a0000
-#define NV50TCL_VP_ATTR_EN_1_12_XYNW 0x000b0000
-#define NV50TCL_VP_ATTR_EN_1_12_NNZW 0x000c0000
-#define NV50TCL_VP_ATTR_EN_1_12_XNZW 0x000d0000
-#define NV50TCL_VP_ATTR_EN_1_12_NYZW 0x000e0000
-#define NV50TCL_VP_ATTR_EN_1_12_XYZW 0x000f0000
-#define NV50TCL_VP_ATTR_EN_1_11_SHIFT 12
-#define NV50TCL_VP_ATTR_EN_1_11_MASK 0x0000f000
-#define NV50TCL_VP_ATTR_EN_1_11_NONE 0x00000000
-#define NV50TCL_VP_ATTR_EN_1_11_XNNN 0x00001000
-#define NV50TCL_VP_ATTR_EN_1_11_NYNN 0x00002000
-#define NV50TCL_VP_ATTR_EN_1_11_XYNN 0x00003000
-#define NV50TCL_VP_ATTR_EN_1_11_NNZN 0x00004000
-#define NV50TCL_VP_ATTR_EN_1_11_XNZN 0x00005000
-#define NV50TCL_VP_ATTR_EN_1_11_NYZN 0x00006000
-#define NV50TCL_VP_ATTR_EN_1_11_XYZN 0x00007000
-#define NV50TCL_VP_ATTR_EN_1_11_NNNW 0x00008000
-#define NV50TCL_VP_ATTR_EN_1_11_XNNW 0x00009000
-#define NV50TCL_VP_ATTR_EN_1_11_NYNW 0x0000a000
-#define NV50TCL_VP_ATTR_EN_1_11_XYNW 0x0000b000
-#define NV50TCL_VP_ATTR_EN_1_11_NNZW 0x0000c000
-#define NV50TCL_VP_ATTR_EN_1_11_XNZW 0x0000d000
-#define NV50TCL_VP_ATTR_EN_1_11_NYZW 0x0000e000
-#define NV50TCL_VP_ATTR_EN_1_11_XYZW 0x0000f000
-#define NV50TCL_VP_ATTR_EN_1_10_SHIFT 8
-#define NV50TCL_VP_ATTR_EN_1_10_MASK 0x00000f00
-#define NV50TCL_VP_ATTR_EN_1_10_NONE 0x00000000
-#define NV50TCL_VP_ATTR_EN_1_10_XNNN 0x00000100
-#define NV50TCL_VP_ATTR_EN_1_10_NYNN 0x00000200
-#define NV50TCL_VP_ATTR_EN_1_10_XYNN 0x00000300
-#define NV50TCL_VP_ATTR_EN_1_10_NNZN 0x00000400
-#define NV50TCL_VP_ATTR_EN_1_10_XNZN 0x00000500
-#define NV50TCL_VP_ATTR_EN_1_10_NYZN 0x00000600
-#define NV50TCL_VP_ATTR_EN_1_10_XYZN 0x00000700
-#define NV50TCL_VP_ATTR_EN_1_10_NNNW 0x00000800
-#define NV50TCL_VP_ATTR_EN_1_10_XNNW 0x00000900
-#define NV50TCL_VP_ATTR_EN_1_10_NYNW 0x00000a00
-#define NV50TCL_VP_ATTR_EN_1_10_XYNW 0x00000b00
-#define NV50TCL_VP_ATTR_EN_1_10_NNZW 0x00000c00
-#define NV50TCL_VP_ATTR_EN_1_10_XNZW 0x00000d00
-#define NV50TCL_VP_ATTR_EN_1_10_NYZW 0x00000e00
-#define NV50TCL_VP_ATTR_EN_1_10_XYZW 0x00000f00
-#define NV50TCL_VP_ATTR_EN_1_9_SHIFT 4
-#define NV50TCL_VP_ATTR_EN_1_9_MASK 0x000000f0
-#define NV50TCL_VP_ATTR_EN_1_9_NONE 0x00000000
-#define NV50TCL_VP_ATTR_EN_1_9_XNNN 0x00000010
-#define NV50TCL_VP_ATTR_EN_1_9_NYNN 0x00000020
-#define NV50TCL_VP_ATTR_EN_1_9_XYNN 0x00000030
-#define NV50TCL_VP_ATTR_EN_1_9_NNZN 0x00000040
-#define NV50TCL_VP_ATTR_EN_1_9_XNZN 0x00000050
-#define NV50TCL_VP_ATTR_EN_1_9_NYZN 0x00000060
-#define NV50TCL_VP_ATTR_EN_1_9_XYZN 0x00000070
-#define NV50TCL_VP_ATTR_EN_1_9_NNNW 0x00000080
-#define NV50TCL_VP_ATTR_EN_1_9_XNNW 0x00000090
-#define NV50TCL_VP_ATTR_EN_1_9_NYNW 0x000000a0
-#define NV50TCL_VP_ATTR_EN_1_9_XYNW 0x000000b0
-#define NV50TCL_VP_ATTR_EN_1_9_NNZW 0x000000c0
-#define NV50TCL_VP_ATTR_EN_1_9_XNZW 0x000000d0
-#define NV50TCL_VP_ATTR_EN_1_9_NYZW 0x000000e0
-#define NV50TCL_VP_ATTR_EN_1_9_XYZW 0x000000f0
-#define NV50TCL_VP_ATTR_EN_1_8_SHIFT 0
-#define NV50TCL_VP_ATTR_EN_1_8_MASK 0x0000000f
-#define NV50TCL_VP_ATTR_EN_1_8_NONE 0x00000000
-#define NV50TCL_VP_ATTR_EN_1_8_XNNN 0x00000001
-#define NV50TCL_VP_ATTR_EN_1_8_NYNN 0x00000002
-#define NV50TCL_VP_ATTR_EN_1_8_XYNN 0x00000003
-#define NV50TCL_VP_ATTR_EN_1_8_NNZN 0x00000004
-#define NV50TCL_VP_ATTR_EN_1_8_XNZN 0x00000005
-#define NV50TCL_VP_ATTR_EN_1_8_NYZN 0x00000006
-#define NV50TCL_VP_ATTR_EN_1_8_XYZN 0x00000007
-#define NV50TCL_VP_ATTR_EN_1_8_NNNW 0x00000008
-#define NV50TCL_VP_ATTR_EN_1_8_XNNW 0x00000009
-#define NV50TCL_VP_ATTR_EN_1_8_NYNW 0x0000000a
-#define NV50TCL_VP_ATTR_EN_1_8_XYNW 0x0000000b
-#define NV50TCL_VP_ATTR_EN_1_8_NNZW 0x0000000c
-#define NV50TCL_VP_ATTR_EN_1_8_XNZW 0x0000000d
-#define NV50TCL_VP_ATTR_EN_1_8_NYZW 0x0000000e
-#define NV50TCL_VP_ATTR_EN_1_8_XYZW 0x0000000f
-#define NV50TCL_POINT_SPRITE_CTRL 0x00001660
-#define NV50TCL_LINE_STIPPLE_ENABLE 0x0000166c
-#define NV50TCL_LINE_STIPPLE_PATTERN 0x00001680
-#define NV50TCL_PROVOKING_VERTEX_LAST 0x00001684
-#define NV50TCL_VERTEX_TWO_SIDE_ENABLE 0x00001688
-#define NV50TCL_POLYGON_STIPPLE_ENABLE 0x0000168c
-#define NV50TCL_SET_PROGRAM_CB 0x00001694
-#define NV50TCL_SET_PROGRAM_CB_PROGRAM_SHIFT 4
-#define NV50TCL_SET_PROGRAM_CB_PROGRAM_MASK 0x000000f0
-#define NV50TCL_SET_PROGRAM_CB_PROGRAM_VERTEX 0x00000000
-#define NV50TCL_SET_PROGRAM_CB_PROGRAM_GEOMETRY 0x00000020
-#define NV50TCL_SET_PROGRAM_CB_PROGRAM_FRAGMENT 0x00000030
-#define NV50TCL_SET_PROGRAM_CB_INDEX_SHIFT 8
-#define NV50TCL_SET_PROGRAM_CB_INDEX_MASK 0x00000f00
-#define NV50TCL_SET_PROGRAM_CB_BUFFER_SHIFT 12
-#define NV50TCL_SET_PROGRAM_CB_BUFFER_MASK 0x0007f000
-#define NV50TCL_SET_PROGRAM_CB_VALID (1 << 0)
-#define NV50TCL_VP_RESULT_MAP_SIZE 0x000016ac
-#define NV50TCL_VP_REG_ALLOC_TEMP 0x000016b0
-#define NV50TCL_VP_REG_ALLOC_RESULT 0x000016b8
-#define NV50TCL_VP_RESULT_MAP(x) (0x000016bc+((x)*4))
-#define NV50TCL_VP_RESULT_MAP__SIZE 0x00000010
-#define NV50TCL_VP_RESULT_MAP_0_SHIFT 0
-#define NV50TCL_VP_RESULT_MAP_0_MASK 0x000000ff
-#define NV50TCL_VP_RESULT_MAP_1_SHIFT 8
-#define NV50TCL_VP_RESULT_MAP_1_MASK 0x0000ff00
-#define NV50TCL_VP_RESULT_MAP_2_SHIFT 16
-#define NV50TCL_VP_RESULT_MAP_2_MASK 0x00ff0000
-#define NV50TCL_VP_RESULT_MAP_3_SHIFT 24
-#define NV50TCL_VP_RESULT_MAP_3_MASK 0xff000000
-#define NV50TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001700+((x)*4))
-#define NV50TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020
-#define NV50TCL_GP_ENABLE 0x00001798
-#define NV50TCL_GP_REG_ALLOC_TEMP 0x000017a0
-#define NV50TCL_GP_REG_ALLOC_RESULT 0x000017a8
-#define NV50TCL_GP_RESULT_MAP_SIZE 0x000017ac
-#define NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE 0x000017b0
-#define NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_POINTS 0x00000001
-#define NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP 0x00000002
-#define NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP 0x00000003
-#define NV50TCL_RASTERIZE_ENABLE 0x000017b4
-#define NV50TCL_STRMOUT_ENABLE 0x000017b8
-#define NV50TCL_GP_RESULT_MAP(x) (0x000017fc+((x)*4))
-#define NV50TCL_GP_RESULT_MAP__SIZE 0x00000020
-#define NV50TCL_GP_RESULT_MAP_0_SHIFT 0
-#define NV50TCL_GP_RESULT_MAP_0_MASK 0x000000ff
-#define NV50TCL_GP_RESULT_MAP_1_SHIFT 8
-#define NV50TCL_GP_RESULT_MAP_1_MASK 0x0000ff00
-#define NV50TCL_GP_RESULT_MAP_2_SHIFT 16
-#define NV50TCL_GP_RESULT_MAP_2_MASK 0x00ff0000
-#define NV50TCL_GP_RESULT_MAP_3_SHIFT 24
-#define NV50TCL_GP_RESULT_MAP_3_MASK 0xff000000
-#define NV50TCL_MAP_SEMANTIC_0 0x00001904
-#define NV50TCL_MAP_SEMANTIC_0_FFC0_ID_SHIFT 0
-#define NV50TCL_MAP_SEMANTIC_0_FFC0_ID_MASK 0x000000ff
-#define NV50TCL_MAP_SEMANTIC_0_BFC0_ID_SHIFT 8
-#define NV50TCL_MAP_SEMANTIC_0_BFC0_ID_MASK 0x0000ff00
-#define NV50TCL_MAP_SEMANTIC_0_COLR_NR_SHIFT 16
-#define NV50TCL_MAP_SEMANTIC_0_COLR_NR_MASK 0x00ff0000
-#define NV50TCL_MAP_SEMANTIC_0_CLMP_EN_SHIFT 24
-#define NV50TCL_MAP_SEMANTIC_0_CLMP_EN_MASK 0xff000000
-#define NV50TCL_MAP_SEMANTIC_1 0x00001908
-#define NV50TCL_MAP_SEMANTIC_1_CLIP_LO_SHIFT 0
-#define NV50TCL_MAP_SEMANTIC_1_CLIP_LO_MASK 0x000000ff
-#define NV50TCL_MAP_SEMANTIC_1_CLIP_HI_SHIFT 8
-#define NV50TCL_MAP_SEMANTIC_1_CLIP_HI_MASK 0x0000ff00
-#define NV50TCL_MAP_SEMANTIC_2 0x0000190c
-#define NV50TCL_MAP_SEMANTIC_2_LAYER_ID_SHIFT 0
-#define NV50TCL_MAP_SEMANTIC_2_LAYER_ID_MASK 0x000000ff
-#define NV50TCL_MAP_SEMANTIC_3 0x00001910
-#define NV50TCL_MAP_SEMANTIC_3_PTSZ_EN (1 << 0)
-#define NV50TCL_MAP_SEMANTIC_3_PTSZ_ID_SHIFT 4
-#define NV50TCL_MAP_SEMANTIC_3_PTSZ_ID_MASK 0x00000ff0
-#define NV50TCL_MAP_SEMANTIC_4 0x00001914
-#define NV50TCL_MAP_SEMANTIC_4_PRIM_ID_SHIFT 0
-#define NV50TCL_MAP_SEMANTIC_4_PRIM_ID_MASK 0x000000ff
-#define NV50TCL_CULL_FACE_ENABLE 0x00001918
-#define NV50TCL_FRONT_FACE 0x0000191c
-#define NV50TCL_FRONT_FACE_CW 0x00000900
-#define NV50TCL_FRONT_FACE_CCW 0x00000901
-#define NV50TCL_CULL_FACE 0x00001920
-#define NV50TCL_CULL_FACE_FRONT 0x00000404
-#define NV50TCL_CULL_FACE_BACK 0x00000405
-#define NV50TCL_CULL_FACE_FRONT_AND_BACK 0x00000408
-#define NV50TCL_VIEWPORT_TRANSFORM_EN 0x0000192c
-#define NV50TCL_VIEW_VOLUME_CLIP_CTRL 0x0000193c
-#define NV50TCL_VIEWPORT_CLIP_RECTS_EN 0x0000194c
-#define NV50TCL_VIEWPORT_CLIP_MODE 0x00001950
-#define NV50TCL_VIEWPORT_CLIP_MODE_INCLUDE 0x00000000
-#define NV50TCL_VIEWPORT_CLIP_MODE_EXCLUDE 0x00000001
-#define NV50TCL_VIEWPORT_CLIP_MODE_UNKNOWN 0x00000002
-#define NV50TCL_FP_CTRL_UNK196C 0x0000196c
-#define NV50TCL_CLIPID_ENABLE 0x0000197c
-#define NV50TCL_CLIPID_WIDTH 0x00001980
-#define NV50TCL_CLIPID_ID 0x00001984
-#define NV50TCL_FP_INTERPOLANT_CTRL 0x00001988
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_SHIFT 24
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_MASK 0xff000000
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_NONE 0x00000000
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_XNNN 0x01000000
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_NYNN 0x02000000
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_XYNN 0x03000000
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_NNZN 0x04000000
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_XNZN 0x05000000
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_NYZN 0x06000000
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_XYZN 0x07000000
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_NNNW 0x08000000
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_XNNW 0x09000000
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_NYNW 0x0a000000
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_XYNW 0x0b000000
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_NNZW 0x0c000000
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_XNZW 0x0d000000
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_NYZW 0x0e000000
-#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_XYZW 0x0f000000
-#define NV50TCL_FP_INTERPOLANT_CTRL_COUNT_NONFLAT_SHIFT 16
-#define NV50TCL_FP_INTERPOLANT_CTRL_COUNT_NONFLAT_MASK 0x00ff0000
-#define NV50TCL_FP_INTERPOLANT_CTRL_OFFSET_SHIFT 8
-#define NV50TCL_FP_INTERPOLANT_CTRL_OFFSET_MASK 0x0000ff00
-#define NV50TCL_FP_INTERPOLANT_CTRL_COUNT_SHIFT 0
-#define NV50TCL_FP_INTERPOLANT_CTRL_COUNT_MASK 0x000000ff
-#define NV50TCL_FP_REG_ALLOC_TEMP 0x0000198c
-#define NV50TCL_REG_MODE 0x000019a0
-#define NV50TCL_REG_MODE_PACKED 0x00000001
-#define NV50TCL_REG_MODE_STRIPED 0x00000002
-#define NV50TCL_FP_CONTROL 0x000019a8
-#define NV50TCL_FP_CONTROL_MULTIPLE_RESULTS (1 << 0)
-#define NV50TCL_FP_CONTROL_EXPORTS_Z (1 << 8)
-#define NV50TCL_FP_CONTROL_USES_KIL (1 << 20)
-#define NV50TCL_DEPTH_BOUNDS_EN 0x000019bc
-#define NV50TCL_LOGIC_OP_ENABLE 0x000019c4
-#define NV50TCL_LOGIC_OP 0x000019c8
-#define NV50TCL_LOGIC_OP_CLEAR 0x00001500
-#define NV50TCL_LOGIC_OP_AND 0x00001501
-#define NV50TCL_LOGIC_OP_AND_REVERSE 0x00001502
-#define NV50TCL_LOGIC_OP_COPY 0x00001503
-#define NV50TCL_LOGIC_OP_AND_INVERTED 0x00001504
-#define NV50TCL_LOGIC_OP_NOOP 0x00001505
-#define NV50TCL_LOGIC_OP_XOR 0x00001506
-#define NV50TCL_LOGIC_OP_OR 0x00001507
-#define NV50TCL_LOGIC_OP_NOR 0x00001508
-#define NV50TCL_LOGIC_OP_EQUIV 0x00001509
-#define NV50TCL_LOGIC_OP_INVERT 0x0000150a
-#define NV50TCL_LOGIC_OP_OR_REVERSE 0x0000150b
-#define NV50TCL_LOGIC_OP_COPY_INVERTED 0x0000150c
-#define NV50TCL_LOGIC_OP_OR_INVERTED 0x0000150d
-#define NV50TCL_LOGIC_OP_NAND 0x0000150e
-#define NV50TCL_LOGIC_OP_SET 0x0000150f
-#define NV50TCL_CLEAR_BUFFERS 0x000019d0
-#define NV50TCL_CLEAR_BUFFERS_Z (1 << 0)
-#define NV50TCL_CLEAR_BUFFERS_S (1 << 1)
-#define NV50TCL_CLEAR_BUFFERS_R (1 << 2)
-#define NV50TCL_CLEAR_BUFFERS_G (1 << 3)
-#define NV50TCL_CLEAR_BUFFERS_B (1 << 4)
-#define NV50TCL_CLEAR_BUFFERS_A (1 << 5)
-#define NV50TCL_CLEAR_BUFFERS_RT_SHIFT 6
-#define NV50TCL_CLEAR_BUFFERS_RT_MASK 0x000003c0
-#define NV50TCL_CLEAR_BUFFERS_LAYER_SHIFT 10
-#define NV50TCL_CLEAR_BUFFERS_LAYER_MASK 0x0007fc00
-#define NV50TCL_COLOR_MASK(x) (0x00001a00+((x)*4))
-#define NV50TCL_COLOR_MASK__SIZE 0x00000008
-#define NV50TCL_COLOR_MASK_R_SHIFT 0
-#define NV50TCL_COLOR_MASK_R_MASK 0x0000000f
-#define NV50TCL_COLOR_MASK_G_SHIFT 4
-#define NV50TCL_COLOR_MASK_G_MASK 0x000000f0
-#define NV50TCL_COLOR_MASK_B_SHIFT 8
-#define NV50TCL_COLOR_MASK_B_MASK 0x00000f00
-#define NV50TCL_COLOR_MASK_A_SHIFT 12
-#define NV50TCL_COLOR_MASK_A_MASK 0x0000f000
-#define NV50TCL_STRMOUT_ADDRESS_HIGH(x) (0x00001a80+((x)*16))
-#define NV50TCL_STRMOUT_ADDRESS_HIGH__SIZE 0x00000004
-#define NV50TCL_STRMOUT_ADDRESS_LOW(x) (0x00001a84+((x)*16))
-#define NV50TCL_STRMOUT_ADDRESS_LOW__SIZE 0x00000004
-#define NV50TCL_STRMOUT_NUM_ATTRIBS(x) (0x00001a88+((x)*16))
-#define NV50TCL_STRMOUT_NUM_ATTRIBS__SIZE 0x00000004
-#define NV50TCL_VERTEX_ARRAY_ATTRIB(x) (0x00001ac0+((x)*4))
-#define NV50TCL_VERTEX_ARRAY_ATTRIB__SIZE 0x00000010
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_BUFFER_SHIFT 0
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_BUFFER_MASK 0x0000000f
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_CONST (1 << 4)
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_OFFSET_SHIFT 5
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_OFFSET_MASK 0x0007ffe0
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_SHIFT 19
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_MASK 0x01f80000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 0x00080000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32 0x00100000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16 0x00180000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32 0x00200000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16 0x00280000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8 0x00500000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16 0x00780000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32 0x00900000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8 0x00980000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8 0x00c00000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16 0x00d80000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8 0x00e80000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_2_10_10_10 0x01800000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SHIFT 25
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_MASK 0x0e000000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT 0x0e000000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM 0x02000000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM 0x04000000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED 0x0a000000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED 0x0c000000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT 0x08000000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT 0x06000000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_BGRA (1 << 31)
-#define NV50TCL_QUERY_ADDRESS_HIGH 0x00001b00
-#define NV50TCL_QUERY_ADDRESS_LOW 0x00001b04
-#define NV50TCL_QUERY_SEQUENCE 0x00001b08
-#define NV50TCL_QUERY_GET 0x00001b0c
-
-
-#define NV84TCL 0x00008297
-
-
-
-#define NVA0TCL 0x00008397
-
-
-
-#define NVA8TCL 0x00008597
-
-
-
-#define NV50_COMPUTE 0x000050c0
-
-#define NV50_COMPUTE_NOP 0x00000100
-#define NV50_COMPUTE_NOTIFY 0x00000104
-#define NV50_COMPUTE_SERIALIZE 0x00000110
-#define NV50_COMPUTE_DMA_NOTIFY 0x00000180
-#define NV50_COMPUTE_DMA_GLOBAL 0x000001a0
-#define NV50_COMPUTE_DMA_QUERY 0x000001a4
-#define NV50_COMPUTE_DMA_LOCAL 0x000001b8
-#define NV50_COMPUTE_DMA_STACK 0x000001bc
-#define NV50_COMPUTE_DMA_CODE_CB 0x000001c0
-#define NV50_COMPUTE_DMA_TSC 0x000001c4
-#define NV50_COMPUTE_DMA_TIC 0x000001c8
-#define NV50_COMPUTE_DMA_TEXTURE 0x000001cc
-#define NV50_COMPUTE_CP_ADDRESS_HIGH 0x00000210
-#define NV50_COMPUTE_CP_ADDRESS_LOW 0x00000214
-#define NV50_COMPUTE_STACK_ADDRESS_HIGH 0x00000218
-#define NV50_COMPUTE_STACK_ADDRESS_LOW 0x0000021c
-#define NV50_COMPUTE_STACK_SIZE_LOG 0x00000220
-#define NV50_COMPUTE_TSC_ADDRESS_HIGH 0x0000022c
-#define NV50_COMPUTE_TSC_ADDRESS_LOW 0x00000230
-#define NV50_COMPUTE_TSC_LIMIT 0x00000234
-#define NV50_COMPUTE_CB_ADDR 0x00000238
-#define NV50_COMPUTE_CB_ADDR_ID_SHIFT 8
-#define NV50_COMPUTE_CB_ADDR_ID_MASK 0x003fff00
-#define NV50_COMPUTE_CB_ADDR_BUFFER_SHIFT 0
-#define NV50_COMPUTE_CB_ADDR_BUFFER_MASK 0x0000007f
-#define NV50_COMPUTE_CB_DATA(x) (0x0000023c+((x)*4))
-#define NV50_COMPUTE_CB_DATA__SIZE 0x00000010
-#define NV50_COMPUTE_DELAY1 0x00000284
-#define NV50_COMPUTE_WATCHDOG_TIMER 0x00000288
-#define NV50_COMPUTE_DELAY2 0x0000028c
-#define NV50_COMPUTE_LOCAL_ADDRESS_HIGH 0x00000294
-#define NV50_COMPUTE_LOCAL_ADDRESS_LOW 0x00000298
-#define NV50_COMPUTE_LOCAL_SIZE_LOG 0x0000029c
-#define NV50_COMPUTE_CB_DEF_ADDRESS_HIGH 0x000002a4
-#define NV50_COMPUTE_CB_DEF_ADDRESS_LOW 0x000002a8
-#define NV50_COMPUTE_CB_DEF_SET 0x000002ac
-#define NV50_COMPUTE_CB_DEF_SET_SIZE_SHIFT 0
-#define NV50_COMPUTE_CB_DEF_SET_SIZE_MASK 0x0000ffff
-#define NV50_COMPUTE_CB_DEF_SET_BUFFER_SHIFT 16
-#define NV50_COMPUTE_CB_DEF_SET_BUFFER_MASK 0x007f0000
-#define NV50_COMPUTE_BLOCK_ALLOC 0x000002b4
-#define NV50_COMPUTE_BLOCK_ALLOC_THREADS_SHIFT 0
-#define NV50_COMPUTE_BLOCK_ALLOC_THREADS_MASK 0x0000ffff
-#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS_SHIFT 16
-#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS_MASK 0xffff0000
-#define NV50_COMPUTE_LANES32_ENABLE 0x000002b8
-#define NV50_COMPUTE_CP_REG_ALLOC_TEMP 0x000002c0
-#define NV50_COMPUTE_TIC_ADDRESS_HIGH 0x000002c4
-#define NV50_COMPUTE_TIC_ADDRESS_LOW 0x000002c8
-#define NV50_COMPUTE_TIC_LIMIT 0x000002cc
-#define NV50_COMPUTE_PM_SET(x) (0x000002d0+((x)*4))
-#define NV50_COMPUTE_PM_SET__SIZE 0x00000004
-#define NV50_COMPUTE_PM_CONTROL(x) (0x000002e0+((x)*4))
-#define NV50_COMPUTE_PM_CONTROL__SIZE 0x00000004
-#define NV50_COMPUTE_PM_CONTROL_UNK0 (1 << 0)
-#define NV50_COMPUTE_PM_CONTROL_UNK1_SHIFT 4
-#define NV50_COMPUTE_PM_CONTROL_UNK1_MASK 0x00000070
-#define NV50_COMPUTE_PM_CONTROL_UNK2_SHIFT 8
-#define NV50_COMPUTE_PM_CONTROL_UNK2_MASK 0xffffff00
-#define NV50_COMPUTE_LOCAL_WARPS_LOG_ALLOC 0x000002fc
-#define NV50_COMPUTE_LOCAL_WARPS_NO_CLAMP 0x00000300
-#define NV50_COMPUTE_STACK_WARPS_LOG_ALLOC 0x00000304
-#define NV50_COMPUTE_STACK_WARPS_NO_CLAMP 0x00000308
-#define NV50_COMPUTE_QUERY_ADDRESS_HIGH 0x00000310
-#define NV50_COMPUTE_QUERY_ADDRESS_LOW 0x00000314
-#define NV50_COMPUTE_QUERY_COUNTER 0x00000318
-#define NV50_COMPUTE_QUERY_GET 0x0000031c
-#define NV50_COMPUTE_COND_ADDRESS_HIGH 0x00000320
-#define NV50_COMPUTE_COND_ADDRESS_LOW 0x00000324
-#define NV50_COMPUTE_COND_MODE 0x00000328
-#define NV50_COMPUTE_COND_MODE_NEVER 0x00000000
-#define NV50_COMPUTE_COND_MODE_ALWAYS 0x00000001
-#define NV50_COMPUTE_COND_MODE_RES 0x00000002
-#define NV50_COMPUTE_COND_MODE_NOT_RES_AND_NOT_ID 0x00000003
-#define NV50_COMPUTE_COND_MODE_RES_OR_ID 0x00000004
-#define NV50_COMPUTE_LAUNCH 0x00000368
-#define NV50_COMPUTE_USER_PARAM_COUNT 0x00000374
-#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT_SHIFT 8
-#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT_MASK 0x0000ff00
-#define NV50_COMPUTE_LINKED_TSC 0x00000378
-#define NV50_COMPUTE_CODE_CB_FLUSH 0x00000380
-#define NV50_COMPUTE_GRIDDIM 0x000003a4
-#define NV50_COMPUTE_GRIDDIM_X_SHIFT 0
-#define NV50_COMPUTE_GRIDDIM_X_MASK 0x0000ffff
-#define NV50_COMPUTE_GRIDDIM_Y_SHIFT 16
-#define NV50_COMPUTE_GRIDDIM_Y_MASK 0xffff0000
-#define NV50_COMPUTE_SHARED_SIZE 0x000003a8
-#define NV50_COMPUTE_BLOCKDIM_YX 0x000003ac
-#define NV50_COMPUTE_BLOCKDIM_YX_X_SHIFT 0
-#define NV50_COMPUTE_BLOCKDIM_YX_X_MASK 0x0000ffff
-#define NV50_COMPUTE_BLOCKDIM_YX_Y_SHIFT 16
-#define NV50_COMPUTE_BLOCKDIM_YX_Y_MASK 0xffff0000
-#define NV50_COMPUTE_BLOCKDIM_Z 0x000003b0
-#define NV50_COMPUTE_CP_START_ID 0x000003b4
-#define NV50_COMPUTE_REG_MODE 0x000003b8
-#define NV50_COMPUTE_REG_MODE_PACKED 0x00000001
-#define NV50_COMPUTE_REG_MODE_STRIPED 0x00000002
-#define NV50_COMPUTE_TEX_LIMITS 0x000003bc
-#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2_SHIFT 0
-#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2_MASK 0x0000000f
-#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2_SHIFT 4
-#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2_MASK 0x000000f0
-#define NV50_COMPUTE_BIND_TSC 0x000003c0
-#define NV50_COMPUTE_BIND_TSC_VALID (1 << 0)
-#define NV50_COMPUTE_BIND_TSC_SAMPLER_SHIFT 4
-#define NV50_COMPUTE_BIND_TSC_SAMPLER_MASK 0x000000f0
-#define NV50_COMPUTE_BIND_TSC_TSC_SHIFT 12
-#define NV50_COMPUTE_BIND_TSC_TSC_MASK 0x001ff000
-#define NV50_COMPUTE_BIND_TIC 0x000003c4
-#define NV50_COMPUTE_BIND_TIC_VALID (1 << 0)
-#define NV50_COMPUTE_BIND_TIC_TEXTURE_SHIFT 1
-#define NV50_COMPUTE_BIND_TIC_TEXTURE_MASK 0x000001fe
-#define NV50_COMPUTE_BIND_TIC_TIC_SHIFT 9
-#define NV50_COMPUTE_BIND_TIC_TIC_MASK 0x7ffffe00
-#define NV50_COMPUTE_SET_PROGRAM_CB 0x000003c8
-#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX_SHIFT 8
-#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX_MASK 0x00000f00
-#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER_SHIFT 12
-#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER_MASK 0x0007f000
-#define NV50_COMPUTE_SET_PROGRAM_CB_VALID (1 << 0)
-#define NV50_COMPUTE_GLOBAL_ADDRESS_HIGH(x) (0x00000400+((x)*32))
-#define NV50_COMPUTE_GLOBAL_ADDRESS_HIGH__SIZE 0x00000010
-#define NV50_COMPUTE_GLOBAL_ADDRESS_LOW(x) (0x00000404+((x)*32))
-#define NV50_COMPUTE_GLOBAL_ADDRESS_LOW__SIZE 0x00000010
-#define NV50_COMPUTE_GLOBAL_PITCH(x) (0x00000408+((x)*32))
-#define NV50_COMPUTE_GLOBAL_PITCH__SIZE 0x00000010
-#define NV50_COMPUTE_GLOBAL_LIMIT(x) (0x0000040c+((x)*32))
-#define NV50_COMPUTE_GLOBAL_LIMIT__SIZE 0x00000010
-#define NV50_COMPUTE_GLOBAL_MODE(x) (0x00000410+((x)*32))
-#define NV50_COMPUTE_GLOBAL_MODE__SIZE 0x00000010
-#define NV50_COMPUTE_GLOBAL_MODE_LINEAR (1 << 0)
-#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE_SHIFT 8
-#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE_MASK 0x00000f00
-#define NV50_COMPUTE_USER_PARAM(x) (0x00000600+((x)*4))
-#define NV50_COMPUTE_USER_PARAM__SIZE 0x00000040
-
-
-#endif /* NOUVEAU_REG_H */
diff --git a/src/gallium/drivers/nv50/nv50_resource.c b/src/gallium/drivers/nv50/nv50_resource.c
index 6c0a969635..1ae4d70a84 100644
--- a/src/gallium/drivers/nv50/nv50_resource.c
+++ b/src/gallium/drivers/nv50/nv50_resource.c
@@ -4,64 +4,48 @@
#include "nouveau/nouveau_screen.h"
-/* This doesn't look quite right - this query is supposed to ask
- * whether the particular context has references to the resource in
- * any unflushed rendering command buffer, and hence requires a
- * pipe->flush() for serializing some modification to that resource.
- *
- * This seems to be answering the question of whether the resource is
- * currently on hardware.
- */
-static unsigned int
-nv50_resource_is_referenced(struct pipe_context *pipe,
- struct pipe_resource *resource,
- unsigned level, int layer)
-{
- return nouveau_reference_flags(nv50_resource(resource)->bo);
-}
-
static struct pipe_resource *
nv50_resource_create(struct pipe_screen *screen,
- const struct pipe_resource *template)
+ const struct pipe_resource *templ)
{
- if (template->target == PIPE_BUFFER)
- return nv50_buffer_create(screen, template);
- else
- return nv50_miptree_create(screen, template);
+ switch (templ->target) {
+ case PIPE_BUFFER:
+ return nouveau_buffer_create(screen, templ);
+ default:
+ return nv50_miptree_create(screen, templ);
+ }
}
static struct pipe_resource *
nv50_resource_from_handle(struct pipe_screen * screen,
- const struct pipe_resource *template,
- struct winsys_handle *whandle)
+ const struct pipe_resource *templ,
+ struct winsys_handle *whandle)
{
- if (template->target == PIPE_BUFFER)
- return NULL;
- else
- return nv50_miptree_from_handle(screen, template, whandle);
+ if (templ->target == PIPE_BUFFER)
+ return NULL;
+ else
+ return nv50_miptree_from_handle(screen, templ, whandle);
}
void
nv50_init_resource_functions(struct pipe_context *pcontext)
{
- pcontext->get_transfer = u_get_transfer_vtbl;
- pcontext->transfer_map = u_transfer_map_vtbl;
- pcontext->transfer_flush_region = u_transfer_flush_region_vtbl;
- pcontext->transfer_unmap = u_transfer_unmap_vtbl;
- pcontext->transfer_destroy = u_transfer_destroy_vtbl;
- pcontext->transfer_inline_write = u_transfer_inline_write_vtbl;
- pcontext->is_resource_referenced = nv50_resource_is_referenced;
-
- pcontext->create_surface = nv50_miptree_surface_new;
- pcontext->surface_destroy = nv50_miptree_surface_del;
+ pcontext->get_transfer = u_get_transfer_vtbl;
+ pcontext->transfer_map = u_transfer_map_vtbl;
+ pcontext->transfer_flush_region = u_transfer_flush_region_vtbl;
+ pcontext->transfer_unmap = u_transfer_unmap_vtbl;
+ pcontext->transfer_destroy = u_transfer_destroy_vtbl;
+ pcontext->transfer_inline_write = u_transfer_inline_write_vtbl;
+ pcontext->create_surface = nv50_miptree_surface_new;
+ pcontext->surface_destroy = nv50_miptree_surface_del;
}
void
nv50_screen_init_resource_functions(struct pipe_screen *pscreen)
{
- pscreen->resource_create = nv50_resource_create;
- pscreen->resource_from_handle = nv50_resource_from_handle;
- pscreen->resource_get_handle = u_resource_get_handle_vtbl;
- pscreen->resource_destroy = u_resource_destroy_vtbl;
- pscreen->user_buffer_create = nv50_user_buffer_create;
+ pscreen->resource_create = nv50_resource_create;
+ pscreen->resource_from_handle = nv50_resource_from_handle;
+ pscreen->resource_get_handle = u_resource_get_handle_vtbl;
+ pscreen->resource_destroy = u_resource_destroy_vtbl;
+ pscreen->user_buffer_create = nouveau_user_buffer_create;
}
diff --git a/src/gallium/drivers/nv50/nv50_resource.h b/src/gallium/drivers/nv50/nv50_resource.h
index 4b2a75e11a..0e9f0a2557 100644
--- a/src/gallium/drivers/nv50/nv50_resource.h
+++ b/src/gallium/drivers/nv50/nv50_resource.h
@@ -1,97 +1,70 @@
-#ifndef NV50_RESOURCE_H
-#define NV50_RESOURCE_H
+#ifndef __NV50_RESOURCE_H__
+#define __NV50_RESOURCE_H__
#include "util/u_transfer.h"
-
+#include "util/u_double_list.h"
+#define NOUVEAU_NVC0
#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_buffer.h"
+#undef NOUVEAU_NVC0
+
+void
+nv50_init_resource_functions(struct pipe_context *pcontext);
-struct pipe_resource;
-struct nouveau_bo;
+void
+nv50_screen_init_resource_functions(struct pipe_screen *pscreen);
+#define NV50_TILE_DIM_SHIFT(m, d) (((m) >> (d * 4)) & 0xf)
-/* This gets further specialized into either buffer or texture
- * structures. In the future we'll want to remove much of that
- * distinction, but for now try to keep as close to the existing code
- * as possible and use the vtbl struct to choose between the two
- * underlying implementations.
- */
-struct nv50_resource {
- struct pipe_resource base;
- const struct u_resource_vtbl *vtbl;
- struct nouveau_bo *bo;
-};
+#define NV50_TILE_PITCH(m) (64 << 0)
+#define NV50_TILE_HEIGHT(m) ( 4 << NV50_TILE_DIM_SHIFT(m, 0))
+#define NV50_TILE_DEPTH(m) ( 1 << NV50_TILE_DIM_SHIFT(m, 1))
+
+#define NV50_TILE_SIZE_2D(m) ((64 * 4) << \
+ NV50_TILE_DIM_SHIFT(m, 0))
+
+#define NV50_TILE_SIZE(m) (NV50_TILE_SIZE_2D(m) << NV50_TILE_DIM_SHIFT(m, 1))
struct nv50_miptree_level {
- int *image_offset;
- unsigned pitch;
- unsigned tile_mode;
+ uint32_t offset;
+ uint32_t pitch;
+ uint32_t tile_mode;
};
#define NV50_MAX_TEXTURE_LEVELS 16
struct nv50_miptree {
- struct nv50_resource base;
-
- struct nv50_miptree_level level[NV50_MAX_TEXTURE_LEVELS];
- int image_nr;
- int total_size;
+ struct nv04_resource base;
+ struct nv50_miptree_level level[NV50_MAX_TEXTURE_LEVELS];
+ uint32_t total_size;
+ uint32_t layer_stride;
+ boolean layout_3d; /* TRUE if layer count varies with mip level */
};
static INLINE struct nv50_miptree *
nv50_miptree(struct pipe_resource *pt)
{
- return (struct nv50_miptree *)pt;
-}
-
-
-static INLINE
-struct nv50_resource *nv50_resource(struct pipe_resource *resource)
-{
- return (struct nv50_resource *)resource;
+ return (struct nv50_miptree *)pt;
}
-/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? */
-static INLINE boolean
-nv50_resource_mapped_by_gpu(struct pipe_resource *resource)
-{
- return nv50_resource(resource)->bo->handle;
-}
-
-void
-nv50_init_resource_functions(struct pipe_context *pcontext);
-
-void
-nv50_screen_init_resource_functions(struct pipe_screen *pscreen);
-
-/* Internal functions
+/* Internal functions:
*/
struct pipe_resource *
nv50_miptree_create(struct pipe_screen *pscreen,
- const struct pipe_resource *tmp);
+ const struct pipe_resource *tmp);
struct pipe_resource *
nv50_miptree_from_handle(struct pipe_screen *pscreen,
- const struct pipe_resource *template,
- struct winsys_handle *whandle);
-
-struct pipe_resource *
-nv50_buffer_create(struct pipe_screen *pscreen,
- const struct pipe_resource *template);
-
-struct pipe_resource *
-nv50_user_buffer_create(struct pipe_screen *screen,
- void *ptr,
- unsigned bytes,
- unsigned usage);
-
+ const struct pipe_resource *template,
+ struct winsys_handle *whandle);
struct pipe_surface *
-nv50_miptree_surface_new(struct pipe_context *pipe, struct pipe_resource *pt,
- const struct pipe_surface *surf_tmpl);
+nv50_miptree_surface_new(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_surface *templ);
void
-nv50_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps);
-
+nv50_miptree_surface_del(struct pipe_context *, struct pipe_surface *);
#endif
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index edc3d54d01..7690c80eef 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Ben Skeggs
+ * Copyright 2010 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,596 +25,625 @@
#include "nv50_context.h"
#include "nv50_screen.h"
-#include "nv50_resource.h"
-#include "nv50_program.h"
-#include "nouveau/nouveau_stateobj.h"
+#include "nouveau/nv_object.xml.h"
+
+#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
+# define NOUVEAU_GETPARAM_GRAPH_UNITS 13
+#endif
+
+extern int nouveau_device_get_param(struct nouveau_device *dev,
+ uint64_t param, uint64_t *value);
static boolean
nv50_screen_is_format_supported(struct pipe_screen *pscreen,
- enum pipe_format format,
- enum pipe_texture_target target,
- unsigned sample_count,
- unsigned usage, unsigned geom_flags)
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned bindings)
{
- if (sample_count > 1)
- return FALSE;
-
- if (!util_format_s3tc_enabled) {
- switch (format) {
- case PIPE_FORMAT_DXT1_RGB:
- case PIPE_FORMAT_DXT1_RGBA:
- case PIPE_FORMAT_DXT3_RGBA:
- case PIPE_FORMAT_DXT5_RGBA:
- return FALSE;
- default:
- break;
- }
- }
-
- switch (format) {
- case PIPE_FORMAT_Z16_UNORM:
- if ((nouveau_screen(pscreen)->device->chipset & 0xf0) != 0xa0)
- return FALSE;
- break;
- default:
- break;
- }
-
- /* transfers & shared are always supported */
- usage &= ~(PIPE_BIND_TRANSFER_READ |
- PIPE_BIND_TRANSFER_WRITE |
- PIPE_BIND_SHARED);
-
- return (nv50_format_table[format].usage & usage) == usage;
+ if (sample_count > 1)
+ return FALSE;
+
+ if (!util_format_s3tc_enabled) {
+ switch (format) {
+ case PIPE_FORMAT_DXT1_RGB:
+ case PIPE_FORMAT_DXT1_RGBA:
+ case PIPE_FORMAT_DXT3_RGBA:
+ case PIPE_FORMAT_DXT5_RGBA:
+ return FALSE;
+ default:
+ break;
+ }
+ }
+
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ if (nv50_screen(pscreen)->tesla->grclass < NVA0_3D)
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+
+ /* transfers & shared are always supported */
+ bindings &= ~(PIPE_BIND_TRANSFER_READ |
+ PIPE_BIND_TRANSFER_WRITE |
+ PIPE_BIND_SHARED);
+
+ return (nv50_format_table[format].usage & bindings) == bindings;
}
static int
nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
{
- switch (param) {
- case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
- return 32;
- case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- return 32;
- case PIPE_CAP_MAX_COMBINED_SAMPLERS:
- return 64;
- case PIPE_CAP_NPOT_TEXTURES:
- return 1;
- case PIPE_CAP_TWO_SIDED_STENCIL:
- return 1;
- case PIPE_CAP_GLSL:
- case PIPE_CAP_SM3:
- return 1;
- case PIPE_CAP_ANISOTROPIC_FILTER:
- return 1;
- case PIPE_CAP_POINT_SPRITE:
- return 1;
- case PIPE_CAP_MAX_RENDER_TARGETS:
- return 8;
- case PIPE_CAP_OCCLUSION_QUERY:
- return 1;
- case PIPE_CAP_TIMER_QUERY:
- return 0;
- case PIPE_CAP_STREAM_OUTPUT:
- return 0;
- case PIPE_CAP_TEXTURE_SHADOW_MAP:
- return 1;
- case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
- return 13;
- case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
- return 10;
- case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
- return 13;
- case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
- case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
- return 1;
- case PIPE_CAP_TEXTURE_SWIZZLE:
- return 1;
- case PIPE_CAP_BLEND_EQUATION_SEPARATE:
- return 1;
- case PIPE_CAP_INDEP_BLEND_ENABLE:
- return 1;
- case PIPE_CAP_INDEP_BLEND_FUNC:
- return 0;
- case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
- return 1;
- case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
- return 1;
- case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
- return 0;
- case PIPE_CAP_DEPTH_CLAMP:
- return 1;
- case PIPE_CAP_SHADER_STENCIL_EXPORT:
- return 0;
- case PIPE_CAP_PRIMITIVE_RESTART:
- return 0;
- default:
- NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
- return 0;
- }
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return 32;
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return 64;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 13;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 10;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 13;
+ case PIPE_CAP_ARRAY_TEXTURES: /* shader support missing */
+ return 0;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ case PIPE_CAP_NPOT_TEXTURES:
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ case PIPE_CAP_DEPTH_CLAMP:
+ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_GLSL:
+ case PIPE_CAP_SM3:
+ return 1;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 8;
+ case PIPE_CAP_TIMER_QUERY:
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 1;
+ case PIPE_CAP_STREAM_OUTPUT:
+ return 0;
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ return 1;
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ return nv50_screen(pscreen)->tesla->grclass >= NVA3_3D;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ return 0;
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ return 0;
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ return 1;
+ default:
+ NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
}
static int
nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
- enum pipe_shader_cap param)
+ enum pipe_shader_cap param)
{
- switch(shader) {
- case PIPE_SHADER_FRAGMENT:
- case PIPE_SHADER_VERTEX:
- break;
- case PIPE_SHADER_GEOMETRY:
- default:
- return 0;
- }
-
- switch(param) {
- case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
- case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
- case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
- case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: /* arbitrary limit */
- return 16384;
- case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: /* need stack bo */
- return 4;
- case PIPE_SHADER_CAP_MAX_INPUTS: /* 128 / 4 with GP */
- if (shader == PIPE_SHADER_GEOMETRY)
- return 128 / 4;
- else
- return 64 / 4;
- case PIPE_SHADER_CAP_MAX_CONSTS:
- return 65536 / 16;
- case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: /* 16 - 1, but not implemented */
- return 1;
- case PIPE_SHADER_CAP_MAX_ADDRS: /* no spilling atm */
- return 1;
- case PIPE_SHADER_CAP_MAX_PREDS: /* not yet handled */
- return 0;
- case PIPE_SHADER_CAP_MAX_TEMPS: /* no spilling atm */
- return NV50_CAP_MAX_PROGRAM_TEMPS;
- case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
- return 1;
- case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
- case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
- case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
- case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
- return 1;
- case PIPE_SHADER_CAP_SUBROUTINES:
- return 0;
- default:
- return 0;
- }
+ switch (shader) {
+ case PIPE_SHADER_VERTEX:
+ case PIPE_SHADER_GEOMETRY:
+ case PIPE_SHADER_FRAGMENT:
+ break;
+ default:
+ return 0;
+ }
+
+ switch (param) {
+ case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+ return 16384;
+ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+ return 4;
+ case PIPE_SHADER_CAP_MAX_INPUTS:
+ if (shader == PIPE_SHADER_VERTEX)
+ return 32;
+ return 0x300 / 16;
+ case PIPE_SHADER_CAP_MAX_CONSTS:
+ return 65536 / 16;
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+ return 14;
+ case PIPE_SHADER_CAP_MAX_ADDRS:
+ return 1;
+ case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+ return shader != PIPE_SHADER_FRAGMENT;
+ case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+ return 1;
+ case PIPE_SHADER_CAP_MAX_PREDS:
+ return 0;
+ case PIPE_SHADER_CAP_MAX_TEMPS:
+ return NV50_CAP_MAX_PROGRAM_TEMPS;
+ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+ return 1;
+ case PIPE_SHADER_CAP_SUBROUTINES:
+ return 0; /* please inline, or provide function declarations */
+ default:
+ NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
+ return 0;
+ }
}
static float
nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param)
{
- switch (param) {
- case PIPE_CAP_MAX_LINE_WIDTH:
- case PIPE_CAP_MAX_LINE_WIDTH_AA:
- return 10.0;
- case PIPE_CAP_MAX_POINT_WIDTH:
- case PIPE_CAP_MAX_POINT_WIDTH_AA:
- return 64.0;
- case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
- return 16.0;
- case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
- return 4.0;
- default:
- NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
- return 0.0;
- }
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 10.0f;
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 64.0f;
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 16.0f;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 4.0f;
+ default:
+ NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
+ return 0.0f;
+ }
}
static void
nv50_screen_destroy(struct pipe_screen *pscreen)
{
- struct nv50_screen *screen = nv50_screen(pscreen);
- unsigned i;
-
- for (i = 0; i < 3; i++) {
- if (screen->constbuf_parm[i])
- nouveau_bo_ref(NULL, &screen->constbuf_parm[i]);
- }
-
- if (screen->constbuf_misc[0])
- nouveau_bo_ref(NULL, &screen->constbuf_misc[0]);
- if (screen->tic)
- nouveau_bo_ref(NULL, &screen->tic);
- if (screen->tsc)
- nouveau_bo_ref(NULL, &screen->tsc);
-
- nouveau_notifier_free(&screen->sync);
- nouveau_grobj_free(&screen->tesla);
- nouveau_grobj_free(&screen->eng2d);
- nouveau_grobj_free(&screen->m2mf);
- nouveau_resource_destroy(&screen->immd_heap);
- nouveau_screen_fini(&screen->base);
- FREE(screen);
-}
+ struct nv50_screen *screen = nv50_screen(pscreen);
-#define BGN_RELOC(ch, bo, gr, m, n, fl) \
- OUT_RELOC(ch, bo, (n << 18) | (gr->subc << 13) | m, fl, 0, 0)
+ if (screen->base.fence.current) {
+ nouveau_fence_wait(screen->base.fence.current);
+ nouveau_fence_ref (NULL, &screen->base.fence.current);
+ }
-void
-nv50_screen_reloc_constbuf(struct nv50_screen *screen, unsigned cbi)
+ nouveau_bo_ref(NULL, &screen->code);
+ nouveau_bo_ref(NULL, &screen->tls_bo);
+ nouveau_bo_ref(NULL, &screen->stack_bo);
+ nouveau_bo_ref(NULL, &screen->txc);
+ nouveau_bo_ref(NULL, &screen->uniforms);
+ nouveau_bo_ref(NULL, &screen->fence.bo);
+
+ nouveau_resource_destroy(&screen->vp_code_heap);
+ nouveau_resource_destroy(&screen->gp_code_heap);
+ nouveau_resource_destroy(&screen->fp_code_heap);
+
+ if (screen->tic.entries)
+ FREE(screen->tic.entries);
+
+ nouveau_mm_destroy(screen->mm_VRAM_fe0);
+
+ nouveau_grobj_free(&screen->tesla);
+ nouveau_grobj_free(&screen->eng2d);
+ nouveau_grobj_free(&screen->m2mf);
+
+ nouveau_notifier_free(&screen->sync);
+
+ nouveau_screen_fini(&screen->base);
+
+ FREE(screen);
+}
+
+static void
+nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 sequence)
{
- struct nouveau_bo *bo;
- struct nouveau_channel *chan = screen->base.channel;
- struct nouveau_grobj *tesla = screen->tesla;
- unsigned size;
- const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
-
- switch (cbi) {
- case NV50_CB_PMISC:
- bo = screen->constbuf_misc[0];
- size = 0x200;
- break;
- case NV50_CB_PVP:
- case NV50_CB_PFP:
- case NV50_CB_PGP:
- bo = screen->constbuf_parm[cbi - NV50_CB_PVP];
- size = 0;
- break;
- default:
- return;
- }
-
- BGN_RELOC (chan, bo, tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl);
- OUT_RELOCh(chan, bo, 0, rl);
- OUT_RELOCl(chan, bo, 0, rl);
- OUT_RELOC (chan, bo, (cbi << 16) | size, rl, 0, 0);
+ struct nv50_screen *screen = nv50_screen(pscreen);
+ struct nouveau_channel *chan = screen->base.channel;
+
+ MARK_RING (chan, 5, 2);
+ BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
+ OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
+ OUT_RING (chan, sequence);
+ OUT_RING (chan, NV50_3D_QUERY_GET_MODE_WRITE_UNK0 |
+ NV50_3D_QUERY_GET_UNK4 |
+ NV50_3D_QUERY_GET_UNIT_CROP |
+ NV50_3D_QUERY_GET_TYPE_QUERY |
+ NV50_3D_QUERY_GET_QUERY_SELECT_ZERO |
+ NV50_3D_QUERY_GET_SHORT);
}
-void
-nv50_screen_relocs(struct nv50_screen *screen)
+static u32
+nv50_screen_fence_update(struct pipe_screen *pscreen)
{
- struct nouveau_channel *chan = screen->base.channel;
- struct nouveau_grobj *tesla = screen->tesla;
- unsigned i;
- const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
+ struct nv50_screen *screen = nv50_screen(pscreen);
+ return screen->fence.map[0];
+}
- MARK_RING (chan, 28, 26);
+#define FAIL_SCREEN_INIT(str, err) \
+ do { \
+ NOUVEAU_ERR(str, err); \
+ nv50_screen_destroy(pscreen); \
+ return NULL; \
+ } while(0)
- /* cause grobj autobind */
- BEGIN_RING(chan, tesla, 0x0100, 1);
- OUT_RING (chan, 0);
+struct pipe_screen *
+nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
+{
+ struct nv50_screen *screen;
+ struct nouveau_channel *chan;
+ struct pipe_screen *pscreen;
+ uint64_t value;
+ uint32_t tesla_class;
+ unsigned stack_size, max_warps, tls_space;
+ int ret;
+ unsigned i, base;
+
+ screen = CALLOC_STRUCT(nv50_screen);
+ if (!screen)
+ return NULL;
+ pscreen = &screen->base.base;
+
+ screen->base.sysmem_bindings = PIPE_BIND_CONSTANT_BUFFER;
+
+ ret = nouveau_screen_init(&screen->base, dev);
+ if (ret)
+ FAIL_SCREEN_INIT("nouveau_screen_init failed: %d\n", ret);
+
+ chan = screen->base.channel;
+
+ pscreen->winsys = ws;
+ pscreen->destroy = nv50_screen_destroy;
+ pscreen->context_create = nv50_create;
+ pscreen->is_format_supported = nv50_screen_is_format_supported;
+ pscreen->get_param = nv50_screen_get_param;
+ pscreen->get_shader_param = nv50_screen_get_shader_param;
+ pscreen->get_paramf = nv50_screen_get_paramf;
+
+ nv50_screen_init_resource_functions(pscreen);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,
+ &screen->fence.bo);
+ if (ret)
+ goto fail;
+ nouveau_bo_map(screen->fence.bo, NOUVEAU_BO_RDWR);
+ screen->fence.map = screen->fence.bo->map;
+ nouveau_bo_unmap(screen->fence.bo);
+ screen->base.fence.emit = nv50_screen_fence_emit;
+ screen->base.fence.update = nv50_screen_fence_update;
+
+ ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync);
+ if (ret)
+ FAIL_SCREEN_INIT("Error allocating notifier: %d\n", ret);
+
+ ret = nouveau_grobj_alloc(chan, 0xbeef5039, NV50_M2MF, &screen->m2mf);
+ if (ret)
+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret);
+
+ BIND_RING (chan, screen->m2mf, NV50_SUBCH_MF);
+ BEGIN_RING(chan, RING_MF_(NV04_M2MF_DMA_NOTIFY), 3);
+ OUT_RING (chan, screen->sync->handle);
+ OUT_RING (chan, chan->vram->handle);
+ OUT_RING (chan, chan->vram->handle);
+
+ ret = nouveau_grobj_alloc(chan, 0xbeef502d, NV50_2D, &screen->eng2d);
+ if (ret)
+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for 2D: %d\n", ret);
+
+ BIND_RING (chan, screen->eng2d, NV50_SUBCH_2D);
+ BEGIN_RING(chan, RING_2D(DMA_NOTIFY), 4);
+ OUT_RING (chan, screen->sync->handle);
+ OUT_RING (chan, chan->vram->handle);
+ OUT_RING (chan, chan->vram->handle);
+ OUT_RING (chan, chan->vram->handle);
+ BEGIN_RING(chan, RING_2D(OPERATION), 1);
+ OUT_RING (chan, NV50_2D_OPERATION_SRCCOPY);
+ BEGIN_RING(chan, RING_2D(CLIP_ENABLE), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_2D(COLOR_KEY_ENABLE), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_2D_(0x0888), 1);
+ OUT_RING (chan, 1);
+
+ switch (dev->chipset & 0xf0) {
+ case 0x50:
+ tesla_class = NV50_3D;
+ break;
+ case 0x80:
+ case 0x90:
+ tesla_class = NV84_3D;
+ break;
+ case 0xa0:
+ switch (dev->chipset) {
+ case 0xa0:
+ case 0xaa:
+ case 0xac:
+ tesla_class = NVA0_3D;
+ break;
+ case 0xaf:
+ tesla_class = NVAF_3D;
+ break;
+ default:
+ tesla_class = NVA3_3D;
+ break;
+ }
+ break;
+ default:
+ FAIL_SCREEN_INIT("Not a known NV50 chipset: NV%02x\n", dev->chipset);
+ break;
+ }
+
+ ret = nouveau_grobj_alloc(chan, 0xbeef5097, tesla_class, &screen->tesla);
+ if (ret)
+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret);
+
+ BIND_RING (chan, screen->tesla, NV50_SUBCH_3D);
+
+ BEGIN_RING(chan, RING_3D(COND_MODE), 1);
+ OUT_RING (chan, NV50_3D_COND_MODE_ALWAYS);
+
+ BEGIN_RING(chan, RING_3D(DMA_NOTIFY), 1);
+ OUT_RING (chan, screen->sync->handle);
+ BEGIN_RING(chan, RING_3D(DMA_ZETA), 11);
+ for (i = 0; i < 11; ++i)
+ OUT_RING(chan, chan->vram->handle);
+ BEGIN_RING(chan, RING_3D(DMA_COLOR(0)), NV50_3D_DMA_COLOR__LEN);
+ for (i = 0; i < NV50_3D_DMA_COLOR__LEN; ++i)
+ OUT_RING(chan, chan->vram->handle);
+
+ BEGIN_RING(chan, RING_3D(REG_MODE), 1);
+ OUT_RING (chan, NV50_3D_REG_MODE_STRIPED);
+ BEGIN_RING(chan, RING_3D(UNK1400_LANES), 1);
+ OUT_RING (chan, 0xf);
+
+ BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
+ OUT_RING (chan, 1);
+
+ BEGIN_RING(chan, RING_3D(CSAA_ENABLE), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(MULTISAMPLE_ENABLE), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(MULTISAMPLE_MODE), 1);
+ OUT_RING (chan, NV50_3D_MULTISAMPLE_MODE_MS1);
+ BEGIN_RING(chan, RING_3D(MULTISAMPLE_CTRL), 1);
+ OUT_RING (chan, 0);
+
+ BEGIN_RING(chan, RING_3D(SCREEN_Y_CONTROL), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(WINDOW_OFFSET_X), 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(ZCULL_REGION), 1); /* deactivate ZCULL */
+ OUT_RING (chan, 0x3f);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
+ 3 << NV50_CODE_BO_SIZE_LOG2, &screen->code);
+ if (ret)
+ goto fail;
+
+ nouveau_resource_init(&screen->vp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
+ nouveau_resource_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
+ nouveau_resource_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
+
+ base = 1 << NV50_CODE_BO_SIZE_LOG2;
+
+ BEGIN_RING(chan, RING_3D(VP_ADDRESS_HIGH), 2);
+ OUT_RELOCh(chan, screen->code, base * 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->code, base * 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+
+ BEGIN_RING(chan, RING_3D(FP_ADDRESS_HIGH), 2);
+ OUT_RELOCh(chan, screen->code, base * 1, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->code, base * 1, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+
+ BEGIN_RING(chan, RING_3D(GP_ADDRESS_HIGH), 2);
+ OUT_RELOCh(chan, screen->code, base * 2, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->code, base * 2, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+
+ nouveau_device_get_param(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
+
+ max_warps = util_bitcount(value & 0xffff);
+ max_warps *= util_bitcount((value >> 24) & 0xf) * 32;
+
+ stack_size = max_warps * 64 * 8;
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, stack_size,
+ &screen->stack_bo);
+ if (ret)
+ FAIL_SCREEN_INIT("Failed to allocate stack bo: %d\n", ret);
+
+ BEGIN_RING(chan, RING_3D(STACK_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, 4);
+
+ tls_space = NV50_CAP_MAX_PROGRAM_TEMPS * 16;
+
+ screen->tls_size = tls_space * max_warps * 32;
+
+ debug_printf("max_warps = %i, tls_size = %lu KiB\n",
+ max_warps, screen->tls_size >> 10);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, screen->tls_size,
+ &screen->tls_bo);
+ if (ret)
+ FAIL_SCREEN_INIT("Failed to allocate stack bo: %d\n", ret);
+
+ BEGIN_RING(chan, RING_3D(LOCAL_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, screen->tls_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, screen->tls_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, util_unsigned_logbase2(tls_space / 8));
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 4 << 16,
+ &screen->uniforms);
+ if (ret)
+ goto fail;
+
+ BEGIN_RING(chan, RING_3D(CB_DEF_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, screen->uniforms, 0 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->uniforms, 0 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RING (chan, (NV50_CB_PVP << 16) | 0x0000);
+
+ BEGIN_RING(chan, RING_3D(CB_DEF_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, screen->uniforms, 1 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->uniforms, 1 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RING (chan, (NV50_CB_PGP << 16) | 0x0000);
+
+ BEGIN_RING(chan, RING_3D(CB_DEF_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, screen->uniforms, 2 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->uniforms, 2 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RING (chan, (NV50_CB_PFP << 16) | 0x0000);
+
+ BEGIN_RING(chan, RING_3D(CB_DEF_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, screen->uniforms, 3 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->uniforms, 3 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RING (chan, (NV50_CB_AUX << 16) | 0x0200);
+
+ BEGIN_RING_NI(chan, RING_3D(SET_PROGRAM_CB), 6);
+ OUT_RING (chan, (NV50_CB_PVP << 12) | 0x001);
+ OUT_RING (chan, (NV50_CB_PGP << 12) | 0x021);
+ OUT_RING (chan, (NV50_CB_PFP << 12) | 0x031);
+ OUT_RING (chan, (NV50_CB_AUX << 12) | 0xf01);
+ OUT_RING (chan, (NV50_CB_AUX << 12) | 0xf21);
+ OUT_RING (chan, (NV50_CB_AUX << 12) | 0xf31);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 3 << 16,
+ &screen->txc);
+ if (ret)
+ FAIL_SCREEN_INIT("Could not allocate TIC/TSC bo: %d\n", ret);
+
+ /* max TIC (bits 4:8) & TSC bindings, per program type */
+ for (i = 0; i < 3; ++i) {
+ BEGIN_RING(chan, RING_3D(TEX_LIMITS(i)), 1);
+ OUT_RING (chan, 0x54);
+ }
+
+ BEGIN_RING(chan, RING_3D(TIC_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RING (chan, NV50_TIC_MAX_ENTRIES - 1);
+
+ BEGIN_RING(chan, RING_3D(TSC_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RING (chan, NV50_TSC_MAX_ENTRIES - 1);
+
+ BEGIN_RING(chan, RING_3D(LINKED_TSC), 1);
+ OUT_RING (chan, 0);
+
+ BEGIN_RING(chan, RING_3D(CLIP_RECTS_EN), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(CLIP_RECTS_MODE), 1);
+ OUT_RING (chan, NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY);
+ BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 8 * 2);
+ for (i = 0; i < 8 * 2; ++i)
+ OUT_RING(chan, 0);
+ BEGIN_RING(chan, RING_3D(CLIPID_ENABLE), 1);
+ OUT_RING (chan, 0);
+
+ BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2);
+ OUT_RINGf (chan, 0.0f);
+ OUT_RINGf (chan, 1.0f);
+
+ BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1);
+#ifdef NV50_SCISSORS_CLIPPING
+ OUT_RING (chan, 0x0000);
+#else
+ OUT_RING (chan, 0x1080);
+#endif
+
+ BEGIN_RING(chan, RING_3D(CLEAR_FLAGS), 1);
+ OUT_RING (chan, NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT);
- BGN_RELOC (chan, screen->tic, tesla, NV50TCL_TIC_ADDRESS_HIGH, 2, rl);
- OUT_RELOCh(chan, screen->tic, 0, rl);
- OUT_RELOCl(chan, screen->tic, 0, rl);
+ /* We use scissors instead of exact view volume clipping,
+ * so they're always enabled.
+ */
+ BEGIN_RING(chan, RING_3D(SCISSOR_ENABLE(0)), 3);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 8192 << 16);
+ OUT_RING (chan, 8192 << 16);
- BGN_RELOC (chan, screen->tsc, tesla, NV50TCL_TSC_ADDRESS_HIGH, 2, rl);
- OUT_RELOCh(chan, screen->tsc, 0, rl);
- OUT_RELOCl(chan, screen->tsc, 0, rl);
+ BEGIN_RING(chan, RING_3D(RASTERIZE_ENABLE), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D(POINT_RASTER_RULES), 1);
+ OUT_RING (chan, NV50_3D_POINT_RASTER_RULES_OGL);
+ BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1);
+ OUT_RING (chan, 0x11111111);
+ BEGIN_RING(chan, RING_3D(EDGEFLAG_ENABLE), 1);
+ OUT_RING (chan, 1);
- nv50_screen_reloc_constbuf(screen, NV50_CB_PMISC);
+ FIRE_RING (chan);
- BGN_RELOC (chan, screen->constbuf_misc[0],
- tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl);
- OUT_RELOCh(chan, screen->constbuf_misc[0], 0x200, rl);
- OUT_RELOCl(chan, screen->constbuf_misc[0], 0x200, rl);
- OUT_RELOC (chan, screen->constbuf_misc[0],
- (NV50_CB_AUX << 16) | 0x0200, rl, 0, 0);
+ screen->tic.entries = CALLOC(4096, sizeof(void *));
+ screen->tsc.entries = screen->tic.entries + 2048;
- for (i = 0; i < 3; ++i)
- nv50_screen_reloc_constbuf(screen, NV50_CB_PVP + i);
+ screen->mm_VRAM_fe0 = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0);
- BGN_RELOC (chan, screen->stack_bo,
- tesla, NV50TCL_STACK_ADDRESS_HIGH, 2, rl);
- OUT_RELOCh(chan, screen->stack_bo, 0, rl);
- OUT_RELOCl(chan, screen->stack_bo, 0, rl);
+ nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
- if (!screen->cur_ctx->req_lmem)
- return;
+ return pscreen;
- BGN_RELOC (chan, screen->local_bo,
- tesla, NV50TCL_LOCAL_ADDRESS_HIGH, 2, rl);
- OUT_RELOCh(chan, screen->local_bo, 0, rl);
- OUT_RELOCl(chan, screen->local_bo, 0, rl);
+fail:
+ nv50_screen_destroy(pscreen);
+ return NULL;
}
-#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
-# define NOUVEAU_GETPARAM_GRAPH_UNITS 13
-#endif
+void
+nv50_screen_make_buffers_resident(struct nv50_screen *screen)
+{
+ struct nouveau_channel *chan = screen->base.channel;
-extern int nouveau_device_get_param(struct nouveau_device *dev,
- uint64_t param, uint64_t *value);
+ const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
-struct pipe_screen *
-nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
+ MARK_RING(chan, 5, 5);
+ nouveau_bo_validate(chan, screen->code, flags);
+ nouveau_bo_validate(chan, screen->uniforms, flags);
+ nouveau_bo_validate(chan, screen->txc, flags);
+ nouveau_bo_validate(chan, screen->tls_bo, flags);
+ nouveau_bo_validate(chan, screen->stack_bo, flags);
+}
+
+int
+nv50_screen_tic_alloc(struct nv50_screen *screen, void *entry)
{
- struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen);
- struct nouveau_channel *chan;
- struct pipe_screen *pscreen;
- uint64_t value;
- unsigned chipset = dev->chipset;
- unsigned tesla_class = 0;
- unsigned stack_size, local_size, max_warps;
- int ret, i;
- const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
-
- if (!screen)
- return NULL;
- pscreen = &screen->base.base;
-
- ret = nouveau_screen_init(&screen->base, dev);
- if (ret) {
- nv50_screen_destroy(pscreen);
- return NULL;
- }
- chan = screen->base.channel;
-
- pscreen->winsys = ws;
- pscreen->destroy = nv50_screen_destroy;
- pscreen->get_param = nv50_screen_get_param;
- pscreen->get_shader_param = nv50_screen_get_shader_param;
- pscreen->get_paramf = nv50_screen_get_paramf;
- pscreen->is_format_supported = nv50_screen_is_format_supported;
- pscreen->context_create = nv50_create;
-
- nv50_screen_init_resource_functions(pscreen);
-
- /* DMA engine object */
- ret = nouveau_grobj_alloc(chan, 0xbeef5039,
- NV50_MEMORY_TO_MEMORY_FORMAT, &screen->m2mf);
- if (ret) {
- NOUVEAU_ERR("Error creating M2MF object: %d\n", ret);
- nv50_screen_destroy(pscreen);
- return NULL;
- }
-
- /* 2D object */
- ret = nouveau_grobj_alloc(chan, 0xbeef502d, NV50_2D, &screen->eng2d);
- if (ret) {
- NOUVEAU_ERR("Error creating 2D object: %d\n", ret);
- nv50_screen_destroy(pscreen);
- return NULL;
- }
-
- /* 3D object */
- switch (chipset & 0xf0) {
- case 0x50:
- tesla_class = NV50TCL;
- break;
- case 0x80:
- case 0x90:
- tesla_class = NV84TCL;
- break;
- case 0xa0:
- switch (chipset) {
- case 0xa0:
- case 0xaa:
- case 0xac:
- tesla_class = NVA0TCL;
- break;
- default:
- tesla_class = NVA8TCL;
- break;
- }
- break;
- default:
- NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", chipset);
- nv50_screen_destroy(pscreen);
- return NULL;
- }
-
- ret = nouveau_grobj_alloc(chan, 0xbeef5097, tesla_class,
- &screen->tesla);
- if (ret) {
- NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
- nv50_screen_destroy(pscreen);
- return NULL;
- }
-
- /* this is necessary for the new RING_3D / statebuffer code */
- BIND_RING(chan, screen->tesla, 7);
-
- /* Sync notifier */
- ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync);
- if (ret) {
- NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
- nv50_screen_destroy(pscreen);
- return NULL;
- }
-
- /* Static M2MF init */
- BEGIN_RING(chan, screen->m2mf,
- NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3);
- OUT_RING (chan, screen->sync->handle);
- OUT_RING (chan, chan->vram->handle);
- OUT_RING (chan, chan->vram->handle);
-
- /* Static 2D init */
- BEGIN_RING(chan, screen->eng2d, NV50_2D_DMA_NOTIFY, 4);
- OUT_RING (chan, screen->sync->handle);
- OUT_RING (chan, chan->vram->handle);
- OUT_RING (chan, chan->vram->handle);
- OUT_RING (chan, chan->vram->handle);
- BEGIN_RING(chan, screen->eng2d, NV50_2D_OPERATION, 1);
- OUT_RING (chan, NV50_2D_OPERATION_SRCCOPY);
- BEGIN_RING(chan, screen->eng2d, NV50_2D_CLIP_ENABLE, 1);
- OUT_RING (chan, 0);
- BEGIN_RING(chan, screen->eng2d, 0x0888, 1);
- OUT_RING (chan, 1);
-
- /* Static tesla init */
- BEGIN_RING(chan, screen->tesla, NV50TCL_COND_MODE, 1);
- OUT_RING (chan, NV50TCL_COND_MODE_ALWAYS);
- BEGIN_RING(chan, screen->tesla, NV50TCL_DMA_NOTIFY, 1);
- OUT_RING (chan, screen->sync->handle);
- BEGIN_RING(chan, screen->tesla, NV50TCL_DMA_ZETA, 11);
- for (i = 0; i < 11; i++)
- OUT_RING (chan, chan->vram->handle);
- BEGIN_RING(chan, screen->tesla,
- NV50TCL_DMA_COLOR(0), NV50TCL_DMA_COLOR__SIZE);
- for (i = 0; i < NV50TCL_DMA_COLOR__SIZE; i++)
- OUT_RING (chan, chan->vram->handle);
-
- BEGIN_RING(chan, screen->tesla, NV50TCL_RT_CONTROL, 1);
- OUT_RING (chan, 1);
-
- /* activate all 32 lanes (threads) in a warp */
- BEGIN_RING(chan, screen->tesla, NV50TCL_REG_MODE, 1);
- OUT_RING (chan, NV50TCL_REG_MODE_STRIPED);
- BEGIN_RING(chan, screen->tesla, 0x1400, 1);
- OUT_RING (chan, 0xf);
-
- /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */
- for (i = 0; i < 3; ++i) {
- BEGIN_RING(chan, screen->tesla, NV50TCL_TEX_LIMITS(i), 1);
- OUT_RING (chan, 0x54);
- }
-
- /* origin is top left (set to 1 for bottom left) */
- BEGIN_RING(chan, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1);
- OUT_RING (chan, 0);
- BEGIN_RING(chan, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
- OUT_RING (chan, 8);
-
- BEGIN_RING(chan, screen->tesla, NV50TCL_CLEAR_FLAGS, 1);
- OUT_RING (chan, NV50TCL_CLEAR_FLAGS_D3D);
-
- /* constant buffers for immediates and VP/FP parameters */
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (32 * 4) * 4,
- &screen->constbuf_misc[0]);
- if (ret) {
- nv50_screen_destroy(pscreen);
- return NULL;
- }
- BEGIN_RING(chan, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
- OUT_RELOCh(chan, screen->constbuf_misc[0], 0, rl);
- OUT_RELOCl(chan, screen->constbuf_misc[0], 0, rl);
- OUT_RING (chan, (NV50_CB_PMISC << 16) | 0x0200);
- BEGIN_RING(chan, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
- OUT_RELOCh(chan, screen->constbuf_misc[0], 0x200, rl);
- OUT_RELOCl(chan, screen->constbuf_misc[0], 0x200, rl);
- OUT_RING (chan, (NV50_CB_AUX << 16) | 0x0200);
-
- for (i = 0; i < 3; i++) {
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (4096 * 4) * 4,
- &screen->constbuf_parm[i]);
- if (ret) {
- nv50_screen_destroy(pscreen);
- return NULL;
- }
- BEGIN_RING(chan, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
- OUT_RELOCh(chan, screen->constbuf_parm[i], 0, rl);
- OUT_RELOCl(chan, screen->constbuf_parm[i], 0, rl);
- /* CB_DEF_SET_SIZE value of 0x0000 means 65536 */
- OUT_RING (chan, ((NV50_CB_PVP + i) << 16) | 0x0000);
- }
-
- if (nouveau_resource_init(&screen->immd_heap, 0, 128)) {
- NOUVEAU_ERR("Error initialising shader immediates heap.\n");
- nv50_screen_destroy(pscreen);
- return NULL;
- }
-
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 3 * 32 * (8 * 4),
- &screen->tic);
- if (ret) {
- nv50_screen_destroy(pscreen);
- return NULL;
- }
- BEGIN_RING(chan, screen->tesla, NV50TCL_TIC_ADDRESS_HIGH, 3);
- OUT_RELOCh(chan, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
- OUT_RELOCl(chan, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
- OUT_RING (chan, 3 * 32 - 1);
-
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 3 * 32 * (8 * 4),
- &screen->tsc);
- if (ret) {
- nv50_screen_destroy(pscreen);
- return NULL;
- }
- BEGIN_RING(chan, screen->tesla, NV50TCL_TSC_ADDRESS_HIGH, 3);
- OUT_RELOCh(chan, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
- OUT_RELOCl(chan, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
- OUT_RING (chan, 0); /* ignored if TSC_LINKED (0x1234) == 1 */
-
- /* map constant buffers:
- * B = buffer ID (maybe more than 1 byte)
- * N = CB index used in shader instruction
- * P = program type (0 = VP, 2 = GP, 3 = FP)
- * SET_PROGRAM_CB = 0x000BBNP1
- */
- BEGIN_RING_NI(chan, screen->tesla, NV50TCL_SET_PROGRAM_CB, 8);
- /* bind immediate buffer */
- OUT_RING (chan, 0x001 | (NV50_CB_PMISC << 12));
- OUT_RING (chan, 0x021 | (NV50_CB_PMISC << 12));
- OUT_RING (chan, 0x031 | (NV50_CB_PMISC << 12));
- /* bind auxiliary constbuf to immediate data bo */
- OUT_RING (chan, 0x201 | (NV50_CB_AUX << 12));
- OUT_RING (chan, 0x221 | (NV50_CB_AUX << 12));
- /* bind parameter buffers */
- OUT_RING (chan, 0x101 | (NV50_CB_PVP << 12));
- OUT_RING (chan, 0x121 | (NV50_CB_PGP << 12));
- OUT_RING (chan, 0x131 | (NV50_CB_PFP << 12));
-
- /* shader stack */
- nouveau_device_get_param(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
-
- max_warps = util_bitcount(value & 0xffff);
- max_warps *= util_bitcount((value >> 24) & 0xf) * 32;
-
- stack_size = max_warps * 64 * 8;
-
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
- stack_size, &screen->stack_bo);
- if (ret) {
- nv50_screen_destroy(pscreen);
- return NULL;
- }
- BEGIN_RING(chan, screen->tesla, NV50TCL_STACK_ADDRESS_HIGH, 3);
- OUT_RELOCh(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- OUT_RELOCl(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- OUT_RING (chan, 4);
-
- local_size = (NV50_CAP_MAX_PROGRAM_TEMPS * 16) * max_warps * 32;
-
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
- local_size, &screen->local_bo);
- if (ret) {
- nv50_screen_destroy(pscreen);
- return NULL;
- }
-
- local_size = NV50_CAP_MAX_PROGRAM_TEMPS * 16;
-
- BEGIN_RING(chan, screen->tesla, NV50TCL_LOCAL_ADDRESS_HIGH, 3);
- OUT_RELOCh(chan, screen->local_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- OUT_RELOCl(chan, screen->local_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- OUT_RING (chan, util_unsigned_logbase2(local_size / 8));
-
- /* Vertex array limits - max them out */
- for (i = 0; i < 16; i++) {
- BEGIN_RING(chan, screen->tesla,
- NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
- OUT_RING (chan, 0x000000ff);
- OUT_RING (chan, 0xffffffff);
- }
-
- BEGIN_RING(chan, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR(0), 2);
- OUT_RINGf (chan, 0.0f);
- OUT_RINGf (chan, 1.0f);
-
- BEGIN_RING(chan, screen->tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
- OUT_RING (chan, 1);
-
- /* no dynamic combination of TIC & TSC entries => only BIND_TIC used */
- BEGIN_RING(chan, screen->tesla, NV50TCL_LINKED_TSC, 1);
- OUT_RING (chan, 1);
-
- BEGIN_RING(chan, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
- OUT_RING (chan, 1); /* default edgeflag to TRUE */
-
- FIRE_RING (chan);
-
- screen->force_push = debug_get_bool_option("NV50_ALWAYS_PUSH", FALSE);
- if(!screen->force_push)
- screen->base.vertex_buffer_flags = screen->base.index_buffer_flags = NOUVEAU_BO_GART;
- return pscreen;
+ int i = screen->tic.next;
+
+ while (screen->tic.lock[i / 32] & (1 << (i % 32)))
+ i = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);
+
+ screen->tic.next = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);
+
+ if (screen->tic.entries[i])
+ nv50_tic_entry(screen->tic.entries[i])->id = -1;
+
+ screen->tic.entries[i] = entry;
+ return i;
}
+int
+nv50_screen_tsc_alloc(struct nv50_screen *screen, void *entry)
+{
+ int i = screen->tsc.next;
+
+ while (screen->tsc.lock[i / 32] & (1 << (i % 32)))
+ i = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);
+
+ screen->tsc.next = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);
+
+ if (screen->tsc.entries[i])
+ nv50_tsc_entry(screen->tsc.entries[i])->id = -1;
+
+ screen->tsc.entries[i] = entry;
+ return i;
+}
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
index 6e15230b48..aea434b867 100644
--- a/src/gallium/drivers/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -1,53 +1,148 @@
#ifndef __NV50_SCREEN_H__
#define __NV50_SCREEN_H__
+#define NOUVEAU_NVC0
#include "nouveau/nouveau_screen.h"
+#include "nouveau/nouveau_fence.h"
+#include "nouveau/nouveau_mm.h"
+#undef NOUVEAU_NVC0
+#include "nv50_winsys.h"
+#include "nv50_stateobj.h"
+
+#define NV50_TIC_MAX_ENTRIES 2048
+#define NV50_TSC_MAX_ENTRIES 2048
struct nv50_context;
-struct nv50_screen {
- struct nouveau_screen base;
+#define NV50_CODE_BO_SIZE_LOG2 19
- struct nouveau_winsys *nvws;
+#define NV50_SCRATCH_SIZE (2 << 20)
+#define NV50_SCRATCH_NR_BUFFERS 2
- struct nv50_context *cur_ctx;
+struct nv50_screen {
+ struct nouveau_screen base;
+ struct nouveau_winsys *nvws;
+
+ struct nv50_context *cur_ctx;
+
+ struct nouveau_bo *code;
+ struct nouveau_bo *uniforms;
+ struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
+ struct nouveau_bo *stack_bo;
+ struct nouveau_bo *tls_bo;
+
+ uint64_t tls_size;
+
+ struct nouveau_resource *vp_code_heap;
+ struct nouveau_resource *gp_code_heap;
+ struct nouveau_resource *fp_code_heap;
+
+ struct {
+ void **entries;
+ int next;
+ uint32_t lock[NV50_TIC_MAX_ENTRIES / 32];
+ } tic;
+
+ struct {
+ void **entries;
+ int next;
+ uint32_t lock[NV50_TSC_MAX_ENTRIES / 32];
+ } tsc;
+
+ struct {
+ uint32_t *map;
+ struct nouveau_bo *bo;
+ } fence;
+
+ struct nouveau_notifier *sync;
+
+ struct nouveau_mman *mm_VRAM_fe0;
+
+ struct nouveau_grobj *tesla;
+ struct nouveau_grobj *eng2d;
+ struct nouveau_grobj *m2mf;
+};
- struct nouveau_grobj *tesla;
- struct nouveau_grobj *eng2d;
- struct nouveau_grobj *m2mf;
- struct nouveau_notifier *sync;
+static INLINE struct nv50_screen *
+nv50_screen(struct pipe_screen *screen)
+{
+ return (struct nv50_screen *)screen;
+}
- struct nouveau_bo *constbuf_misc[1];
- struct nouveau_bo *constbuf_parm[PIPE_SHADER_TYPES];
+void nv50_screen_make_buffers_resident(struct nv50_screen *);
- struct nouveau_resource *immd_heap;
+int nv50_screen_tic_alloc(struct nv50_screen *, void *);
+int nv50_screen_tsc_alloc(struct nv50_screen *, void *);
- struct nouveau_bo *tic;
- struct nouveau_bo *tsc;
+static INLINE void
+nv50_resource_fence(struct nv04_resource *res, uint32_t flags)
+{
+ struct nv50_screen *screen = nv50_screen(res->base.screen);
- struct nouveau_bo *stack_bo; /* control flow stack */
- struct nouveau_bo *local_bo; /* l[] memory */
+ if (res->mm) {
+ nouveau_fence_ref(screen->base.fence.current, &res->fence);
- boolean force_push;
-};
+ if (flags & NOUVEAU_BO_WR)
+ nouveau_fence_ref(screen->base.fence.current, &res->fence_wr);
+ }
+}
-static INLINE struct nv50_screen *
-nv50_screen(struct pipe_screen *screen)
+static INLINE void
+nv50_resource_validate(struct nv04_resource *res, uint32_t flags)
{
- return (struct nv50_screen *)screen;
-}
+ struct nv50_screen *screen = nv50_screen(res->base.screen);
-extern void nv50_screen_relocs(struct nv50_screen *);
+ if (likely(res->bo)) {
+ nouveau_bo_validate(screen->base.channel, res->bo, flags);
-extern void nv50_screen_reloc_constbuf(struct nv50_screen *, unsigned cbi);
+ if (flags & NOUVEAU_BO_WR)
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ if (flags & NOUVEAU_BO_RD)
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ nv50_resource_fence(res, flags);
+ }
+}
struct nv50_format {
- uint32_t rt;
- uint32_t tic;
- uint32_t vtx;
- uint32_t usage;
+ uint32_t rt;
+ uint32_t tic;
+ uint32_t vtx;
+ uint32_t usage;
};
extern const struct nv50_format nv50_format_table[];
+static INLINE void
+nv50_screen_tic_unlock(struct nv50_screen *screen, struct nv50_tic_entry *tic)
+{
+ if (tic->id >= 0)
+ screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
+}
+
+static INLINE void
+nv50_screen_tsc_unlock(struct nv50_screen *screen, struct nv50_tsc_entry *tsc)
+{
+ if (tsc->id >= 0)
+ screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
+}
+
+static INLINE void
+nv50_screen_tic_free(struct nv50_screen *screen, struct nv50_tic_entry *tic)
+{
+ if (tic->id >= 0) {
+ screen->tic.entries[tic->id] = NULL;
+ screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
+ }
+}
+
+static INLINE void
+nv50_screen_tsc_free(struct nv50_screen *screen, struct nv50_tsc_entry *tsc)
+{
+ if (tsc->id >= 0) {
+ screen->tsc.entries[tsc->id] = NULL;
+ screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
+ }
+}
+
#endif
diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c
index 306aa81d98..bea9c095bb 100644
--- a/src/gallium/drivers/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nv50/nv50_shader_state.c
@@ -28,422 +28,266 @@
#include "nv50_context.h"
-static void
-nv50_transfer_constbuf(struct nv50_context *nv50,
- struct pipe_resource *buf, unsigned size, unsigned cbi)
+void
+nv50_constbufs_validate(struct nv50_context *nv50)
{
- struct pipe_context *pipe = &nv50->pipe;
- struct pipe_transfer *transfer;
struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- uint32_t *map;
- unsigned count, start;
+ unsigned s;
- if (buf == NULL)
- return;
+ for (s = 0; s < 3; ++s) {
+ struct nv04_resource *res;
+ int i;
+ unsigned p, b;
- map = pipe_buffer_map(pipe, buf, PIPE_TRANSFER_READ, &transfer);
- if (!map)
- return;
+ if (s == PIPE_SHADER_FRAGMENT)
+ p = NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT;
+ else
+ if (s == PIPE_SHADER_GEOMETRY)
+ p = NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY;
+ else
+ p = NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX;
+
+ while (nv50->constbuf_dirty[s]) {
+ struct nouveau_bo *bo;
+ unsigned start = 0;
+ unsigned words = 0;
+
+ i = ffs(nv50->constbuf_dirty[s]) - 1;
+ nv50->constbuf_dirty[s] &= ~(1 << i);
+
+ res = nv04_resource(nv50->constbuf[s][i]);
+ if (!res) {
+ if (i != 0) {
+ BEGIN_RING(chan, RING_3D(SET_PROGRAM_CB), 1);
+ OUT_RING (chan, (i << 8) | p | 0);
+ }
+ continue;
+ }
- count = (buf->width0 + 3) / 4;
- start = 0;
+ if (i == 0) {
+ b = NV50_CB_PVP + s;
- while (count) {
- unsigned nr = AVAIL_RING(chan);
+ /* always upload GL uniforms through CB DATA */
+ bo = nv50->screen->uniforms;
+ words = res->base.width0 / 4;
+ } else {
+ b = s * 16 + i;
- if (nr < 8) {
- FIRE_RING(chan);
- continue;
- }
- nr = MIN2(count, nr - 7);
- nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
+ assert(0);
- nv50_screen_reloc_constbuf(nv50->screen, cbi);
+ if (!nouveau_resource_mapped_by_gpu(&res->base)) {
+ nouveau_buffer_migrate(&nv50->base, res, NOUVEAU_BO_VRAM);
- BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
- OUT_RING (chan, (start << 8) | cbi);
- BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr);
- OUT_RINGp (chan, map, nr);
+ BEGIN_RING(chan, RING_3D(CODE_CB_FLUSH), 1);
+ OUT_RING (chan, 0);
+ }
+ MARK_RING (chan, 6, 2);
+ BEGIN_RING(chan, RING_3D(CB_DEF_ADDRESS_HIGH), 3);
+ OUT_RESRCh(chan, res, 0, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, res, 0, NOUVEAU_BO_RD);
+ OUT_RING (chan, (b << 16) | (res->base.width0 & 0xffff));
+ BEGIN_RING(chan, RING_3D(SET_PROGRAM_CB), 1);
+ OUT_RING (chan, (b << 12) | (i << 8) | p | 1);
- count -= nr;
- start += nr;
- map += nr;
- }
-
- pipe_buffer_unmap(pipe, buf, transfer);
-}
+ bo = res->bo;
-static void
-nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
-{
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- unsigned cbi;
-
- if (p->immd_size) {
- uint32_t *data = p->immd;
- unsigned count = p->immd_size / 4;
- unsigned start = 0;
+ nv50_bufctx_add_resident(nv50, NV50_BUFCTX_CONSTANT, res,
+ res->domain | NOUVEAU_BO_RD);
+ }
- while (count) {
- unsigned nr = AVAIL_RING(chan);
+ if (words) {
+ MARK_RING(chan, 8, 1);
- if (nr < 8) {
- FIRE_RING(chan);
- continue;
+ nouveau_bo_validate(chan, bo, res->domain | NOUVEAU_BO_WR);
}
- nr = MIN2(count, nr - 7);
- nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
- nv50_screen_reloc_constbuf(nv50->screen, NV50_CB_PMISC);
+ while (words) {
+ unsigned nr = AVAIL_RING(chan);
- BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
- OUT_RING (chan, (start << 8) | NV50_CB_PMISC);
- BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr);
- OUT_RINGp (chan, data, nr);
+ if (nr < 16) {
+ FIRE_RING(chan);
+ nouveau_bo_validate(chan, bo, res->domain | NOUVEAU_BO_WR);
+ continue;
+ }
+ nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN);
- count -= nr;
- start += nr;
- data += nr;
- }
- }
+ BEGIN_RING(chan, RING_3D(CB_ADDR), 1);
+ OUT_RING (chan, (start << 8) | b);
+ BEGIN_RING_NI(chan, RING_3D(CB_DATA(0)), nr);
+ OUT_RINGp (chan, &res->data[start * 4], nr);
- /* If the state tracker doesn't change the constbuf, and it is first
- * validated with a program that doesn't use it, this check prevents
- * it from even being uploaded. */
- /*
- if (p->parm_size == 0)
- return;
- */
-
- switch (p->type) {
- case PIPE_SHADER_VERTEX:
- cbi = NV50_CB_PVP;
- break;
- case PIPE_SHADER_FRAGMENT:
- cbi = NV50_CB_PFP;
- break;
- case PIPE_SHADER_GEOMETRY:
- cbi = NV50_CB_PGP;
- break;
- default:
- assert(0);
- return;
+ start += nr;
+ words -= nr;
+ }
+ }
}
-
- nv50_transfer_constbuf(nv50, nv50->constbuf[p->type], p->parm_size, cbi);
}
-static void
-nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
+static boolean
+nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
{
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_grobj *eng2d = nv50->screen->eng2d;
+ struct nouveau_resource *heap;
int ret;
- unsigned offset;
- unsigned size = p->code_size;
- uint32_t *data = p->code;
+ unsigned size;
- assert(p->translated);
+ if (prog->translated)
+ return TRUE;
- /* TODO: use a single bo (for each type) for shader code */
- if (p->bo)
- return;
- ret = nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100, size, &p->bo);
- assert(!ret);
-
- offset = p->code_start = 0;
-
- BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2);
- OUT_RING (chan, NV50_2D_DST_FORMAT_R8_UNORM);
- OUT_RING (chan, 1);
- BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 1);
- OUT_RING (chan, 0x40000);
- BEGIN_RING(chan, eng2d, NV50_2D_DST_WIDTH, 2);
- OUT_RING (chan, 0x10000);
- OUT_RING (chan, 1);
-
- while (size) {
- unsigned nr = size / 4;
-
- if (AVAIL_RING(chan) < 32)
- FIRE_RING(chan);
-
- nr = MIN2(nr, AVAIL_RING(chan) - 18);
- nr = MIN2(nr, 1792);
- if (nr < (size / 4))
- nr &= ~0x3f;
- assert(!(size & 3));
-
- BEGIN_RING(chan, eng2d, NV50_2D_DST_ADDRESS_HIGH, 2);
- OUT_RELOCh(chan, p->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- OUT_RELOCl(chan, p->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2);
- OUT_RING (chan, 0);
- OUT_RING (chan, NV50_2D_SIFC_FORMAT_R8_UNORM);
- BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10);
- OUT_RING (chan, nr * 4);
- OUT_RING (chan, 1);
- OUT_RING (chan, 0);
- OUT_RING (chan, 1);
- OUT_RING (chan, 0);
- OUT_RING (chan, 1);
- OUT_RING (chan, 0);
- OUT_RING (chan, 0);
- OUT_RING (chan, 0);
- OUT_RING (chan, 0);
-
- BEGIN_RING_NI(chan, eng2d, NV50_2D_SIFC_DATA, nr);
- OUT_RINGp (chan, data, nr);
-
- data += nr;
- offset += nr * 4;
- size -= nr * 4;
- }
+ prog->translated = nv50_program_translate(prog);
+ if (!prog->translated)
+ return FALSE;
- BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1);
- OUT_RING (chan, 0);
-}
+ if (prog->type == PIPE_SHADER_FRAGMENT) heap = nv50->screen->fp_code_heap;
+ else
+ if (prog->type == PIPE_SHADER_GEOMETRY) heap = nv50->screen->gp_code_heap;
+ else
+ heap = nv50->screen->vp_code_heap;
-static void
-nv50_vp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
-{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so = so_new(5, 7, 2);
-
- nv50_program_validate_code(nv50, p);
-
- so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
- so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_LOW, 0, 0);
- so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2);
- so_data (so, p->vp.attrs[0]);
- so_data (so, p->vp.attrs[1]);
- so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
- so_data (so, p->max_out);
- so_method(so, tesla, NV50TCL_VP_REG_ALLOC_TEMP, 1);
- so_data (so, p->max_gpr);
- so_method(so, tesla, NV50TCL_VP_START_ID, 1);
- so_data (so, p->code_start);
-
- so_ref(so, &p->so);
- so_ref(NULL, &so);
-}
+ size = align(prog->code_size, 0x100);
-static void
-nv50_fp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
-{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so = so_new(6, 7, 2);
-
- nv50_program_validate_code(nv50, p);
-
- so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
- so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_LOW, 0, 0);
- so_method(so, tesla, NV50TCL_FP_REG_ALLOC_TEMP, 1);
- so_data (so, p->max_gpr);
- so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1);
- so_data (so, p->max_out);
- so_method(so, tesla, NV50TCL_FP_CONTROL, 1);
- so_data (so, p->fp.flags[0]);
- so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1);
- so_data (so, p->fp.flags[1]);
- so_method(so, tesla, NV50TCL_FP_START_ID, 1);
- so_data (so, p->code_start);
-
- so_ref(so, &p->so);
- so_ref(NULL, &so);
-}
+ ret = nouveau_resource_alloc(heap, size, prog, &prog->res);
+ if (ret) {
+ NOUVEAU_ERR("out of code space for shader type %i\n", prog->type);
+ return FALSE;
+ }
+ prog->code_base = prog->res->start;
-static void
-nv50_gp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
-{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so = so_new(6, 7, 2);
-
- nv50_program_validate_code(nv50, p);
-
- so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2);
- so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_LOW, 0, 0);
- so_method(so, tesla, NV50TCL_GP_REG_ALLOC_TEMP, 1);
- so_data (so, p->max_gpr);
- so_method(so, tesla, NV50TCL_GP_REG_ALLOC_RESULT, 1);
- so_data (so, p->max_out);
- so_method(so, tesla, NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE, 1);
- so_data (so, p->gp.prim_type);
- so_method(so, tesla, NV50TCL_GP_VERTEX_OUTPUT_COUNT, 1);
- so_data (so, p->gp.vert_count);
- so_method(so, tesla, NV50TCL_GP_START_ID, 1);
- so_data (so, p->code_start);
-
- so_ref(so, &p->so);
- so_ref(NULL, &so);
-}
+ nv50_relocate_program(prog, prog->code_base, 0);
-static boolean
-nv50_program_validate(struct nv50_program *p)
-{
- p->translated = nv50_program_tx(p);
- assert(p->translated);
- return p->translated;
-}
+ nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
+ (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
+ NOUVEAU_BO_VRAM, prog->code_size, prog->code);
-static INLINE void
-nv50_program_validate_common(struct nv50_context *nv50, struct nv50_program *p)
-{
- nv50_program_validate_code(nv50, p);
+ BEGIN_RING(nv50->screen->base.channel, RING_3D(CODE_CB_FLUSH), 1);
+ OUT_RING (nv50->screen->base.channel, 0);
- if (p->uses_lmem)
- nv50->req_lmem |= 1 << p->type;
- else
- nv50->req_lmem &= ~(1 << p->type);
+ return TRUE;
}
-struct nouveau_stateobj *
+void
nv50_vertprog_validate(struct nv50_context *nv50)
{
- struct nv50_program *p = nv50->vertprog;
- struct nouveau_stateobj *so = NULL;
-
- if (!p->translated) {
- if (nv50_program_validate(p))
- nv50_vp_update_stateobj(nv50, p);
- else
- return NULL;
- }
-
- if (nv50->dirty & NV50_NEW_VERTPROG_CB)
- nv50_program_validate_data(nv50, p);
-
- if (!(nv50->dirty & NV50_NEW_VERTPROG))
- return NULL;
-
- nv50_program_validate_common(nv50, p);
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nv50_program *vp = nv50->vertprog;
- so_ref(p->so, &so);
- return so;
+ if (!nv50_program_validate(nv50, vp))
+ return;
+
+ BEGIN_RING(chan, RING_3D(VP_ATTR_EN(0)), 2);
+ OUT_RING (chan, vp->vp.attrs[0]);
+ OUT_RING (chan, vp->vp.attrs[1]);
+ BEGIN_RING(chan, RING_3D(VP_REG_ALLOC_RESULT), 1);
+ OUT_RING (chan, vp->max_out);
+ BEGIN_RING(chan, RING_3D(VP_REG_ALLOC_TEMP), 1);
+ OUT_RING (chan, vp->max_gpr);
+ BEGIN_RING(chan, RING_3D(VP_START_ID), 1);
+ OUT_RING (chan, vp->code_base);
}
-struct nouveau_stateobj *
+void
nv50_fragprog_validate(struct nv50_context *nv50)
{
- struct nv50_program *p = nv50->fragprog;
- struct nouveau_stateobj *so = NULL;
-
- if (!p->translated) {
- if (nv50_program_validate(p))
- nv50_fp_update_stateobj(nv50, p);
- else
- return NULL;
- }
-
- if (nv50->dirty & NV50_NEW_FRAGPROG_CB)
- nv50_program_validate_data(nv50, p);
-
- if (!(nv50->dirty & NV50_NEW_FRAGPROG))
- return NULL;
-
- nv50_program_validate_common(nv50, p);
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nv50_program *fp = nv50->fragprog;
- so_ref(p->so, &so);
- return so;
+ if (!nv50_program_validate(nv50, fp))
+ return;
+
+ BEGIN_RING(chan, RING_3D(FP_REG_ALLOC_TEMP), 1);
+ OUT_RING (chan, fp->max_gpr);
+ BEGIN_RING(chan, RING_3D(FP_RESULT_COUNT), 1);
+ OUT_RING (chan, fp->max_out);
+ BEGIN_RING(chan, RING_3D(FP_CONTROL), 1);
+ OUT_RING (chan, fp->fp.flags[0]);
+ BEGIN_RING(chan, RING_3D(FP_CTRL_UNK196C), 1);
+ OUT_RING (chan, fp->fp.flags[1]);
+ BEGIN_RING(chan, RING_3D(FP_START_ID), 1);
+ OUT_RING (chan, fp->code_base);
}
-struct nouveau_stateobj *
-nv50_geomprog_validate(struct nv50_context *nv50)
+void
+nv50_gmtyprog_validate(struct nv50_context *nv50)
{
- struct nv50_program *p = nv50->geomprog;
- struct nouveau_stateobj *so = NULL;
-
- /* GP may be NULL, but VP and FP may not */
- if (!p)
- return NULL; /* GP is deactivated in linkage validation */
-
- if (!p->translated) {
- if (nv50_program_validate(p))
- nv50_gp_update_stateobj(nv50, p);
- else
- return NULL;
- }
-
- if (nv50->dirty & NV50_NEW_GEOMPROG_CB)
- nv50_program_validate_data(nv50, p);
-
- if (!(nv50->dirty & NV50_NEW_GEOMPROG))
- return NULL;
-
- nv50_program_validate_common(nv50, p);
-
- so_ref(p->so, &so);
- return so;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nv50_program *gp = nv50->vertprog;
+
+ if (!nv50_program_validate(nv50, gp))
+ return;
+
+ BEGIN_RING(chan, RING_3D(GP_REG_ALLOC_TEMP), 1);
+ OUT_RING (chan, gp->max_gpr);
+ BEGIN_RING(chan, RING_3D(GP_REG_ALLOC_RESULT), 1);
+ OUT_RING (chan, gp->max_out);
+ BEGIN_RING(chan, RING_3D(GP_OUTPUT_PRIMITIVE_TYPE), 1);
+ OUT_RING (chan, gp->gp.prim_type);
+ BEGIN_RING(chan, RING_3D(GP_VERTEX_OUTPUT_COUNT), 1);
+ OUT_RING (chan, gp->gp.vert_count);
+ BEGIN_RING(chan, RING_3D(GP_START_ID), 1);
+ OUT_RING (chan, gp->code_base);
}
-/* XXX: this might not work correctly in all cases yet: we assume that
- * an FP generic input that is not written in the VP is gl_PointCoord.
- */
-static uint32_t
-nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned m)
+void
+nv50_sprite_coords_validate(struct nv50_context *nv50)
{
- struct nv50_program *vp = nv50->vertprog;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ uint32_t pntc[8], mode;
struct nv50_program *fp = nv50->fragprog;
unsigned i, c;
+ unsigned m = (nv50->state.interpolant_ctrl >> 8) & 0xff;
+
+ if (!nv50->rast->pipe.point_quad_rasterization) {
+ if (nv50->state.point_sprite) {
+ BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE_MAP(0)), 8);
+ for (i = 0; i < 8; ++i)
+ OUT_RING(chan, 0);
- memset(pntc, 0, 8 * sizeof(uint32_t));
+ nv50->state.point_sprite = FALSE;
+ }
+ return;
+ } else {
+ nv50->state.point_sprite = TRUE;
+ }
- if (nv50->geomprog)
- vp = nv50->geomprog;
+ memset(pntc, 0, sizeof(pntc));
for (i = 0; i < fp->in_nr; i++) {
- unsigned j, n = util_bitcount(fp->in[i].mask);
+ unsigned n = util_bitcount(fp->in[i].mask);
if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) {
m += n;
continue;
}
-
- for (j = 0; j < vp->out_nr; ++j)
- if (vp->out[j].sn == fp->in[i].sn && vp->out[j].si == fp->in[i].si)
- break;
-
- if (j < vp->out_nr) {
- uint32_t en = nv50->rasterizer->pipe.sprite_coord_enable;
-
- if (!(en & (1 << vp->out[j].si))) {
- m += n;
- continue;
- }
+ if (!(nv50->rast->pipe.sprite_coord_enable & (1 << fp->in[i].si))) {
+ m += n;
+ continue;
}
- /* this is either PointCoord or replaced by sprite coords */
- for (c = 0; c < 4; c++) {
- if (!(fp->in[i].mask & (1 << c)))
- continue;
- pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
- ++m;
+ for (c = 0; c < 4; ++c) {
+ if (fp->in[i].mask & (1 << c)) {
+ pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
+ ++m;
+ }
}
}
- if (nv50->rasterizer->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
- return 0;
- return (1 << 4);
+
+ if (nv50->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
+ mode = 0x00;
+ else
+ mode = 0x10;
+
+ BEGIN_RING(chan, RING_3D(POINT_SPRITE_CTRL), 1);
+ OUT_RING (chan, mode);
+
+ BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE_MAP(0)), 8);
+ OUT_RINGp (chan, pntc, 8);
}
static int
-nv50_vec4_map(uint32_t *map32, int mid, uint32_t lin[4],
+nv50_vec4_map(uint8_t *map, int mid, uint32_t lin[4],
struct nv50_varying *in, struct nv50_varying *out)
{
int c;
uint8_t mv = out->mask, mf = in->mask, oid = out->hw;
- uint8_t *map = (uint8_t *)map32;
for (c = 0; c < 4; ++c) {
if (mf & 1) {
@@ -465,140 +309,112 @@ nv50_vec4_map(uint32_t *map32, int mid, uint32_t lin[4],
return mid;
}
-struct nouveau_stateobj *
+void
nv50_fp_linkage_validate(struct nv50_context *nv50)
{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nv50_program *vp;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nv50_program *vp = nv50->gmtyprog ? nv50->gmtyprog : nv50->vertprog;
struct nv50_program *fp = nv50->fragprog;
- struct nouveau_stateobj *so;
struct nv50_varying dummy;
int i, n, c, m;
-
- uint32_t map[16], lin[4], pntc[8];
-
+ uint32_t primid = 0;
+ uint32_t psiz = 0x000;
uint32_t interp = fp->fp.interp;
uint32_t colors = fp->fp.colors;
- uint32_t clip = 0x04;
- uint32_t psiz = 0x000;
- uint32_t primid = 0;
- uint32_t sysval = 0;
+ uint32_t lin[4];
+ uint8_t map[64];
- if (nv50->geomprog) {
- vp = nv50->geomprog;
- memset(map, 0x80, sizeof(map));
- } else {
- vp = nv50->vertprog;
- memset(map, 0x40, sizeof(map));
- }
- memset(lin, 0, sizeof(lin));
+ memset(lin, 0x00, sizeof(lin));
+
+ /* XXX: in buggy-endian mode, is the first element of map (u32)0x000000xx
+ * or is it the first byte ?
+ */
+ memset(map, nv50->gmtyprog ? 0x80 : 0x40, sizeof(map));
- dummy.linear = 0;
dummy.mask = 0xf; /* map all components of HPOS */
+ dummy.linear = 0;
m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);
- if (vp->vp.clpd < 0x40) {
- for (c = 0; c < vp->vp.clpd_nr; ++c) {
- map[m / 4] |= (vp->vp.clpd + c) << ((m % 4) * 8);
- ++m;
- }
- clip |= vp->vp.clpd_nr << 8;
- }
+ for (c = 0; c < vp->vp.clpd_nr; ++c)
+ map[m++] |= vp->vp.clpd + c;
colors |= m << 8; /* adjust BFC0 id */
- /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */
- if (nv50->rasterizer->pipe.light_twoside) {
+ /* if light_twoside is active, FFC0_ID == BFC0_ID is invalid */
+ if (nv50->rast->pipe.light_twoside) {
for (i = 0; i < 2; ++i)
m = nv50_vec4_map(map, m, lin,
- &fp->in[fp->vp.bfc[i]],
- &vp->out[vp->vp.bfc[i]]);
+ &fp->in[fp->vp.bfc[i]], &vp->out[vp->vp.bfc[i]]);
}
-
colors += m - 4; /* adjust FFC0 id */
- interp |= m << 8; /* set mid where 'normal' FP inputs start */
+ interp |= m << 8; /* set map id where 'normal' FP inputs start */
dummy.mask = 0x0;
- for (i = 0; i < fp->in_nr; i++) {
+ for (i = 0; i < fp->in_nr; ++i) {
for (n = 0; n < vp->out_nr; ++n)
if (vp->out[n].sn == fp->in[i].sn &&
vp->out[n].si == fp->in[i].si)
break;
-
m = nv50_vec4_map(map, m, lin,
&fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy);
- }
+ }
/* PrimitiveID either is replaced by the system value, or
* written by the geometry shader into an output register
*/
if (fp->gp.primid < 0x40) {
- i = (m % 4) * 8;
- map[m / 4] = (map[m / 4] & ~(0xff << i)) | (vp->gp.primid << i);
- primid = m++;
+ primid = m;
+ map[m++] = vp->gp.primid;
}
- if (nv50->rasterizer->pipe.point_size_per_vertex) {
- i = (m % 4) * 8;
- map[m / 4] = (map[m / 4] & ~(0xff << i)) | (vp->vp.psiz << i);
- psiz = (m++ << 4) | 1;
+ if (nv50->rast->pipe.point_size_per_vertex) {
+ psiz = (m << 4) | 1;
+ map[m++] = vp->vp.psiz;
}
- /* now fill the stateobj (at most 28 so_data) */
- so = so_new(10, 54, 0);
-
n = (m + 3) / 4;
assert(m <= 64);
- if (vp->type == PIPE_SHADER_GEOMETRY) {
- so_method(so, tesla, NV50TCL_GP_RESULT_MAP_SIZE, 1);
- so_data (so, m);
- so_method(so, tesla, NV50TCL_GP_RESULT_MAP(0), n);
- so_datap (so, map, n);
+
+ if (unlikely(nv50->gmtyprog)) {
+ BEGIN_RING(chan, RING_3D(GP_RESULT_MAP_SIZE), 1);
+ OUT_RING (chan, m);
+ BEGIN_RING(chan, RING_3D(GP_RESULT_MAP(0)), n);
+ OUT_RINGp (chan, map, n);
} else {
- so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
- so_data (so, vp->vp.attrs[2]);
+ BEGIN_RING(chan, RING_3D(VP_GP_BUILTIN_ATTR_EN), 1);
+ OUT_RING (chan, vp->vp.attrs[2]);
- so_method(so, tesla, NV50TCL_MAP_SEMANTIC_4, 1);
- so_data (so, primid);
+ BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_4), 1);
+ OUT_RING (chan, primid);
- so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
- so_data (so, m);
- so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n);
- so_datap (so, map, n);
+ BEGIN_RING(chan, RING_3D(VP_RESULT_MAP_SIZE), 1);
+ OUT_RING (chan, m);
+ BEGIN_RING(chan, RING_3D(VP_RESULT_MAP(0)), n);
+ OUT_RINGp (chan, map, n);
}
- so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4);
- so_data (so, colors);
- so_data (so, clip);
- so_data (so, sysval);
- so_data (so, psiz);
-
- so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1);
- so_data (so, interp);
-
- so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4);
- so_datap (so, lin, 4);
+ BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_0), 4);
+ OUT_RING (chan, colors);
+ OUT_RING (chan, (vp->vp.clpd_nr << 8) | 4);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, psiz);
- if (nv50->rasterizer->pipe.point_quad_rasterization) {
- so_method(so, tesla, NV50TCL_POINT_SPRITE_CTRL, 1);
- so_data (so,
- nv50_pntc_replace(nv50, pntc, (interp >> 8) & 0xff));
+ BEGIN_RING(chan, RING_3D(FP_INTERPOLANT_CTRL), 1);
+ OUT_RING (chan, interp);
- so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8);
- so_datap (so, pntc, 8);
- }
+ nv50->state.interpolant_ctrl = interp;
- so_method(so, tesla, NV50TCL_GP_ENABLE, 1);
- so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0);
+ BEGIN_RING(chan, RING_3D(NOPERSPECTIVE_BITMAP(0)), 4);
+ OUT_RINGp (chan, lin, 4);
- return so;
+ BEGIN_RING(chan, RING_3D(GP_ENABLE), 1);
+ OUT_RING (chan, nv50->gmtyprog ? 1 : 0);
}
static int
-nv50_vp_gp_mapping(uint32_t *map32, int m,
+nv50_vp_gp_mapping(uint8_t *map, int m,
struct nv50_program *vp, struct nv50_program *gp)
{
- uint8_t *map = (uint8_t *)map32;
int i, j, c;
for (i = 0; i < gp->in_nr; ++i) {
@@ -625,34 +441,29 @@ nv50_vp_gp_mapping(uint32_t *map32, int m,
return m;
}
-struct nouveau_stateobj *
+void
nv50_gp_linkage_validate(struct nv50_context *nv50)
{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
struct nv50_program *vp = nv50->vertprog;
- struct nv50_program *gp = nv50->geomprog;
- uint32_t map[16];
+ struct nv50_program *gp = nv50->gmtyprog;
int m = 0;
+ int n;
+ uint8_t map[64];
if (!gp)
- return NULL;
+ return;
memset(map, 0, sizeof(map));
m = nv50_vp_gp_mapping(map, m, vp, gp);
- so = so_new(3, 24 - 3, 0);
-
- so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
- so_data (so, vp->vp.attrs[2] | gp->vp.attrs[2]);
-
- assert(m <= 32);
- so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
- so_data (so, m);
+ n = (m + 3) / 4;
- m = (m + 3) / 4;
- so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m);
- so_datap (so, map, m);
+ BEGIN_RING(chan, RING_3D(VP_GP_BUILTIN_ATTR_EN), 1);
+ OUT_RING (chan, vp->vp.attrs[2] | gp->vp.attrs[2]);
- return so;
+ BEGIN_RING(chan, RING_3D(VP_RESULT_MAP_SIZE), 1);
+ OUT_RING (chan, m);
+ BEGIN_RING(chan, RING_3D(VP_RESULT_MAP(0)), n);
+ OUT_RINGp (chan, map, n);
}
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index f42fa2d4d2..db25715969 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Ben Skeggs
+ * Copyright 2010 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -20,871 +20,856 @@
* SOFTWARE.
*/
-#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
+#include "util/u_transfer.h"
#include "tgsi/tgsi_parse.h"
+#include "nv50_stateobj.h"
#include "nv50_context.h"
-#include "nv50_texture.h"
-#include "nouveau/nouveau_stateobj.h"
+#include "nv50_3d.xml.h"
+#include "nv50_texture.xml.h"
+
+#include "nouveau/nouveau_gldefs.h"
static INLINE uint32_t
nv50_colormask(unsigned mask)
{
- uint32_t cmask = 0;
+ uint32_t ret = 0;
- if (mask & PIPE_MASK_R)
- cmask |= 0x0001;
- if (mask & PIPE_MASK_G)
- cmask |= 0x0010;
- if (mask & PIPE_MASK_B)
- cmask |= 0x0100;
- if (mask & PIPE_MASK_A)
- cmask |= 0x1000;
+ if (mask & PIPE_MASK_R)
+ ret |= 0x0001;
+ if (mask & PIPE_MASK_G)
+ ret |= 0x0010;
+ if (mask & PIPE_MASK_B)
+ ret |= 0x0100;
+ if (mask & PIPE_MASK_A)
+ ret |= 0x1000;
- return cmask;
+ return ret;
}
+#define NV50_BLEND_FACTOR_CASE(a, b) \
+ case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b
+
static INLINE uint32_t
-nv50_blend_func(unsigned factor)
-{
- switch (factor) {
- case PIPE_BLENDFACTOR_ZERO:
- return NV50TCL_BLEND_FUNC_SRC_RGB_ZERO;
- case PIPE_BLENDFACTOR_ONE:
- return NV50TCL_BLEND_FUNC_SRC_RGB_ONE;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- return NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- return NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA;
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA;
- case PIPE_BLENDFACTOR_DST_ALPHA:
- return NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA;
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA;
- case PIPE_BLENDFACTOR_DST_COLOR:
- return NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR;
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- return NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE;
- case PIPE_BLENDFACTOR_CONST_COLOR:
- return NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR;
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- return NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA;
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA;
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- return NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_COLOR;
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_COLOR;
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- return NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_ALPHA;
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_ALPHA;
- default:
- return NV50TCL_BLEND_FUNC_SRC_RGB_ZERO;
- }
+nv50_blend_fac(unsigned factor)
+{
+ switch (factor) {
+ NV50_BLEND_FACTOR_CASE(ONE, ONE);
+ NV50_BLEND_FACTOR_CASE(SRC_COLOR, SRC_COLOR);
+ NV50_BLEND_FACTOR_CASE(SRC_ALPHA, SRC_ALPHA);
+ NV50_BLEND_FACTOR_CASE(DST_ALPHA, DST_ALPHA);
+ NV50_BLEND_FACTOR_CASE(DST_COLOR, DST_COLOR);
+ NV50_BLEND_FACTOR_CASE(SRC_ALPHA_SATURATE, SRC_ALPHA_SATURATE);
+ NV50_BLEND_FACTOR_CASE(CONST_COLOR, CONSTANT_COLOR);
+ NV50_BLEND_FACTOR_CASE(CONST_ALPHA, CONSTANT_ALPHA);
+ NV50_BLEND_FACTOR_CASE(SRC1_COLOR, SRC1_COLOR);
+ NV50_BLEND_FACTOR_CASE(SRC1_ALPHA, SRC1_ALPHA);
+ NV50_BLEND_FACTOR_CASE(ZERO, ZERO);
+ NV50_BLEND_FACTOR_CASE(INV_SRC_COLOR, ONE_MINUS_SRC_COLOR);
+ NV50_BLEND_FACTOR_CASE(INV_SRC_ALPHA, ONE_MINUS_SRC_ALPHA);
+ NV50_BLEND_FACTOR_CASE(INV_DST_ALPHA, ONE_MINUS_DST_ALPHA);
+ NV50_BLEND_FACTOR_CASE(INV_DST_COLOR, ONE_MINUS_DST_COLOR);
+ NV50_BLEND_FACTOR_CASE(INV_CONST_COLOR, ONE_MINUS_CONSTANT_COLOR);
+ NV50_BLEND_FACTOR_CASE(INV_CONST_ALPHA, ONE_MINUS_CONSTANT_ALPHA);
+ NV50_BLEND_FACTOR_CASE(INV_SRC1_COLOR, ONE_MINUS_SRC1_COLOR);
+ NV50_BLEND_FACTOR_CASE(INV_SRC1_ALPHA, ONE_MINUS_SRC1_ALPHA);
+ default:
+ return NV50_3D_BLEND_FACTOR_ZERO;
+ }
}
static void *
nv50_blend_state_create(struct pipe_context *pipe,
- const struct pipe_blend_state *cso)
-{
- struct nouveau_stateobj *so = so_new(5, 24, 0);
- struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
- struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj);
- unsigned i, blend_enabled = 0;
-
- /*XXX ignored:
- * - dither
- */
-
- so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8);
- if (cso->independent_blend_enable) {
- for (i = 0; i < 8; ++i) {
- so_data(so, cso->rt[i].blend_enable);
- if (cso->rt[i].blend_enable)
- blend_enabled = 1;
- }
- } else
- if (cso->rt[0].blend_enable) {
- blend_enabled = 1;
- for (i = 0; i < 8; i++)
- so_data(so, 1);
- } else {
- for (i = 0; i < 8; i++)
- so_data(so, 0);
- }
- if (blend_enabled) {
- so_method(so, tesla, NV50TCL_BLEND_EQUATION_RGB, 5);
- so_data (so, nvgl_blend_eqn(cso->rt[0].rgb_func));
- so_data (so, nv50_blend_func(cso->rt[0].rgb_src_factor));
- so_data (so, nv50_blend_func(cso->rt[0].rgb_dst_factor));
- so_data (so, nvgl_blend_eqn(cso->rt[0].alpha_func));
- so_data (so, nv50_blend_func(cso->rt[0].alpha_src_factor));
- so_method(so, tesla, NV50TCL_BLEND_FUNC_DST_ALPHA, 1);
- so_data (so, nv50_blend_func(cso->rt[0].alpha_dst_factor));
- }
-
- if (cso->logicop_enable == 0 ) {
- so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 1);
- so_data (so, 0);
- } else {
- so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 2);
- so_data (so, 1);
- so_data (so, nvgl_logicop_func(cso->logicop_func));
- }
-
- so_method(so, tesla, NV50TCL_COLOR_MASK(0), 8);
- if (cso->independent_blend_enable)
- for (i = 0; i < 8; ++i)
- so_data(so, nv50_colormask(cso->rt[i].colormask));
- else {
- uint32_t cmask = nv50_colormask(cso->rt[0].colormask);
- for (i = 0; i < 8; i++)
- so_data(so, cmask);
- }
-
- bso->pipe = *cso;
- so_ref(so, &bso->so);
- so_ref(NULL, &so);
- return (void *)bso;
+ const struct pipe_blend_state *cso)
+{
+ struct nv50_blend_stateobj *so = CALLOC_STRUCT(nv50_blend_stateobj);
+ int i;
+ boolean emit_common_func = cso->rt[0].blend_enable;
+
+ if (nv50_context(pipe)->screen->tesla->grclass >= NVA3_3D) {
+ SB_BEGIN_3D(so, BLEND_INDEPENDENT, 1);
+ SB_DATA (so, cso->independent_blend_enable);
+ }
+
+ so->pipe = *cso;
+
+ SB_BEGIN_3D(so, BLEND_ENABLE(0), 8);
+ if (cso->independent_blend_enable) {
+ for (i = 0; i < 8; ++i) {
+ SB_DATA(so, cso->rt[i].blend_enable);
+ if (cso->rt[i].blend_enable)
+ emit_common_func = TRUE;
+ }
+
+ if (nv50_context(pipe)->screen->tesla->grclass >= NVA3_3D) {
+ emit_common_func = FALSE;
+
+ for (i = 0; i < 8; ++i) {
+ if (!cso->rt[i].blend_enable)
+ continue;
+ SB_BEGIN_3D_(so, NVA3_3D_IBLEND_EQUATION_RGB(i), 6);
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[i].rgb_func));
+ SB_DATA (so, nv50_blend_fac(cso->rt[i].rgb_src_factor));
+ SB_DATA (so, nv50_blend_fac(cso->rt[i].rgb_dst_factor));
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[i].alpha_func));
+ SB_DATA (so, nv50_blend_fac(cso->rt[i].alpha_src_factor));
+ SB_DATA (so, nv50_blend_fac(cso->rt[i].alpha_dst_factor));
+ }
+ }
+ } else {
+ for (i = 0; i < 8; ++i)
+ SB_DATA(so, cso->rt[0].blend_enable);
+ }
+
+ if (emit_common_func) {
+ SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5);
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[0].rgb_func));
+ SB_DATA (so, nv50_blend_fac(cso->rt[0].rgb_src_factor));
+ SB_DATA (so, nv50_blend_fac(cso->rt[0].rgb_dst_factor));
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[0].alpha_func));
+ SB_DATA (so, nv50_blend_fac(cso->rt[0].alpha_src_factor));
+ SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1);
+ SB_DATA (so, nv50_blend_fac(cso->rt[0].alpha_dst_factor));
+ }
+
+ if (cso->logicop_enable) {
+ SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_logicop_func(cso->logicop_func));
+ } else {
+ SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 1);
+ SB_DATA (so, 0);
+ }
+
+ SB_BEGIN_3D(so, COLOR_MASK(0), 8);
+ if (cso->independent_blend_enable) {
+ for (i = 0; i < 8; ++i)
+ SB_DATA(so, nv50_colormask(cso->rt[i].colormask));
+ } else {
+ uint32_t cmask = nv50_colormask(cso->rt[0].colormask);
+ for (i = 0; i < 8; ++i)
+ SB_DATA(so, cmask);
+ }
+
+ assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
+ return so;
}
static void
nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso)
{
- struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_context *nv50 = nv50_context(pipe);
- nv50->blend = hwcso;
- nv50->dirty |= NV50_NEW_BLEND;
+ nv50->blend = hwcso;
+ nv50->dirty |= NV50_NEW_BLEND;
}
static void
nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso)
{
- struct nv50_blend_stateobj *bso = hwcso;
-
- so_ref(NULL, &bso->so);
- FREE(bso);
+ FREE(hwcso);
}
-static INLINE unsigned
-wrap_mode(unsigned wrap)
-{
- switch (wrap) {
- case PIPE_TEX_WRAP_REPEAT:
- return NV50TSC_1_0_WRAPS_REPEAT;
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- return NV50TSC_1_0_WRAPS_MIRROR_REPEAT;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- return NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- return NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER;
- case PIPE_TEX_WRAP_CLAMP:
- return NV50TSC_1_0_WRAPS_CLAMP;
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE;
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER;
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- return NV50TSC_1_0_WRAPS_MIRROR_CLAMP;
- default:
- NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
- return NV50TSC_1_0_WRAPS_REPEAT;
- }
-}
static void *
-nv50_sampler_state_create(struct pipe_context *pipe,
- const struct pipe_sampler_state *cso)
-{
- struct nv50_sampler_stateobj *sso = CALLOC(1, sizeof(*sso));
- unsigned *tsc = sso->tsc;
- float limit;
-
- tsc[0] = (0x00026000 |
- (wrap_mode(cso->wrap_s) << 0) |
- (wrap_mode(cso->wrap_t) << 3) |
- (wrap_mode(cso->wrap_r) << 6));
-
- switch (cso->mag_img_filter) {
- case PIPE_TEX_FILTER_LINEAR:
- tsc[1] |= NV50TSC_1_1_MAGF_LINEAR;
- break;
- case PIPE_TEX_FILTER_NEAREST:
- default:
- tsc[1] |= NV50TSC_1_1_MAGF_NEAREST;
- break;
- }
-
- switch (cso->min_img_filter) {
- case PIPE_TEX_FILTER_LINEAR:
- tsc[1] |= NV50TSC_1_1_MINF_LINEAR;
- break;
- case PIPE_TEX_FILTER_NEAREST:
- default:
- tsc[1] |= NV50TSC_1_1_MINF_NEAREST;
- break;
- }
-
- switch (cso->min_mip_filter) {
- case PIPE_TEX_MIPFILTER_LINEAR:
- tsc[1] |= NV50TSC_1_1_MIPF_LINEAR;
- break;
- case PIPE_TEX_MIPFILTER_NEAREST:
- tsc[1] |= NV50TSC_1_1_MIPF_NEAREST;
- break;
- case PIPE_TEX_MIPFILTER_NONE:
- default:
- tsc[1] |= NV50TSC_1_1_MIPF_NONE;
- break;
- }
-
- if (cso->max_anisotropy >= 16)
- tsc[0] |= (7 << 20);
- else
- if (cso->max_anisotropy >= 12)
- tsc[0] |= (6 << 20);
- else {
- tsc[0] |= (cso->max_anisotropy >> 1) << 20;
-
- if (cso->max_anisotropy >= 4)
- tsc[1] |= NV50TSC_1_1_UNKN_ANISO_35;
- else
- if (cso->max_anisotropy >= 2)
- tsc[1] |= NV50TSC_1_1_UNKN_ANISO_15;
- }
-
- if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- /* XXX: must be deactivated for non-shadow textures */
- tsc[0] |= (1 << 9);
- tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10;
- }
-
- limit = CLAMP(cso->lod_bias, -16.0, 15.0);
- tsc[1] |= ((int)(limit * 256.0) & 0x1fff) << 12;
-
- tsc[2] |= ((int)CLAMP(cso->max_lod, 0.0, 15.0) << 20) |
- ((int)CLAMP(cso->min_lod, 0.0, 15.0) << 8);
-
- tsc[4] = fui(cso->border_color[0]);
- tsc[5] = fui(cso->border_color[1]);
- tsc[6] = fui(cso->border_color[2]);
- tsc[7] = fui(cso->border_color[3]);
-
- sso->normalized = cso->normalized_coords;
- return (void *)sso;
-}
-
-/* type == 0 for VPs, 1 for GPs, 2 for FPs, which is how the
- * relevant tesla methods are indexed (NV50TCL_BIND_TSC etc.)
- */
-static INLINE void
-nv50_sampler_state_bind(struct pipe_context *pipe, unsigned type,
- unsigned nr, void **sampler)
+nv50_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nv50_rasterizer_stateobj *so;
+
+ so = CALLOC_STRUCT(nv50_rasterizer_stateobj);
+ if (!so)
+ return NULL;
+ so->pipe = *cso;
+
+#ifndef NV50_SCISSORS_CLIPPING
+ SB_BEGIN_3D(so, SCISSOR_ENABLE(0), 1);
+ SB_DATA (so, cso->scissor);
+#endif
+
+ SB_BEGIN_3D(so, SHADE_MODEL, 1);
+ SB_DATA (so, cso->flatshade ? NV50_3D_SHADE_MODEL_FLAT :
+ NV50_3D_SHADE_MODEL_SMOOTH);
+ SB_BEGIN_3D(so, PROVOKING_VERTEX_LAST, 1);
+ SB_DATA (so, !cso->flatshade_first);
+ SB_BEGIN_3D(so, VERTEX_TWO_SIDE_ENABLE, 1);
+ SB_DATA (so, cso->light_twoside);
+
+ SB_BEGIN_3D(so, LINE_WIDTH, 1);
+ SB_DATA (so, fui(cso->line_width));
+ SB_BEGIN_3D(so, LINE_SMOOTH_ENABLE, 1);
+ SB_DATA (so, cso->line_smooth);
+
+ SB_BEGIN_3D(so, LINE_STIPPLE_ENABLE, 1);
+ if (cso->line_stipple_enable) {
+ SB_DATA (so, 1);
+ SB_BEGIN_3D(so, LINE_STIPPLE, 1);
+ SB_DATA (so, (cso->line_stipple_pattern << 8) |
+ cso->line_stipple_factor);
+ } else {
+ SB_DATA (so, 0);
+ }
+
+ if (!cso->point_size_per_vertex) {
+ SB_BEGIN_3D(so, POINT_SIZE, 1);
+ SB_DATA (so, fui(cso->point_size));
+ }
+ SB_BEGIN_3D(so, POINT_SPRITE_ENABLE, 1);
+ SB_DATA (so, cso->point_quad_rasterization);
+ SB_BEGIN_3D(so, POINT_SMOOTH_ENABLE, 1);
+ SB_DATA (so, cso->point_smooth);
+
+ SB_BEGIN_3D(so, POLYGON_MODE_FRONT, 3);
+ SB_DATA (so, nvgl_polygon_mode(cso->fill_front));
+ SB_DATA (so, nvgl_polygon_mode(cso->fill_back));
+ SB_DATA (so, cso->poly_smooth);
+
+ SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3);
+ SB_DATA (so, cso->cull_face != PIPE_FACE_NONE);
+ SB_DATA (so, cso->front_ccw ? NV50_3D_FRONT_FACE_CCW :
+ NV50_3D_FRONT_FACE_CW);
+ switch (cso->cull_face) {
+ case PIPE_FACE_FRONT_AND_BACK:
+ SB_DATA(so, NV50_3D_CULL_FACE_FRONT_AND_BACK);
+ break;
+ case PIPE_FACE_FRONT:
+ SB_DATA(so, NV50_3D_CULL_FACE_FRONT);
+ break;
+ case PIPE_FACE_BACK:
+ default:
+ SB_DATA(so, NV50_3D_CULL_FACE_BACK);
+ break;
+ }
+
+ SB_BEGIN_3D(so, POLYGON_STIPPLE_ENABLE, 1);
+ SB_DATA (so, cso->poly_stipple_enable);
+ SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3);
+ SB_DATA (so, cso->offset_point);
+ SB_DATA (so, cso->offset_line);
+ SB_DATA (so, cso->offset_tri);
+
+ if (cso->offset_point || cso->offset_line || cso->offset_tri) {
+ SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1);
+ SB_DATA (so, fui(cso->offset_scale));
+ SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1);
+ SB_DATA (so, fui(cso->offset_units * 2.0f));
+ }
+
+ assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
+ return (void *)so;
+}
+
+static void
+nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
{
- struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_context *nv50 = nv50_context(pipe);
- memcpy(nv50->sampler[type], sampler, nr * sizeof(void *));
+ nv50->rast = hwcso;
+ nv50->dirty |= NV50_NEW_RASTERIZER;
+}
+
+static void
+nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
- nv50->sampler_nr[type] = nr;
- nv50->dirty |= NV50_NEW_SAMPLER;
+static void *
+nv50_zsa_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nv50_zsa_stateobj *so = CALLOC_STRUCT(nv50_zsa_stateobj);
+
+ so->pipe = *cso;
+
+ SB_BEGIN_3D(so, DEPTH_WRITE_ENABLE, 1);
+ SB_DATA (so, cso->depth.writemask);
+ SB_BEGIN_3D(so, DEPTH_TEST_ENABLE, 1);
+ if (cso->depth.enabled) {
+ SB_DATA (so, 1);
+ SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1);
+ SB_DATA (so, nvgl_comparison_op(cso->depth.func));
+ } else {
+ SB_DATA (so, 0);
+ }
+
+ if (cso->stencil[0].enabled) {
+ SB_BEGIN_3D(so, STENCIL_ENABLE, 5);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
+ SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func));
+ SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2);
+ SB_DATA (so, cso->stencil[0].writemask);
+ SB_DATA (so, cso->stencil[0].valuemask);
+ } else {
+ SB_BEGIN_3D(so, STENCIL_ENABLE, 1);
+ SB_DATA (so, 0);
+ }
+
+ if (cso->stencil[1].enabled) {
+ assert(cso->stencil[0].enabled);
+ SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
+ SB_DATA (so, nvgl_comparison_op(cso->stencil[1].func));
+ SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2);
+ SB_DATA (so, cso->stencil[1].writemask);
+ SB_DATA (so, cso->stencil[1].valuemask);
+ } else {
+ SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 1);
+ SB_DATA (so, 0);
+ }
+
+ SB_BEGIN_3D(so, ALPHA_TEST_ENABLE, 1);
+ if (cso->alpha.enabled) {
+ SB_DATA (so, 1);
+ SB_BEGIN_3D(so, ALPHA_TEST_REF, 2);
+ SB_DATA (so, fui(cso->alpha.ref_value));
+ SB_DATA (so, nvgl_comparison_op(cso->alpha.func));
+ } else {
+ SB_DATA (so, 0);
+ }
+
+ assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
+ return (void *)so;
}
static void
-nv50_vp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s)
+nv50_zsa_state_bind(struct pipe_context *pipe, void *hwcso)
{
- nv50_sampler_state_bind(pipe, 0, nr, s);
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->zsa = hwcso;
+ nv50->dirty |= NV50_NEW_ZSA;
}
static void
-nv50_fp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s)
+nv50_zsa_state_delete(struct pipe_context *pipe, void *hwcso)
{
- nv50_sampler_state_bind(pipe, 2, nr, s);
+ FREE(hwcso);
+}
+
+/* ====================== SAMPLERS AND TEXTURES ================================
+ */
+
+#define NV50_TSC_WRAP_CASE(n) \
+ case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n
+
+static INLINE unsigned
+nv50_tsc_wrap_mode(unsigned wrap)
+{
+ switch (wrap) {
+ NV50_TSC_WRAP_CASE(REPEAT);
+ NV50_TSC_WRAP_CASE(MIRROR_REPEAT);
+ NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE);
+ NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER);
+ NV50_TSC_WRAP_CASE(CLAMP);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP);
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ return NV50_TSC_WRAP_REPEAT;
+ }
+}
+
+void *
+nv50_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+ struct nv50_tsc_entry *so = CALLOC_STRUCT(nv50_tsc_entry);
+ float f[2];
+
+ so->id = -1;
+
+ so->tsc[0] = (0x00026000 |
+ (nv50_tsc_wrap_mode(cso->wrap_s) << 0) |
+ (nv50_tsc_wrap_mode(cso->wrap_t) << 3) |
+ (nv50_tsc_wrap_mode(cso->wrap_r) << 6));
+
+ switch (cso->mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ so->tsc[1] |= NV50_TSC_1_MAGF_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ so->tsc[1] |= NV50_TSC_1_MAGF_NEAREST;
+ break;
+ }
+
+ switch (cso->min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ so->tsc[1] |= NV50_TSC_1_MINF_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ so->tsc[1] |= NV50_TSC_1_MINF_NEAREST;
+ break;
+ }
+
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ so->tsc[1] |= NV50_TSC_1_MIPF_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ so->tsc[1] |= NV50_TSC_1_MIPF_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ so->tsc[1] |= NV50_TSC_1_MIPF_NONE;
+ break;
+ }
+
+ if (cso->max_anisotropy >= 16)
+ so->tsc[0] |= (7 << 20);
+ else
+ if (cso->max_anisotropy >= 12)
+ so->tsc[0] |= (6 << 20);
+ else {
+ so->tsc[0] |= (cso->max_anisotropy >> 1) << 20;
+
+ if (cso->max_anisotropy >= 4)
+ so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_35;
+ else
+ if (cso->max_anisotropy >= 2)
+ so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_15;
+ }
+
+ if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ /* NOTE: must be deactivated for non-shadow textures */
+ so->tsc[0] |= (1 << 9);
+ so->tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10;
+ }
+
+ f[0] = CLAMP(cso->lod_bias, -16.0f, 15.0f);
+ so->tsc[1] |= ((int)(f[0] * 256.0f) & 0x1fff) << 12;
+
+ f[0] = CLAMP(cso->min_lod, 0.0f, 15.0f);
+ f[1] = CLAMP(cso->max_lod, 0.0f, 15.0f);
+ so->tsc[2] |=
+ (((int)(f[1] * 256.0f) & 0xfff) << 12) | ((int)(f[0] * 256.0f) & 0xfff);
+
+ so->tsc[4] = fui(cso->border_color[0]);
+ so->tsc[5] = fui(cso->border_color[1]);
+ so->tsc[6] = fui(cso->border_color[2]);
+ so->tsc[7] = fui(cso->border_color[3]);
+
+ return (void *)so;
}
static void
nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
{
- FREE(hwcso);
+ unsigned s, i;
+
+ for (s = 0; s < 5; ++s)
+ for (i = 0; i < nv50_context(pipe)->num_samplers[s]; ++i)
+ if (nv50_context(pipe)->samplers[s][i] == hwcso)
+ nv50_context(pipe)->samplers[s][i] = NULL;
+
+ nv50_screen_tsc_free(nv50_context(pipe)->screen, nv50_tsc_entry(hwcso));
+
+ FREE(hwcso);
}
static INLINE void
-nv50_set_sampler_views(struct pipe_context *pipe, unsigned p,
- unsigned nr,
- struct pipe_sampler_view **views)
+nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s,
+ unsigned nr, void **hwcso)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- unsigned i;
+ unsigned i;
- for (i = 0; i < nr; i++)
- pipe_sampler_view_reference(&nv50->sampler_views[p][i],
- views[i]);
+ for (i = 0; i < nr; ++i) {
+ struct nv50_tsc_entry *old = nv50->samplers[s][i];
- for (i = nr; i < nv50->sampler_view_nr[p]; i++)
- pipe_sampler_view_reference(&nv50->sampler_views[p][i], NULL);
+ nv50->samplers[s][i] = nv50_tsc_entry(hwcso[i]);
+ if (old)
+ nv50_screen_tsc_unlock(nv50->screen, old);
+ }
+ for (; i < nv50->num_samplers[s]; ++i)
+ if (nv50->samplers[s][i])
+ nv50_screen_tsc_unlock(nv50->screen, nv50->samplers[s][i]);
- nv50->sampler_view_nr[p] = nr;
- nv50->dirty |= NV50_NEW_TEXTURE;
+ nv50->num_samplers[s] = nr;
+
+ nv50->dirty |= NV50_NEW_SAMPLERS;
}
static void
-nv50_set_vp_sampler_views(struct pipe_context *pipe,
- unsigned nr,
- struct pipe_sampler_view **views)
+nv50_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
{
- nv50_set_sampler_views(pipe, 0, nr, views);
+ nv50_stage_sampler_states_bind(nv50_context(pipe), 0, nr, s);
}
static void
-nv50_set_fp_sampler_views(struct pipe_context *pipe,
- unsigned nr,
- struct pipe_sampler_view **views)
+nv50_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
{
- nv50_set_sampler_views(pipe, 2, nr, views);
+ nv50_stage_sampler_states_bind(nv50_context(pipe), 2, nr, s);
}
static void
-nv50_sampler_view_destroy(struct pipe_context *pipe,
- struct pipe_sampler_view *view)
+nv50_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
{
- pipe_resource_reference(&view->texture, NULL);
- FREE(nv50_sampler_view(view));
+ nv50_stage_sampler_states_bind(nv50_context(pipe), 1, nr, s);
}
-static struct pipe_sampler_view *
-nv50_create_sampler_view(struct pipe_context *pipe,
- struct pipe_resource *texture,
- const struct pipe_sampler_view *templ)
+/* NOTE: only called when not referenced anywhere, won't be bound */
+static void
+nv50_sampler_view_destroy(struct pipe_context *pipe,
+ struct pipe_sampler_view *view)
{
- struct nv50_sampler_view *view = CALLOC_STRUCT(nv50_sampler_view);
+ pipe_resource_reference(&view->texture, NULL);
- view->pipe = *templ;
- view->pipe.reference.count = 1;
- view->pipe.texture = NULL;
- pipe_resource_reference(&view->pipe.texture, texture);
- view->pipe.context = pipe;
+ nv50_screen_tic_free(nv50_context(pipe)->screen, nv50_tic_entry(view));
- if (!nv50_tex_construct(view)) {
- nv50_sampler_view_destroy(pipe, &view->pipe);
- return NULL;
- }
- return &view->pipe;
+ FREE(nv50_tic_entry(view));
}
+static INLINE void
+nv50_stage_set_sampler_views(struct nv50_context *nv50, int s,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ unsigned i;
+
+ for (i = 0; i < nr; ++i) {
+ struct nv50_tic_entry *old = nv50_tic_entry(nv50->textures[s][i]);
+ if (old)
+ nv50_screen_tic_unlock(nv50->screen, old);
-static void *
-nv50_rasterizer_state_create(struct pipe_context *pipe,
- const struct pipe_rasterizer_state *cso)
-{
- struct nouveau_stateobj *so = so_new(16, 22, 0);
- struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
- struct nv50_rasterizer_stateobj *rso =
- CALLOC_STRUCT(nv50_rasterizer_stateobj);
-
- /*XXX: ignored
- * - light_twoside
- * - point_smooth
- * - multisample
- * - point_sprite / sprite_coord_mode
- */
-
- so_method(so, tesla, NV50TCL_SCISSOR_ENABLE(0), 1);
- so_data (so, cso->scissor);
-
- so_method(so, tesla, NV50TCL_SHADE_MODEL, 1);
- so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT :
- NV50TCL_SHADE_MODEL_SMOOTH);
- so_method(so, tesla, NV50TCL_PROVOKING_VERTEX_LAST, 1);
- so_data (so, cso->flatshade_first ? 0 : 1);
-
- so_method(so, tesla, NV50TCL_VERTEX_TWO_SIDE_ENABLE, 1);
- so_data (so, cso->light_twoside);
-
- so_method(so, tesla, NV50TCL_LINE_WIDTH, 1);
- so_data (so, fui(cso->line_width));
- so_method(so, tesla, NV50TCL_LINE_SMOOTH_ENABLE, 1);
- so_data (so, cso->line_smooth ? 1 : 0);
- if (cso->line_stipple_enable) {
- so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1);
- so_data (so, 1);
- so_method(so, tesla, NV50TCL_LINE_STIPPLE_PATTERN, 1);
- so_data (so, (cso->line_stipple_pattern << 8) |
- cso->line_stipple_factor);
- } else {
- so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1);
- so_data (so, 0);
- }
-
- so_method(so, tesla, NV50TCL_POINT_SIZE, 1);
- so_data (so, fui(cso->point_size));
-
- so_method(so, tesla, NV50TCL_POINT_SPRITE_ENABLE, 1);
- so_data (so, cso->point_quad_rasterization ? 1 : 0);
-
- so_method(so, tesla, NV50TCL_POLYGON_MODE_FRONT, 3);
- so_data(so, nvgl_polygon_mode(cso->fill_front));
- so_data(so, nvgl_polygon_mode(cso->fill_back));
- so_data(so, cso->poly_smooth ? 1 : 0);
-
- so_method(so, tesla, NV50TCL_CULL_FACE_ENABLE, 3);
- so_data (so, cso->cull_face != PIPE_FACE_NONE);
- if (cso->front_ccw) {
- so_data(so, NV50TCL_FRONT_FACE_CCW);
- }
- else {
- so_data(so, NV50TCL_FRONT_FACE_CW);
- }
- switch (cso->cull_face) {
- case PIPE_FACE_FRONT:
- so_data(so, NV50TCL_CULL_FACE_FRONT);
- break;
- case PIPE_FACE_BACK:
- so_data(so, NV50TCL_CULL_FACE_BACK);
- break;
- case PIPE_FACE_FRONT_AND_BACK:
- so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK);
- break;
- default:
- so_data(so, NV50TCL_CULL_FACE_BACK);
- break;
- }
-
- so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_ENABLE, 1);
- so_data (so, cso->poly_stipple_enable ? 1 : 0);
-
- so_method(so, tesla, NV50TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
- so_data(so, cso->offset_point);
- so_data(so, cso->offset_line);
- so_data(so, cso->offset_tri);
-
- if (cso->offset_point ||
- cso->offset_line ||
- cso->offset_tri) {
- so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1);
- so_data (so, fui(cso->offset_scale));
- so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1);
- so_data (so, fui(cso->offset_units * 2.0f));
- }
-
- rso->pipe = *cso;
- so_ref(so, &rso->so);
- so_ref(NULL, &so);
- return (void *)rso;
+ pipe_sampler_view_reference(&nv50->textures[s][i], views[i]);
+ }
+
+ for (i = nr; i < nv50->num_textures[s]; ++i) {
+ struct nv50_tic_entry *old = nv50_tic_entry(nv50->textures[s][i]);
+ if (!old)
+ continue;
+ nv50_screen_tic_unlock(nv50->screen, old);
+
+ pipe_sampler_view_reference(&nv50->textures[s][i], NULL);
+ }
+
+ nv50->num_textures[s] = nr;
+
+ nv50_bufctx_reset(nv50, NV50_BUFCTX_TEXTURES);
+
+ nv50->dirty |= NV50_NEW_TEXTURES;
}
static void
-nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+nv50_vp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
{
- struct nv50_context *nv50 = nv50_context(pipe);
-
- nv50->rasterizer = hwcso;
- nv50->dirty |= NV50_NEW_RASTERIZER;
+ nv50_stage_set_sampler_views(nv50_context(pipe), 0, nr, views);
}
static void
-nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+nv50_fp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
{
- struct nv50_rasterizer_stateobj *rso = hwcso;
-
- so_ref(NULL, &rso->so);
- FREE(rso);
+ nv50_stage_set_sampler_views(nv50_context(pipe), 2, nr, views);
}
-static void *
-nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe,
- const struct pipe_depth_stencil_alpha_state *cso)
-{
- struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
- struct nv50_zsa_stateobj *zsa = CALLOC_STRUCT(nv50_zsa_stateobj);
- struct nouveau_stateobj *so = so_new(9, 21, 0);
-
- so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1);
- so_data (so, cso->depth.writemask ? 1 : 0);
- if (cso->depth.enabled) {
- so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1);
- so_data (so, 1);
- so_method(so, tesla, NV50TCL_DEPTH_TEST_FUNC, 1);
- so_data (so, nvgl_comparison_op(cso->depth.func));
- } else {
- so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1);
- so_data (so, 0);
- }
-
- if (cso->stencil[0].enabled) {
- so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 5);
- so_data (so, 1);
- so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op));
- so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
- so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
- so_data (so, nvgl_comparison_op(cso->stencil[0].func));
- so_method(so, tesla, NV50TCL_STENCIL_FRONT_MASK, 2);
- so_data (so, cso->stencil[0].writemask);
- so_data (so, cso->stencil[0].valuemask);
- } else {
- so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1);
- so_data (so, 0);
- }
-
- if (cso->stencil[1].enabled) {
- so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5);
- so_data (so, 1);
- so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op));
- so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
- so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
- so_data (so, nvgl_comparison_op(cso->stencil[1].func));
- so_method(so, tesla, NV50TCL_STENCIL_BACK_MASK, 2);
- so_data (so, cso->stencil[1].writemask);
- so_data (so, cso->stencil[1].valuemask);
- } else {
- so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1);
- so_data (so, 0);
- }
-
- if (cso->alpha.enabled) {
- so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1);
- so_data (so, 1);
- so_method(so, tesla, NV50TCL_ALPHA_TEST_REF, 2);
- so_data (so, fui(cso->alpha.ref_value));
- so_data (so, nvgl_comparison_op(cso->alpha.func));
- } else {
- so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1);
- so_data (so, 0);
- }
-
- zsa->pipe = *cso;
- so_ref(so, &zsa->so);
- so_ref(NULL, &so);
- return (void *)zsa;
+static void
+nv50_gp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nv50_stage_set_sampler_views(nv50_context(pipe), 1, nr, views);
}
-static void
-nv50_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
+/* ============================= SHADERS =======================================
+ */
+
+static void *
+nv50_sp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso, unsigned type)
{
- struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_program *prog;
- nv50->zsa = hwcso;
- nv50->dirty |= NV50_NEW_ZSA;
+ prog = CALLOC_STRUCT(nv50_program);
+ if (!prog)
+ return NULL;
+
+ prog->type = type;
+ prog->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ return (void *)prog;
}
static void
-nv50_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+nv50_sp_state_delete(struct pipe_context *pipe, void *hwcso)
{
- struct nv50_zsa_stateobj *zsa = hwcso;
+ struct nv50_program *prog = (struct nv50_program *)hwcso;
+
+ nv50_program_destroy(nv50_context(pipe), prog);
- so_ref(NULL, &zsa->so);
- FREE(zsa);
+ FREE((void *)prog->pipe.tokens);
+ FREE(prog);
}
static void *
nv50_vp_state_create(struct pipe_context *pipe,
- const struct pipe_shader_state *cso)
+ const struct pipe_shader_state *cso)
{
- struct nv50_program *p = CALLOC_STRUCT(nv50_program);
-
- p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
- p->type = PIPE_SHADER_VERTEX;
- return (void *)p;
+ return nv50_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX);
}
static void
nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso)
{
- struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_context *nv50 = nv50_context(pipe);
- nv50->vertprog = hwcso;
- nv50->dirty |= NV50_NEW_VERTPROG;
-}
-
-static void
-nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso)
-{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nv50_program *p = hwcso;
-
- nv50_program_destroy(nv50, p);
- FREE((void *)p->pipe.tokens);
- FREE(p);
+ nv50->vertprog = hwcso;
+ nv50->dirty |= NV50_NEW_VERTPROG;
}
static void *
nv50_fp_state_create(struct pipe_context *pipe,
- const struct pipe_shader_state *cso)
+ const struct pipe_shader_state *cso)
{
- struct nv50_program *p = CALLOC_STRUCT(nv50_program);
-
- p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
- p->type = PIPE_SHADER_FRAGMENT;
- return (void *)p;
+ return nv50_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT);
}
static void
nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso)
{
- struct nv50_context *nv50 = nv50_context(pipe);
-
- nv50->fragprog = hwcso;
- nv50->dirty |= NV50_NEW_FRAGPROG;
-}
-
-static void
-nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso)
-{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nv50_program *p = hwcso;
+ struct nv50_context *nv50 = nv50_context(pipe);
- nv50_program_destroy(nv50, p);
- FREE((void *)p->pipe.tokens);
- FREE(p);
+ nv50->fragprog = hwcso;
+ nv50->dirty |= NV50_NEW_FRAGPROG;
}
static void *
nv50_gp_state_create(struct pipe_context *pipe,
- const struct pipe_shader_state *cso)
+ const struct pipe_shader_state *cso)
{
- struct nv50_program *p = CALLOC_STRUCT(nv50_program);
-
- p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
- p->type = PIPE_SHADER_GEOMETRY;
- return (void *)p;
+ return nv50_sp_state_create(pipe, cso, PIPE_SHADER_GEOMETRY);
}
static void
nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso)
{
- struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_context *nv50 = nv50_context(pipe);
- nv50->geomprog = hwcso;
- nv50->dirty |= NV50_NEW_GEOMPROG;
+ nv50->gmtyprog = hwcso;
+ nv50->dirty |= NV50_NEW_GMTYPROG;
}
static void
-nv50_gp_state_delete(struct pipe_context *pipe, void *hwcso)
+nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ struct pipe_resource *res)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nv50_program *p = hwcso;
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ if (nv50->constbuf[shader][index])
+ nv50_bufctx_del_resident(nv50, NV50_BUFCTX_CONSTANT,
+ nv04_resource(nv50->constbuf[shader][index]));
- nv50_program_destroy(nv50, p);
- FREE((void *)p->pipe.tokens);
- FREE(p);
+ pipe_resource_reference(&nv50->constbuf[shader][index], res);
+
+ nv50->constbuf_dirty[shader] |= 1 << index;
+
+ nv50->dirty |= NV50_NEW_CONSTBUF;
}
+/* =============================================================================
+ */
+
static void
nv50_set_blend_color(struct pipe_context *pipe,
- const struct pipe_blend_color *bcol)
+ const struct pipe_blend_color *bcol)
{
- struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_context *nv50 = nv50_context(pipe);
- nv50->blend_colour = *bcol;
- nv50->dirty |= NV50_NEW_BLEND_COLOUR;
+ nv50->blend_colour = *bcol;
+ nv50->dirty |= NV50_NEW_BLEND_COLOUR;
}
- static void
+static void
nv50_set_stencil_ref(struct pipe_context *pipe,
- const struct pipe_stencil_ref *sr)
+ const struct pipe_stencil_ref *sr)
{
- struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_context *nv50 = nv50_context(pipe);
- nv50->stencil_ref = *sr;
- nv50->dirty |= NV50_NEW_STENCIL_REF;
+ nv50->stencil_ref = *sr;
+ nv50->dirty |= NV50_NEW_STENCIL_REF;
}
static void
nv50_set_clip_state(struct pipe_context *pipe,
- const struct pipe_clip_state *clip)
+ const struct pipe_clip_state *clip)
{
- struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_context *nv50 = nv50_context(pipe);
+ const unsigned size = clip->nr * sizeof(clip->ucp[0]);
- nv50->clip.depth_clamp = clip->depth_clamp;
- nv50->dirty |= NV50_NEW_CLIP;
-}
+ memcpy(&nv50->clip.ucp[0][0], &clip->ucp[0][0], size);
+ nv50->clip.nr = clip->nr;
-static void
-nv50_set_sample_mask(struct pipe_context *pipe,
- unsigned sample_mask)
-{
+ nv50->clip.depth_clamp = clip->depth_clamp;
+
+ nv50->dirty |= NV50_NEW_CLIP;
}
static void
-nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
- struct pipe_resource *buf )
+nv50_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
{
- struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_context *nv50 = nv50_context(pipe);
- if (shader == PIPE_SHADER_VERTEX) {
- nv50->constbuf[PIPE_SHADER_VERTEX] = buf;
- nv50->dirty |= NV50_NEW_VERTPROG_CB;
- } else
- if (shader == PIPE_SHADER_FRAGMENT) {
- nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf;
- nv50->dirty |= NV50_NEW_FRAGPROG_CB;
- } else
- if (shader == PIPE_SHADER_GEOMETRY) {
- nv50->constbuf[PIPE_SHADER_GEOMETRY] = buf;
- nv50->dirty |= NV50_NEW_GEOMPROG_CB;
- }
+ nv50->sample_mask = sample_mask;
+ nv50->dirty |= NV50_NEW_SAMPLE_MASK;
}
+
static void
nv50_set_framebuffer_state(struct pipe_context *pipe,
- const struct pipe_framebuffer_state *fb)
+ const struct pipe_framebuffer_state *fb)
{
- struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_context *nv50 = nv50_context(pipe);
- nv50->framebuffer = *fb;
- nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+ nv50->framebuffer = *fb;
+ nv50->dirty |= NV50_NEW_FRAMEBUFFER;
}
static void
nv50_set_polygon_stipple(struct pipe_context *pipe,
- const struct pipe_poly_stipple *stipple)
+ const struct pipe_poly_stipple *stipple)
{
- struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_context *nv50 = nv50_context(pipe);
- nv50->stipple = *stipple;
- nv50->dirty |= NV50_NEW_STIPPLE;
+ nv50->stipple = *stipple;
+ nv50->dirty |= NV50_NEW_STIPPLE;
}
static void
nv50_set_scissor_state(struct pipe_context *pipe,
- const struct pipe_scissor_state *s)
+ const struct pipe_scissor_state *scissor)
{
- struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_context *nv50 = nv50_context(pipe);
- nv50->scissor = *s;
- nv50->dirty |= NV50_NEW_SCISSOR;
+ nv50->scissor = *scissor;
+ nv50->dirty |= NV50_NEW_SCISSOR;
}
static void
nv50_set_viewport_state(struct pipe_context *pipe,
- const struct pipe_viewport_state *vpt)
+ const struct pipe_viewport_state *vpt)
{
- struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_context *nv50 = nv50_context(pipe);
- nv50->viewport = *vpt;
- nv50->dirty |= NV50_NEW_VIEWPORT;
+ nv50->viewport = *vpt;
+ nv50->dirty |= NV50_NEW_VIEWPORT;
}
static void
-nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
- const struct pipe_vertex_buffer *vb)
+nv50_set_vertex_buffers(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_buffer *vb)
{
- struct nv50_context *nv50 = nv50_context(pipe);
-
- memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count);
- nv50->vtxbuf_nr = count;
+ struct nv50_context *nv50 = nv50_context(pipe);
+ unsigned i;
- nv50->dirty |= NV50_NEW_ARRAYS;
-}
+ for (i = 0; i < count; ++i)
+ pipe_resource_reference(&nv50->vtxbuf[i].buffer, vb[i].buffer);
+ for (; i < nv50->num_vtxbufs; ++i)
+ pipe_resource_reference(&nv50->vtxbuf[i].buffer, NULL);
-static void
-nv50_set_index_buffer(struct pipe_context *pipe,
- const struct pipe_index_buffer *ib)
-{
- struct nv50_context *nv50 = nv50_context(pipe);
+ memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count);
+ nv50->num_vtxbufs = count;
- if (ib)
- memcpy(&nv50->idxbuf, ib, sizeof(nv50->idxbuf));
- else
- memset(&nv50->idxbuf, 0, sizeof(nv50->idxbuf));
+ nv50_bufctx_reset(nv50, NV50_BUFCTX_VERTEX);
- /* TODO make this more like a state */
+ nv50->dirty |= NV50_NEW_ARRAYS;
}
-static void *
-nv50_vtxelts_state_create(struct pipe_context *pipe,
- unsigned num_elements,
- const struct pipe_vertex_element *elements)
+static void
+nv50_set_index_buffer(struct pipe_context *pipe,
+ const struct pipe_index_buffer *ib)
{
- struct nv50_vtxelt_stateobj *cso = CALLOC_STRUCT(nv50_vtxelt_stateobj);
-
- assert(num_elements < 16); /* not doing fallbacks yet */
- cso->num_elements = num_elements;
- memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
-
- nv50_vtxelt_construct(cso);
+ struct nv50_context *nv50 = nv50_context(pipe);
- return (void *)cso;
-}
+ if (ib) {
+ pipe_resource_reference(&nv50->idxbuf.buffer, ib->buffer);
-static void
-nv50_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
-{
- FREE(hwcso);
+ memcpy(&nv50->idxbuf, ib, sizeof(nv50->idxbuf));
+ } else {
+ pipe_resource_reference(&nv50->idxbuf.buffer, NULL);
+ }
}
static void
-nv50_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
+nv50_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
{
- struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_context *nv50 = nv50_context(pipe);
- nv50->vtxelt = hwcso;
- nv50->dirty |= NV50_NEW_ARRAYS;
+ nv50->vertex = hwcso;
+ nv50->dirty |= NV50_NEW_VERTEX;
}
void
nv50_init_state_functions(struct nv50_context *nv50)
{
- nv50->pipe.create_blend_state = nv50_blend_state_create;
- nv50->pipe.bind_blend_state = nv50_blend_state_bind;
- nv50->pipe.delete_blend_state = nv50_blend_state_delete;
-
- nv50->pipe.create_sampler_state = nv50_sampler_state_create;
- nv50->pipe.delete_sampler_state = nv50_sampler_state_delete;
- nv50->pipe.bind_fragment_sampler_states = nv50_fp_sampler_state_bind;
- nv50->pipe.bind_vertex_sampler_states = nv50_vp_sampler_state_bind;
- nv50->pipe.set_fragment_sampler_views = nv50_set_fp_sampler_views;
- nv50->pipe.set_vertex_sampler_views = nv50_set_vp_sampler_views;
- nv50->pipe.create_sampler_view = nv50_create_sampler_view;
- nv50->pipe.sampler_view_destroy = nv50_sampler_view_destroy;
-
- nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create;
- nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind;
- nv50->pipe.delete_rasterizer_state = nv50_rasterizer_state_delete;
-
- nv50->pipe.create_depth_stencil_alpha_state =
- nv50_depth_stencil_alpha_state_create;
- nv50->pipe.bind_depth_stencil_alpha_state =
- nv50_depth_stencil_alpha_state_bind;
- nv50->pipe.delete_depth_stencil_alpha_state =
- nv50_depth_stencil_alpha_state_delete;
-
- nv50->pipe.create_vs_state = nv50_vp_state_create;
- nv50->pipe.bind_vs_state = nv50_vp_state_bind;
- nv50->pipe.delete_vs_state = nv50_vp_state_delete;
-
- nv50->pipe.create_fs_state = nv50_fp_state_create;
- nv50->pipe.bind_fs_state = nv50_fp_state_bind;
- nv50->pipe.delete_fs_state = nv50_fp_state_delete;
-
- nv50->pipe.create_gs_state = nv50_gp_state_create;
- nv50->pipe.bind_gs_state = nv50_gp_state_bind;
- nv50->pipe.delete_gs_state = nv50_gp_state_delete;
-
- nv50->pipe.set_blend_color = nv50_set_blend_color;
- nv50->pipe.set_stencil_ref = nv50_set_stencil_ref;
- nv50->pipe.set_clip_state = nv50_set_clip_state;
- nv50->pipe.set_sample_mask = nv50_set_sample_mask;
- nv50->pipe.set_constant_buffer = nv50_set_constant_buffer;
- nv50->pipe.set_framebuffer_state = nv50_set_framebuffer_state;
- nv50->pipe.set_polygon_stipple = nv50_set_polygon_stipple;
- nv50->pipe.set_scissor_state = nv50_set_scissor_state;
- nv50->pipe.set_viewport_state = nv50_set_viewport_state;
-
- nv50->pipe.create_vertex_elements_state = nv50_vtxelts_state_create;
- nv50->pipe.delete_vertex_elements_state = nv50_vtxelts_state_delete;
- nv50->pipe.bind_vertex_elements_state = nv50_vtxelts_state_bind;
-
- nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers;
- nv50->pipe.set_index_buffer = nv50_set_index_buffer;
+ struct pipe_context *pipe = &nv50->base.pipe;
+
+ pipe->create_blend_state = nv50_blend_state_create;
+ pipe->bind_blend_state = nv50_blend_state_bind;
+ pipe->delete_blend_state = nv50_blend_state_delete;
+
+ pipe->create_rasterizer_state = nv50_rasterizer_state_create;
+ pipe->bind_rasterizer_state = nv50_rasterizer_state_bind;
+ pipe->delete_rasterizer_state = nv50_rasterizer_state_delete;
+
+ pipe->create_depth_stencil_alpha_state = nv50_zsa_state_create;
+ pipe->bind_depth_stencil_alpha_state = nv50_zsa_state_bind;
+ pipe->delete_depth_stencil_alpha_state = nv50_zsa_state_delete;
+
+ pipe->create_sampler_state = nv50_sampler_state_create;
+ pipe->delete_sampler_state = nv50_sampler_state_delete;
+ pipe->bind_vertex_sampler_states = nv50_vp_sampler_states_bind;
+ pipe->bind_fragment_sampler_states = nv50_fp_sampler_states_bind;
+ pipe->bind_geometry_sampler_states = nv50_gp_sampler_states_bind;
+
+ pipe->create_sampler_view = nv50_create_sampler_view;
+ pipe->sampler_view_destroy = nv50_sampler_view_destroy;
+ pipe->set_vertex_sampler_views = nv50_vp_set_sampler_views;
+ pipe->set_fragment_sampler_views = nv50_fp_set_sampler_views;
+ pipe->set_geometry_sampler_views = nv50_gp_set_sampler_views;
+
+ pipe->create_vs_state = nv50_vp_state_create;
+ pipe->create_fs_state = nv50_fp_state_create;
+ pipe->create_gs_state = nv50_gp_state_create;
+ pipe->bind_vs_state = nv50_vp_state_bind;
+ pipe->bind_fs_state = nv50_fp_state_bind;
+ pipe->bind_gs_state = nv50_gp_state_bind;
+ pipe->delete_vs_state = nv50_sp_state_delete;
+ pipe->delete_fs_state = nv50_sp_state_delete;
+ pipe->delete_gs_state = nv50_sp_state_delete;
+
+ pipe->set_blend_color = nv50_set_blend_color;
+ pipe->set_stencil_ref = nv50_set_stencil_ref;
+ pipe->set_clip_state = nv50_set_clip_state;
+ pipe->set_sample_mask = nv50_set_sample_mask;
+ pipe->set_constant_buffer = nv50_set_constant_buffer;
+ pipe->set_framebuffer_state = nv50_set_framebuffer_state;
+ pipe->set_polygon_stipple = nv50_set_polygon_stipple;
+ pipe->set_scissor_state = nv50_set_scissor_state;
+ pipe->set_viewport_state = nv50_set_viewport_state;
+
+ pipe->create_vertex_elements_state = nv50_vertex_state_create;
+ pipe->delete_vertex_elements_state = nv50_vertex_state_delete;
+ pipe->bind_vertex_elements_state = nv50_vertex_state_bind;
+
+ pipe->set_vertex_buffers = nv50_set_vertex_buffers;
+ pipe->set_index_buffer = nv50_set_index_buffer;
+
+ pipe->redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index ae02143e35..f3d45eb95e 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -1,449 +1,342 @@
-/*
- * Copyright 2008 Ben Skeggs
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
- * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "util/u_format.h"
#include "nv50_context.h"
-#include "nv50_resource.h"
-#include "nouveau/nouveau_stateobj.h"
+#include "os/os_time.h"
-static struct nouveau_stateobj *
-validate_fb(struct nv50_context *nv50)
+static void
+nv50_validate_fb(struct nv50_context *nv50)
{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so = so_new(32, 79, 18);
- struct pipe_framebuffer_state *fb = &nv50->framebuffer;
- unsigned i, w = 0, h = 0, gw = 0;
-
- /* Set nr of active RTs and select RT for each colour output.
- * FP result 0 always goes to RT[0], bits 4 - 6 are ignored.
- * Ambiguous assignment results in no rendering (no DATA_ERROR).
- */
- so_method(so, tesla, NV50TCL_RT_CONTROL, 1);
- so_data (so, fb->nr_cbufs |
- (0 << 4) | (1 << 7) | (2 << 10) | (3 << 13) |
- (4 << 16) | (5 << 19) | (6 << 22) | (7 << 25));
-
- for (i = 0; i < fb->nr_cbufs; i++) {
- struct pipe_resource *pt = fb->cbufs[i]->texture;
- struct nouveau_bo *bo = nv50_miptree(pt)->base.bo;
-
- if (!gw) {
- w = fb->cbufs[i]->width;
- h = fb->cbufs[i]->height;
- gw = 1;
- } else {
- assert(w == fb->cbufs[i]->width);
- assert(h == fb->cbufs[i]->height);
- }
-
- assert(nv50_format_table[fb->cbufs[i]->format].rt);
-
- so_method(so, tesla, NV50TCL_RT_HORIZ(i), 2);
- so_data (so, fb->cbufs[i]->width);
- so_data (so, fb->cbufs[i]->height);
-
- so_method(so, tesla, NV50TCL_RT_ADDRESS_HIGH(i), 5);
- so_reloc (so, bo, ((struct nv50_surface *)fb->cbufs[i])->offset, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_HIGH | NOUVEAU_BO_RDWR, 0, 0);
- so_reloc (so, bo, ((struct nv50_surface *)fb->cbufs[i])->offset, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
- so_data (so, nv50_format_table[fb->cbufs[i]->format].rt);
- so_data (so, nv50_miptree(pt)->
- level[fb->cbufs[i]->u.tex.level].tile_mode << 4);
- so_data(so, 0x00000000);
-
- so_method(so, tesla, NV50TCL_RT_ARRAY_MODE, 1);
- so_data (so, 1);
- }
-
- if (fb->zsbuf) {
- struct pipe_resource *pt = fb->zsbuf->texture;
- struct nouveau_bo *bo = nv50_miptree(pt)->base.bo;
-
- if (!gw) {
- w = fb->zsbuf->width;
- h = fb->zsbuf->height;
- gw = 1;
- } else {
- assert(w == fb->zsbuf->width);
- assert(h == fb->zsbuf->height);
- }
-
- assert(nv50_format_table[fb->zsbuf->format].rt);
-
- so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5);
- so_reloc (so, bo, ((struct nv50_surface *)(fb->zsbuf))->offset, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_HIGH | NOUVEAU_BO_RDWR, 0, 0);
- so_reloc (so, bo, ((struct nv50_surface *)(fb->zsbuf))->offset, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
- so_data (so, nv50_format_table[fb->zsbuf->format].rt);
- so_data (so, nv50_miptree(pt)->
- level[fb->zsbuf->u.tex.level].tile_mode << 4);
- so_data (so, 0x00000000);
-
- so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1);
- so_data (so, 1);
- so_method(so, tesla, NV50TCL_ZETA_HORIZ, 3);
- so_data (so, fb->zsbuf->width);
- so_data (so, fb->zsbuf->height);
- so_data (so, 0x00010001);
- } else {
- so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1);
- so_data (so, 0);
- }
-
- so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2);
- so_data (so, w << 16);
- so_data (so, h << 16);
- /* set window lower left corner */
- so_method(so, tesla, NV50TCL_WINDOW_OFFSET_X, 2);
- so_data (so, 0);
- so_data (so, 0);
- /* set screen scissor rectangle */
- so_method(so, tesla, NV50TCL_SCREEN_SCISSOR_HORIZ, 2);
- so_data (so, w << 16);
- so_data (so, h << 16);
-
- return so;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+ unsigned i;
+ boolean serialize = FALSE;
+
+ nv50_bufctx_reset(nv50, NV50_BUFCTX_FRAME);
+
+ BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
+ OUT_RING (chan, (076543210 << 4) | fb->nr_cbufs);
+ BEGIN_RING(chan, RING_3D(SCREEN_SCISSOR_HORIZ), 2);
+ OUT_RING (chan, fb->width << 16);
+ OUT_RING (chan, fb->height << 16);
+
+ MARK_RING(chan, 9 * fb->nr_cbufs, 2 * fb->nr_cbufs);
+
+ for (i = 0; i < fb->nr_cbufs; ++i) {
+ struct nv50_miptree *mt = nv50_miptree(fb->cbufs[i]->texture);
+ struct nv50_surface *sf = nv50_surface(fb->cbufs[i]);
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t offset = sf->offset;
+
+ BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 5);
+ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, nv50_format_table[sf->base.format].rt);
+ OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode << 4);
+ OUT_RING (chan, mt->layer_stride >> 2);
+ BEGIN_RING(chan, RING_3D(RT_HORIZ(i)), 2);
+ OUT_RING (chan, sf->width);
+ OUT_RING (chan, sf->height);
+ BEGIN_RING(chan, RING_3D(RT_ARRAY_MODE), 1);
+ OUT_RING (chan, sf->depth);
+
+ if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
+ serialize = TRUE;
+ mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ mt->base.status &= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ /* only register for writing, otherwise we'd always serialize here */
+ nv50_bufctx_add_resident(nv50, NV50_BUFCTX_FRAME, &mt->base,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ }
+
+ if (fb->zsbuf) {
+ struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture);
+ struct nv50_surface *sf = nv50_surface(fb->zsbuf);
+ struct nouveau_bo *bo = mt->base.bo;
+ int unk = mt->base.base.target == PIPE_TEXTURE_2D;
+ uint32_t offset = sf->offset;
+
+ MARK_RING (chan, 12, 2);
+ BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5);
+ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, nv50_format_table[fb->zsbuf->format].rt);
+ OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode << 4);
+ OUT_RING (chan, mt->layer_stride >> 2);
+ BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3);
+ OUT_RING (chan, sf->width);
+ OUT_RING (chan, sf->height);
+ OUT_RING (chan, (unk << 16) | sf->depth);
+
+ if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
+ serialize = TRUE;
+ mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ mt->base.status &= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ nv50_bufctx_add_resident(nv50, NV50_BUFCTX_FRAME, &mt->base,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ } else {
+ BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
+ OUT_RING (chan, 0);
+ }
+
+ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
+ OUT_RING (chan, fb->width << 16);
+ OUT_RING (chan, fb->height << 16);
+
+ if (serialize) {
+ BEGIN_RING(chan, RING_3D(SERIALIZE), 1);
+ OUT_RING (chan, 0);
+ }
}
static void
-nv50_validate_samplers(struct nv50_context *nv50, struct nouveau_stateobj *so,
- unsigned p)
+nv50_validate_blend_colour(struct nv50_context *nv50)
{
- struct nouveau_grobj *eng2d = nv50->screen->eng2d;
- unsigned i, j, dw = nv50->sampler_nr[p] * 8;
-
- if (!dw)
- return;
- nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM,
- p * (32 * 8 * 4), dw * 4);
-
- so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), dw);
-
- for (i = 0; i < nv50->sampler_nr[p]; ++i) {
- if (nv50->sampler[p][i])
- so_datap(so, nv50->sampler[p][i]->tsc, 8);
- else {
- for (j = 0; j < 8; ++j) /* you get punished */
- so_data(so, 0); /* ... for leaving holes */
- }
- }
-}
+ struct nouveau_channel *chan = nv50->screen->base.channel;
-static struct nouveau_stateobj *
-validate_blend(struct nv50_context *nv50)
-{
- struct nouveau_stateobj *so = NULL;
- so_ref(nv50->blend->so, &so);
- return so;
+ BEGIN_RING(chan, RING_3D(BLEND_COLOR(0)), 4);
+ OUT_RINGf (chan, nv50->blend_colour.color[0]);
+ OUT_RINGf (chan, nv50->blend_colour.color[1]);
+ OUT_RINGf (chan, nv50->blend_colour.color[2]);
+ OUT_RINGf (chan, nv50->blend_colour.color[3]);
}
-static struct nouveau_stateobj *
-validate_zsa(struct nv50_context *nv50)
+static void
+nv50_validate_stencil_ref(struct nv50_context *nv50)
{
- struct nouveau_stateobj *so = NULL;
- so_ref(nv50->zsa->so, &so);
- return so;
-}
+ struct nouveau_channel *chan = nv50->screen->base.channel;
-static struct nouveau_stateobj *
-validate_rast(struct nv50_context *nv50)
-{
- struct nouveau_stateobj *so = NULL;
- so_ref(nv50->rasterizer->so, &so);
- return so;
+ BEGIN_RING(chan, RING_3D(STENCIL_FRONT_FUNC_REF), 1);
+ OUT_RING (chan, nv50->stencil_ref.ref_value[0]);
+ BEGIN_RING(chan, RING_3D(STENCIL_BACK_FUNC_REF), 1);
+ OUT_RING (chan, nv50->stencil_ref.ref_value[1]);
}
-static struct nouveau_stateobj *
-validate_blend_colour(struct nv50_context *nv50)
+static void
+nv50_validate_stipple(struct nv50_context *nv50)
{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so = so_new(1, 4, 0);
-
- so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4);
- so_data (so, fui(nv50->blend_colour.color[0]));
- so_data (so, fui(nv50->blend_colour.color[1]));
- so_data (so, fui(nv50->blend_colour.color[2]));
- so_data (so, fui(nv50->blend_colour.color[3]));
- return so;
-}
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ unsigned i;
-static struct nouveau_stateobj *
-validate_stencil_ref(struct nv50_context *nv50)
-{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so = so_new(2, 2, 0);
-
- so_method(so, tesla, NV50TCL_STENCIL_FRONT_FUNC_REF, 1);
- so_data (so, nv50->stencil_ref.ref_value[0]);
- so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 1);
- so_data (so, nv50->stencil_ref.ref_value[1]);
- return so;
+ BEGIN_RING(chan, RING_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
+ for (i = 0; i < 32; ++i)
+ OUT_RING(chan, util_bswap32(nv50->stipple.stipple[i]));
}
-static struct nouveau_stateobj *
-validate_stipple(struct nv50_context *nv50)
+static void
+nv50_validate_scissor(struct nv50_context *nv50)
{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so = so_new(1, 32, 0);
- int i;
-
- so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
- for (i = 0; i < 32; i++)
- so_data(so, util_bswap32(nv50->stipple.stipple[i]));
- return so;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct pipe_scissor_state *s = &nv50->scissor;
+#ifdef NV50_SCISSORS_CLIPPING
+ struct pipe_viewport_state *vp = &nv50->viewport;
+ int minx, maxx, miny, maxy;
+
+ if (!(nv50->dirty &
+ (NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT | NV50_NEW_FRAMEBUFFER)) &&
+ nv50->state.scissor == nv50->rast->pipe.scissor)
+ return;
+ nv50->state.scissor = nv50->rast->pipe.scissor;
+
+ if (nv50->state.scissor) {
+ minx = s->minx;
+ maxx = s->maxx;
+ miny = s->miny;
+ maxy = s->maxy;
+ } else {
+ minx = 0;
+ maxx = nv50->framebuffer.width;
+ miny = 0;
+ maxy = nv50->framebuffer.height;
+ }
+
+ minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0])));
+ maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0])));
+ miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1])));
+ maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1])));
+
+ BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
+ OUT_RING (chan, (maxx << 16) | minx);
+ OUT_RING (chan, (maxy << 16) | miny);
+#else
+ BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
+ OUT_RING (chan, (s->maxx << 16) | s->minx);
+ OUT_RING (chan, (s->maxy << 16) | s->miny);
+#endif
}
-static struct nouveau_stateobj *
-validate_scissor(struct nv50_context *nv50)
+static void
+nv50_validate_viewport(struct nv50_context *nv50)
{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct pipe_scissor_state *s = &nv50->scissor;
- struct nouveau_stateobj *so;
-
- so = so_new(1, 2, 0);
- so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2);
- so_data (so, (s->maxx << 16) | s->minx);
- so_data (so, (s->maxy << 16) | s->miny);
- return so;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ float zmin, zmax;
+
+ BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSLATE_X(0)), 3);
+ OUT_RINGf (chan, nv50->viewport.translate[0]);
+ OUT_RINGf (chan, nv50->viewport.translate[1]);
+ OUT_RINGf (chan, nv50->viewport.translate[2]);
+ BEGIN_RING(chan, RING_3D(VIEWPORT_SCALE_X(0)), 3);
+ OUT_RINGf (chan, nv50->viewport.scale[0]);
+ OUT_RINGf (chan, nv50->viewport.scale[1]);
+ OUT_RINGf (chan, nv50->viewport.scale[2]);
+
+ zmin = nv50->viewport.translate[2] - fabsf(nv50->viewport.scale[2]);
+ zmax = nv50->viewport.translate[2] + fabsf(nv50->viewport.scale[2]);
+
+#ifdef NV50_SCISSORS_CLIPPING
+ BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2);
+ OUT_RINGf (chan, zmin);
+ OUT_RINGf (chan, zmax);
+#endif
}
-static struct nouveau_stateobj *
-validate_viewport(struct nv50_context *nv50)
+static void
+nv50_validate_clip(struct nv50_context *nv50)
{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so = so_new(3, 7, 0);
-
- so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3);
- so_data (so, fui(nv50->viewport.translate[0]));
- so_data (so, fui(nv50->viewport.translate[1]));
- so_data (so, fui(nv50->viewport.translate[2]));
- so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3);
- so_data (so, fui(nv50->viewport.scale[0]));
- so_data (so, fui(nv50->viewport.scale[1]));
- so_data (so, fui(nv50->viewport.scale[2]));
-
- /* no idea what 0f90 does */
- so_method(so, tesla, 0x0f90, 1);
- so_data (so, 0);
-
- return so;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ uint32_t clip;
+
+ if (nv50->clip.depth_clamp) {
+ clip =
+ NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR |
+ NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR |
+ NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1;
+ } else {
+ clip = 0;
+ }
+
+#ifndef NV50_SCISSORS_CLIPPING
+ clip |=
+ NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 |
+ NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1;
+#endif
+
+ BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1);
+ OUT_RING (chan, clip);
+
+ if (nv50->clip.nr) {
+ BEGIN_RING(chan, RING_3D(CB_ADDR), 1);
+ OUT_RING (chan, (0 << 8) | NV50_CB_AUX);
+ BEGIN_RING_NI(chan, RING_3D(CB_DATA(0)), nv50->clip.nr * 4);
+ OUT_RINGp (chan, &nv50->clip.ucp[0][0], nv50->clip.nr * 4);
+ }
+
+ BEGIN_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 1);
+ OUT_RING (chan, (1 << nv50->clip.nr) - 1);
}
-static struct nouveau_stateobj *
-validate_sampler(struct nv50_context *nv50)
+static void
+nv50_validate_blend(struct nv50_context *nv50)
{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so;
- unsigned nr = 0, i;
-
- for (i = 0; i < 3; ++i)
- nr += nv50->sampler_nr[i];
-
- so = so_new(1 + 5 * 3, 1 + 19 * 3 + nr * 8, 3 * 2);
-
- nv50_validate_samplers(nv50, so, 0); /* VP */
- nv50_validate_samplers(nv50, so, 2); /* FP */
+ struct nouveau_channel *chan = nv50->screen->base.channel;
- so_method(so, tesla, 0x1334, 1); /* flush TSC */
- so_data (so, 0);
-
- return so;
+ WAIT_RING(chan, nv50->blend->size);
+ OUT_RINGp(chan, nv50->blend->state, nv50->blend->size);
}
-static struct nouveau_stateobj *
-validate_vtxbuf(struct nv50_context *nv50)
+static void
+nv50_validate_zsa(struct nv50_context *nv50)
{
- struct nouveau_stateobj *so = NULL;
- so_ref(nv50->state.vtxbuf, &so);
- return so;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+
+ WAIT_RING(chan, nv50->zsa->size);
+ OUT_RINGp(chan, nv50->zsa->state, nv50->zsa->size);
}
-static struct nouveau_stateobj *
-validate_vtxattr(struct nv50_context *nv50)
+static void
+nv50_validate_rasterizer(struct nv50_context *nv50)
{
- struct nouveau_stateobj *so = NULL;
- so_ref(nv50->state.vtxattr, &so);
- return so;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+
+ WAIT_RING(chan, nv50->rast->size);
+ OUT_RINGp(chan, nv50->rast->state, nv50->rast->size);
}
-static struct nouveau_stateobj *
-validate_clip(struct nv50_context *nv50)
+static void
+nv50_switch_pipe_context(struct nv50_context *ctx_to)
{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so = so_new(1, 1, 0);
- uint32_t vvcc;
+ struct nv50_context *ctx_from = ctx_to->screen->cur_ctx;
+
+ if (ctx_from)
+ ctx_to->state = ctx_from->state;
+
+ ctx_to->dirty = ~0;
- /* 0x0000 = remove whole primitive only (xyz)
- * 0x1018 = remove whole primitive only (xy), clamp z
- * 0x1080 = clip primitive (xyz)
- * 0x1098 = clip primitive (xy), clamp z
- */
- vvcc = nv50->clip.depth_clamp ? 0x1098 : 0x1080;
+ if (!ctx_to->vertex)
+ ctx_to->dirty &= ~(NV50_NEW_VERTEX | NV50_NEW_ARRAYS);
- so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1);
- so_data (so, vvcc);
+ if (!ctx_to->vertprog)
+ ctx_to->dirty &= ~NV50_NEW_VERTPROG;
+ if (!ctx_to->fragprog)
+ ctx_to->dirty &= ~NV50_NEW_FRAGPROG;
- return so;
+ if (!ctx_to->blend)
+ ctx_to->dirty &= ~NV50_NEW_BLEND;
+ if (!ctx_to->rast)
+ ctx_to->dirty &= ~NV50_NEW_RASTERIZER;
+ if (!ctx_to->zsa)
+ ctx_to->dirty &= ~NV50_NEW_ZSA;
+
+ ctx_to->screen->base.channel->user_private = ctx_to->screen->cur_ctx =
+ ctx_to;
}
-struct state_validate {
- struct nouveau_stateobj *(*func)(struct nv50_context *nv50);
- unsigned states;
+static struct state_validate {
+ void (*func)(struct nv50_context *);
+ uint32_t states;
} validate_list[] = {
- { validate_fb , NV50_NEW_FRAMEBUFFER },
- { validate_blend , NV50_NEW_BLEND },
- { validate_zsa , NV50_NEW_ZSA },
- { nv50_vertprog_validate , NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB },
- { nv50_fragprog_validate , NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB },
- { nv50_geomprog_validate , NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB },
- { nv50_fp_linkage_validate, NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG |
- NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER },
- { nv50_gp_linkage_validate, NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG },
- { validate_rast , NV50_NEW_RASTERIZER },
- { validate_blend_colour , NV50_NEW_BLEND_COLOUR },
- { validate_stencil_ref , NV50_NEW_STENCIL_REF },
- { validate_stipple , NV50_NEW_STIPPLE },
- { validate_scissor , NV50_NEW_SCISSOR },
- { validate_viewport , NV50_NEW_VIEWPORT },
- { validate_sampler , NV50_NEW_SAMPLER },
- { nv50_tex_validate , NV50_NEW_TEXTURE | NV50_NEW_SAMPLER },
- { nv50_vbo_validate , NV50_NEW_ARRAYS },
- { validate_vtxbuf , NV50_NEW_ARRAYS },
- { validate_vtxattr , NV50_NEW_ARRAYS },
- { validate_clip , NV50_NEW_CLIP },
- { NULL , 0 }
+ { nv50_validate_fb, NV50_NEW_FRAMEBUFFER },
+ { nv50_validate_blend, NV50_NEW_BLEND },
+ { nv50_validate_zsa, NV50_NEW_ZSA },
+ { nv50_validate_rasterizer, NV50_NEW_RASTERIZER },
+ { nv50_validate_blend_colour, NV50_NEW_BLEND_COLOUR },
+ { nv50_validate_stencil_ref, NV50_NEW_STENCIL_REF },
+ { nv50_validate_stipple, NV50_NEW_STIPPLE },
+#ifdef NV50_SCISSORS_CLIPPING
+ { nv50_validate_scissor, NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT |
+ NV50_NEW_RASTERIZER |
+ NV50_NEW_FRAMEBUFFER },
+#else
+ { nv50_validate_scissor, NV50_NEW_SCISSOR },
+#endif
+ { nv50_validate_viewport, NV50_NEW_VIEWPORT },
+ { nv50_validate_clip, NV50_NEW_CLIP },
+ { nv50_vertprog_validate, NV50_NEW_VERTPROG },
+ { nv50_gmtyprog_validate, NV50_NEW_GMTYPROG },
+ { nv50_fragprog_validate, NV50_NEW_FRAGPROG },
+ { nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
+ NV50_NEW_GMTYPROG },
+ { nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG },
+ { nv50_sprite_coords_validate, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
+ NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
+ { nv50_constbufs_validate, NV50_NEW_CONSTBUF },
+ { nv50_validate_textures, NV50_NEW_TEXTURES },
+ { nv50_validate_samplers, NV50_NEW_SAMPLERS },
+ { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
boolean
-nv50_state_validate(struct nv50_context *nv50, unsigned wait_dwords)
+nv50_state_validate(struct nv50_context *nv50)
{
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- unsigned nr_relocs = 128, nr_dwords = wait_dwords + 128 + 4;
- int ret, i;
-
- for (i = 0; i < validate_list_len; i++) {
- struct state_validate *validate = &validate_list[i];
- struct nouveau_stateobj *so;
-
- if (!(nv50->dirty & validate->states))
- continue;
-
- so = validate->func(nv50);
- if (!so)
- continue;
-
- nr_dwords += (so->total + so->cur);
- nr_relocs += so->cur_reloc;
-
- so_ref(so, &nv50->state.hw[i]);
- so_ref(NULL, &so);
- nv50->state.hw_dirty |= (1 << i);
- }
- nv50->dirty = 0;
-
- if (nv50->screen->cur_ctx != nv50) {
- for (i = 0; i < validate_list_len; i++) {
- if (!nv50->state.hw[i] ||
- (nv50->state.hw_dirty & (1 << i)))
- continue;
-
- nr_dwords += (nv50->state.hw[i]->total +
- nv50->state.hw[i]->cur);
- nr_relocs += nv50->state.hw[i]->cur_reloc;
- nv50->state.hw_dirty |= (1 << i);
- }
-
- nv50->screen->cur_ctx = nv50;
- }
-
- ret = MARK_RING(chan, nr_dwords, nr_relocs);
- if (ret) {
- debug_printf("MARK_RING(%d, %d) failed: %d\n",
- nr_dwords, nr_relocs, ret);
- return FALSE;
- }
-
- while (nv50->state.hw_dirty) {
- i = ffs(nv50->state.hw_dirty) - 1;
- nv50->state.hw_dirty &= ~(1 << i);
-
- so_emit(chan, nv50->state.hw[i]);
- }
-
- /* Yes, really, we need to do this. If a buffer that is referenced
- * on the hardware isn't part of changed state above, without doing
- * this the kernel is given no clue that the buffer is being used
- * still. This can cause all sorts of fun issues.
- */
- nv50_tex_relocs(nv50);
- so_emit_reloc_markers(chan, nv50->state.hw[0]); /* fb */
- so_emit_reloc_markers(chan, nv50->state.hw[3]); /* vp */
- so_emit_reloc_markers(chan, nv50->state.hw[4]); /* fp */
- so_emit_reloc_markers(chan, nv50->state.hw[17]); /* vb */
- nv50_screen_relocs(nv50->screen);
-
- /* No idea.. */
- BEGIN_RING(chan, tesla, 0x142c, 1);
- OUT_RING (chan, 0);
- BEGIN_RING(chan, tesla, 0x142c, 1);
- OUT_RING (chan, 0);
- return TRUE;
-}
+ unsigned i;
-void nv50_so_init_sifc(struct nv50_context *nv50,
- struct nouveau_stateobj *so,
- struct nouveau_bo *bo, unsigned reloc,
- unsigned offset, unsigned size)
-{
- struct nouveau_grobj *eng2d = nv50->screen->eng2d;
-
- reloc |= NOUVEAU_BO_WR;
-
- so_method(so, eng2d, NV50_2D_DST_FORMAT, 2);
- so_data (so, NV50_2D_DST_FORMAT_R8_UNORM);
- so_data (so, 1);
- so_method(so, eng2d, NV50_2D_DST_PITCH, 5);
- so_data (so, 262144);
- so_data (so, 65536);
- so_data (so, 1);
- so_reloc (so, bo, offset, reloc | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, bo, offset, reloc | NOUVEAU_BO_LOW, 0, 0);
- so_method(so, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2);
- so_data (so, 0);
- so_data (so, NV50_2D_SIFC_FORMAT_R8_UNORM);
- so_method(so, eng2d, NV50_2D_SIFC_WIDTH, 10);
- so_data (so, size);
- so_data (so, 1);
- so_data (so, 0);
- so_data (so, 1);
- so_data (so, 0);
- so_data (so, 1);
- so_data (so, 0);
- so_data (so, 0);
- so_data (so, 0);
- so_data (so, 0);
+ if (nv50->screen->cur_ctx != nv50)
+ nv50_switch_pipe_context(nv50);
+
+ if (nv50->dirty) {
+ for (i = 0; i < validate_list_len; ++i) {
+ struct state_validate *validate = &validate_list[i];
+
+ if (nv50->dirty & validate->states)
+ validate->func(nv50);
+ }
+ nv50->dirty = 0;
+ }
+
+ nv50_bufctx_emit_relocs(nv50);
+
+ return TRUE;
}
diff --git a/src/gallium/drivers/nv50/nv50_stateobj.h b/src/gallium/drivers/nv50/nv50_stateobj.h
new file mode 100644
index 0000000000..515e3e78d4
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_stateobj.h
@@ -0,0 +1,55 @@
+
+#ifndef __NV50_STATEOBJ_H__
+#define __NV50_STATEOBJ_H__
+
+#include "pipe/p_state.h"
+
+#define NV50_SCISSORS_CLIPPING
+
+#define SB_BEGIN_3D(so, m, s) \
+ (so)->state[(so)->size++] = \
+ ((s) << 18) | (NV50_SUBCH_3D << 13) | NV50_3D_##m
+
+#define SB_BEGIN_3D_(so, m, s) \
+ (so)->state[(so)->size++] = \
+ ((s) << 18) | (NV50_SUBCH_3D << 13) | m
+
+#define SB_DATA(so, u) (so)->state[(so)->size++] = (u)
+
+#include "nv50_stateobj_tex.h"
+
+struct nv50_blend_stateobj {
+ struct pipe_blend_state pipe;
+ int size;
+ uint32_t state[78];
+};
+
+struct nv50_rasterizer_stateobj {
+ struct pipe_rasterizer_state pipe;
+ int size;
+ uint32_t state[40];
+};
+
+struct nv50_zsa_stateobj {
+ struct pipe_depth_stencil_alpha_state pipe;
+ int size;
+ uint32_t state[29];
+};
+
+struct nv50_vertex_element {
+ struct pipe_vertex_element pipe;
+ uint32_t state;
+};
+
+struct nv50_vertex_stateobj {
+ struct translate *translate;
+ unsigned num_elements;
+ uint32_t instance_elts;
+ uint32_t instance_bufs;
+ boolean need_conversion;
+ unsigned vertex_size;
+ unsigned packet_vertex_limit;
+ struct nv50_vertex_element element[0];
+};
+
+#endif
diff --git a/src/gallium/drivers/nv50/nv50_stateobj_tex.h b/src/gallium/drivers/nv50/nv50_stateobj_tex.h
new file mode 100644
index 0000000000..99548cbdb4
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_stateobj_tex.h
@@ -0,0 +1,34 @@
+
+#ifndef __NV50_STATEOBJ_TEX_H__
+#define __NV50_STATEOBJ_TEX_H__
+
+#include "pipe/p_state.h"
+
+struct nv50_tsc_entry {
+ int id;
+ uint32_t tsc[8];
+};
+
+static INLINE struct nv50_tsc_entry *
+nv50_tsc_entry(void *hwcso)
+{
+ return (struct nv50_tsc_entry *)hwcso;
+}
+
+struct nv50_tic_entry {
+ struct pipe_sampler_view pipe;
+ int id;
+ uint32_t tic[8];
+};
+
+static INLINE struct nv50_tic_entry *
+nv50_tic_entry(struct pipe_sampler_view *view)
+{
+ return (struct nv50_tic_entry *)view;
+}
+
+extern void *
+nv50_sampler_state_create(struct pipe_context *,
+ const struct pipe_sampler_state *);
+
+#endif /* __NV50_STATEOBJ_TEX_H__ */
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index ce48022db4..dc9e2880f0 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -20,306 +20,362 @@
* SOFTWARE.
*/
-#define __NOUVEAU_PUSH_H__
#include <stdint.h>
-#include "nouveau/nouveau_pushbuf.h"
-#include "nv50_context.h"
-#include "nv50_resource.h"
+
#include "pipe/p_defines.h"
+
#include "util/u_inlines.h"
#include "util/u_pack_color.h"
-
#include "util/u_format.h"
+#include "nv50_context.h"
+#include "nv50_resource.h"
+
+#include "nv50_defs.xml.h"
+
/* return TRUE for formats that can be converted among each other by NV50_2D */
static INLINE boolean
nv50_2d_format_faithful(enum pipe_format format)
{
- switch (format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- case PIPE_FORMAT_B8G8R8A8_SRGB:
- case PIPE_FORMAT_B8G8R8X8_SRGB:
- case PIPE_FORMAT_B5G6R5_UNORM:
- case PIPE_FORMAT_B5G5R5A1_UNORM:
- case PIPE_FORMAT_B10G10R10A2_UNORM:
- case PIPE_FORMAT_R8_UNORM:
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- case PIPE_FORMAT_R32G32B32_FLOAT:
- return TRUE;
- default:
- return FALSE;
- }
+ switch (format) {
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_SRGB:
+ case PIPE_FORMAT_B8G8R8X8_SRGB:
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ case PIPE_FORMAT_B5G5R5A1_UNORM:
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return TRUE;
+ default:
+ return FALSE;
+ }
}
static INLINE uint8_t
nv50_2d_format(enum pipe_format format)
{
- uint8_t id = nv50_format_table[format].rt;
-
- /* Hardware values for color formats range from 0xc0 to 0xff,
- * but the 2D engine doesn't support all of them.
- */
- if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0))))
- return id;
-
- switch (util_format_get_blocksize(format)) {
- case 1:
- return NV50_2D_DST_FORMAT_R8_UNORM;
- case 2:
- return NV50_2D_DST_FORMAT_R16_UNORM;
- case 4:
- return NV50_2D_DST_FORMAT_A8R8G8B8_UNORM;
- default:
- return 0;
- }
+ uint8_t id = nv50_format_table[format].rt;
+
+ /* Hardware values for color formats range from 0xc0 to 0xff,
+ * but the 2D engine doesn't support all of them.
+ */
+ if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0))))
+ return id;
+
+ switch (util_format_get_blocksize(format)) {
+ case 1:
+ return NV50_SURFACE_FORMAT_R8_UNORM;
+ case 2:
+ return NV50_SURFACE_FORMAT_R16_UNORM;
+ case 4:
+ return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM;
+ default:
+ return 0;
+ }
}
static int
-nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
+nv50_2d_texture_set(struct nouveau_channel *chan, int dst,
+ struct nv50_miptree *mt, unsigned level, unsigned layer)
{
- struct nv50_miptree *mt = nv50_miptree(ps->texture);
- struct nouveau_channel *chan = screen->eng2d->channel;
- struct nouveau_grobj *eng2d = screen->eng2d;
- struct nouveau_bo *bo = nv50_miptree(ps->texture)->base.bo;
- int format, mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT;
- int flags = NOUVEAU_BO_VRAM | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD);
-
- format = nv50_2d_format(ps->format);
- if (!format) {
- NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
- util_format_name(ps->format));
- return 1;
- }
-
- if (!nouveau_bo_tile_layout(bo)) {
- BEGIN_RING(chan, eng2d, mthd, 2);
- OUT_RING (chan, format);
- OUT_RING (chan, 1);
- BEGIN_RING(chan, eng2d, mthd + 0x14, 5);
- OUT_RING (chan, mt->level[ps->u.tex.level].pitch);
- OUT_RING (chan, ps->width);
- OUT_RING (chan, ps->height);
- OUT_RELOCh(chan, bo, ((struct nv50_surface *)ps)->offset, flags);
- OUT_RELOCl(chan, bo, ((struct nv50_surface *)ps)->offset, flags);
- } else {
- BEGIN_RING(chan, eng2d, mthd, 5);
- OUT_RING (chan, format);
- OUT_RING (chan, 0);
- OUT_RING (chan, mt->level[ps->u.tex.level].tile_mode << 4);
- OUT_RING (chan, 1);
- OUT_RING (chan, 0);
- BEGIN_RING(chan, eng2d, mthd + 0x18, 4);
- OUT_RING (chan, ps->width);
- OUT_RING (chan, ps->height);
- OUT_RELOCh(chan, bo, ((struct nv50_surface *)ps)->offset, flags);
- OUT_RELOCl(chan, bo, ((struct nv50_surface *)ps)->offset, flags);
- }
-
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t width, height, depth;
+ uint32_t format;
+ uint32_t mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT;
+ uint32_t flags = mt->base.domain | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD);
+ uint32_t offset = mt->level[level].offset;
+
+ format = nv50_2d_format(mt->base.base.format);
+ if (!format) {
+ NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
+ util_format_name(mt->base.base.format));
+ return 1;
+ }
+
+ width = u_minify(mt->base.base.width0, level);
+ height = u_minify(mt->base.base.height0, level);
+
+ offset = mt->level[level].offset;
+ if (!mt->layout_3d) {
+ offset += mt->layer_stride * layer;
+ depth = 1;
+ layer = 0;
+ } else {
+ depth = u_minify(mt->base.base.depth0, level);
+ }
+
+ if (!(bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK)) {
+ BEGIN_RING(chan, RING_2D_(mthd), 2);
+ OUT_RING (chan, format);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_2D_(mthd + 0x14), 5);
+ OUT_RING (chan, mt->level[level].pitch);
+ OUT_RING (chan, width);
+ OUT_RING (chan, height);
+ OUT_RELOCh(chan, bo, offset, flags);
+ OUT_RELOCl(chan, bo, offset, flags);
+ } else {
+ BEGIN_RING(chan, RING_2D_(mthd), 5);
+ OUT_RING (chan, format);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, mt->level[level].tile_mode << 4);
+ OUT_RING (chan, depth);
+ OUT_RING (chan, layer);
+ BEGIN_RING(chan, RING_2D_(mthd + 0x18), 4);
+ OUT_RING (chan, width);
+ OUT_RING (chan, height);
+ OUT_RELOCh(chan, bo, offset, flags);
+ OUT_RELOCl(chan, bo, offset, flags);
+ }
+
#if 0
- if (dst) {
- BEGIN_RING(chan, eng2d, NV50_2D_CLIP_X, 4);
- OUT_RING (chan, 0);
- OUT_RING (chan, 0);
- OUT_RING (chan, surf->width);
- OUT_RING (chan, surf->height);
- }
+ if (dst) {
+ BEGIN_RING(chan, RING_2D_(NV50_2D_CLIP_X), 4);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, width);
+ OUT_RING (chan, height);
+ }
#endif
-
- return 0;
+ return 0;
}
-int
-nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst,
- int dx, int dy, struct pipe_surface *src, int sx, int sy,
- int w, int h)
+static int
+nv50_2d_texture_do_copy(struct nouveau_channel *chan,
+ struct nv50_miptree *dst, unsigned dst_level,
+ unsigned dx, unsigned dy, unsigned dz,
+ struct nv50_miptree *src, unsigned src_level,
+ unsigned sx, unsigned sy, unsigned sz,
+ unsigned w, unsigned h)
{
- struct nouveau_channel *chan = screen->eng2d->channel;
- struct nouveau_grobj *eng2d = screen->eng2d;
- int ret;
-
- ret = MARK_RING(chan, 2*16 + 32, 4);
- if (ret)
- return ret;
-
- ret = nv50_surface_set(screen, dst, 1);
- if (ret)
- return ret;
-
- ret = nv50_surface_set(screen, src, 0);
- if (ret)
- return ret;
-
- BEGIN_RING(chan, eng2d, 0x088c, 1);
- OUT_RING (chan, 0);
- BEGIN_RING(chan, eng2d, NV50_2D_BLIT_DST_X, 4);
- OUT_RING (chan, dx);
- OUT_RING (chan, dy);
- OUT_RING (chan, w);
- OUT_RING (chan, h);
- BEGIN_RING(chan, eng2d, 0x08c0, 4);
- OUT_RING (chan, 0);
- OUT_RING (chan, 1);
- OUT_RING (chan, 0);
- OUT_RING (chan, 1);
- BEGIN_RING(chan, eng2d, 0x08d0, 4);
- OUT_RING (chan, 0);
- OUT_RING (chan, sx);
- OUT_RING (chan, 0);
- OUT_RING (chan, sy);
-
- return 0;
+ int ret;
+
+ ret = MARK_RING(chan, 2 * 16 + 32, 4);
+ if (ret)
+ return ret;
+
+ ret = nv50_2d_texture_set(chan, 1, dst, dst_level, dz);
+ if (ret)
+ return ret;
+
+ ret = nv50_2d_texture_set(chan, 0, src, src_level, sz);
+ if (ret)
+ return ret;
+
+ /* 0/1 = CENTER/CORNER, 10/00 = POINT/BILINEAR */
+ BEGIN_RING(chan, RING_2D(BLIT_CONTROL), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_2D(BLIT_DST_X), 4);
+ OUT_RING (chan, dx);
+ OUT_RING (chan, dy);
+ OUT_RING (chan, w);
+ OUT_RING (chan, h);
+ BEGIN_RING(chan, RING_2D(BLIT_DU_DX_FRACT), 4);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_2D(BLIT_SRC_X_FRACT), 4);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, sx);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, sy);
+
+ return 0;
}
static void
-nv50_surface_copy(struct pipe_context *pipe,
- struct pipe_resource *dest, unsigned dst_level,
- unsigned destx, unsigned desty, unsigned destz,
- struct pipe_resource *src, unsigned src_level,
- const struct pipe_box *src_box)
+nv50_resource_copy_region(struct pipe_context *pipe,
+ struct pipe_resource *dst, unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src, unsigned src_level,
+ const struct pipe_box *src_box)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nv50_screen *screen = nv50->screen;
- struct pipe_surface *ps_dst, *ps_src, surf_tmpl;
-
-
- assert((src->format == dest->format) ||
- (nv50_2d_format_faithful(src->format) &&
- nv50_2d_format_faithful(dest->format)));
- assert(src_box->depth == 1);
-
- memset(&surf_tmpl, 0, sizeof(surf_tmpl));
- surf_tmpl.format = src->format;
- surf_tmpl.usage = 0; /* no bind flag - not a surface */
- surf_tmpl.u.tex.level = src_level;
- surf_tmpl.u.tex.first_layer = src_box->z;
- surf_tmpl.u.tex.last_layer = src_box->z;
- /* XXX really need surfaces here? */
- ps_src = nv50_miptree_surface_new(pipe, src, &surf_tmpl);
- surf_tmpl.format = dest->format;
- surf_tmpl.usage = 0; /* no bind flag - not a surface */
- surf_tmpl.u.tex.level = dst_level;
- surf_tmpl.u.tex.first_layer = destz;
- surf_tmpl.u.tex.last_layer = destz;
- ps_dst = nv50_miptree_surface_new(pipe, dest, &surf_tmpl);
-
- nv50_surface_do_copy(screen, ps_dst, destx, desty, ps_src, src_box->x,
- src_box->y, src_box->width, src_box->height);
-
- nv50_miptree_surface_del(pipe, ps_src);
- nv50_miptree_surface_del(pipe, ps_dst);
+ struct nv50_screen *screen = nv50_context(pipe)->screen;
+ int ret;
+ unsigned dst_layer = dstz, src_layer = src_box->z;
+
+ assert((src->format == dst->format) ||
+ (nv50_2d_format_faithful(src->format) &&
+ nv50_2d_format_faithful(dst->format)));
+
+ for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) {
+ ret = nv50_2d_texture_do_copy(screen->base.channel,
+ nv50_miptree(dst), dst_level,
+ dstx, dsty, dst_layer,
+ nv50_miptree(src), src_level,
+ src_box->x, src_box->y, src_layer,
+ src_box->width, src_box->height);
+ if (ret)
+ return;
+ }
}
static void
nv50_clear_render_target(struct pipe_context *pipe,
- struct pipe_surface *dst,
- const float *rgba,
- unsigned dstx, unsigned dsty,
- unsigned width, unsigned height)
+ struct pipe_surface *dst,
+ const float *rgba,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nv50_screen *screen = nv50->screen;
- struct nouveau_channel *chan = screen->base.channel;
- struct nouveau_grobj *tesla = screen->tesla;
- struct nv50_miptree *mt = nv50_miptree(dst->texture);
- struct nouveau_bo *bo = mt->base.bo;
-
- BEGIN_RING(chan, tesla, NV50TCL_CLEAR_COLOR(0), 4);
- OUT_RINGf (chan, rgba[0]);
- OUT_RINGf (chan, rgba[1]);
- OUT_RINGf (chan, rgba[2]);
- OUT_RINGf (chan, rgba[3]);
-
- if (MARK_RING(chan, 18, 2))
- return;
-
- BEGIN_RING(chan, tesla, NV50TCL_RT_CONTROL, 1);
- OUT_RING (chan, 1);
- BEGIN_RING(chan, tesla, NV50TCL_RT_ADDRESS_HIGH(0), 5);
- OUT_RELOCh(chan, bo, ((struct nv50_surface *)dst)->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- OUT_RELOCl(chan, bo, ((struct nv50_surface *)dst)->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- OUT_RING (chan, nv50_format_table[dst->format].rt);
- OUT_RING (chan, mt->level[dst->u.tex.level].tile_mode << 4);
- OUT_RING (chan, 0);
- BEGIN_RING(chan, tesla, NV50TCL_RT_HORIZ(0), 2);
- OUT_RING (chan, dst->width);
- OUT_RING (chan, dst->height);
- BEGIN_RING(chan, tesla, NV50TCL_RT_ARRAY_MODE, 1);
- OUT_RING (chan, 1);
-
- /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */
-
- BEGIN_RING(chan, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2);
- OUT_RING (chan, (width << 16) | dstx);
- OUT_RING (chan, (height << 16) | dsty);
-
- BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1);
- OUT_RING (chan, 0x3c);
-
- nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_screen *screen = nv50->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nv50_miptree *mt = nv50_miptree(dst->texture);
+ struct nv50_surface *sf = nv50_surface(dst);
+ struct nouveau_bo *bo = mt->base.bo;
+
+ BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4);
+ OUT_RINGf (chan, rgba[0]);
+ OUT_RINGf (chan, rgba[1]);
+ OUT_RINGf (chan, rgba[2]);
+ OUT_RINGf (chan, rgba[3]);
+
+ if (MARK_RING(chan, 18, 2))
+ return;
+
+ BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(0)), 5);
+ OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RING (chan, nv50_format_table[dst->format].rt);
+ OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode << 4);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(RT_HORIZ(0)), 2);
+ OUT_RING (chan, sf->width);
+ OUT_RING (chan, sf->height);
+ BEGIN_RING(chan, RING_3D(RT_ARRAY_MODE), 1);
+ OUT_RING (chan, 1);
+
+ /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */
+
+ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
+ OUT_RING (chan, (width << 16) | dstx);
+ OUT_RING (chan, (height << 16) | dsty);
+
+ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
+ OUT_RING (chan, 0x3c);
+
+ nv50->dirty |= NV50_NEW_FRAMEBUFFER;
}
static void
nv50_clear_depth_stencil(struct pipe_context *pipe,
- struct pipe_surface *dst,
- unsigned clear_flags,
- double depth,
- unsigned stencil,
- unsigned dstx, unsigned dsty,
- unsigned width, unsigned height)
+ struct pipe_surface *dst,
+ unsigned clear_flags,
+ double depth,
+ unsigned stencil,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_screen *screen = nv50->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nv50_miptree *mt = nv50_miptree(dst->texture);
+ struct nv50_surface *sf = nv50_surface(dst);
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t mode = 0;
+
+ if (clear_flags & PIPE_CLEAR_DEPTH) {
+ BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1);
+ OUT_RINGf (chan, depth);
+ mode |= NV50_3D_CLEAR_BUFFERS_Z;
+ }
+
+ if (clear_flags & PIPE_CLEAR_STENCIL) {
+ BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1);
+ OUT_RING (chan, stencil & 0xff);
+ mode |= NV50_3D_CLEAR_BUFFERS_S;
+ }
+
+ if (MARK_RING(chan, 17, 2))
+ return;
+
+ BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5);
+ OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RING (chan, nv50_format_table[dst->format].rt);
+ OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode << 4);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3);
+ OUT_RING (chan, sf->width);
+ OUT_RING (chan, sf->height);
+ OUT_RING (chan, (1 << 16) | 1);
+
+ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
+ OUT_RING (chan, (width << 16) | dstx);
+ OUT_RING (chan, (height << 16) | dsty);
+
+ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
+ OUT_RING (chan, mode);
+
+ nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+}
+
+void
+nv50_clear(struct pipe_context *pipe, unsigned buffers,
+ const float *rgba, double depth, unsigned stencil)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nv50_screen *screen = nv50->screen;
- struct nouveau_channel *chan = screen->base.channel;
- struct nouveau_grobj *tesla = screen->tesla;
- struct nv50_miptree *mt = nv50_miptree(dst->texture);
- struct nouveau_bo *bo = mt->base.bo;
- uint32_t mode = 0;
-
- if (clear_flags & PIPE_CLEAR_DEPTH) {
- BEGIN_RING(chan, tesla, NV50TCL_CLEAR_DEPTH, 1);
- OUT_RINGf (chan, depth);
- mode |= NV50TCL_CLEAR_BUFFERS_Z;
- }
-
- if (clear_flags & PIPE_CLEAR_STENCIL) {
- BEGIN_RING(chan, tesla, NV50TCL_CLEAR_STENCIL, 1);
- OUT_RING (chan, stencil & 0xff);
- mode |= NV50TCL_CLEAR_BUFFERS_S;
- }
-
- if (MARK_RING(chan, 17, 2))
- return;
-
- BEGIN_RING(chan, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5);
- OUT_RELOCh(chan, bo, ((struct nv50_surface *)dst)->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- OUT_RELOCl(chan, bo, ((struct nv50_surface *)dst)->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- OUT_RING (chan, nv50_format_table[dst->format].rt);
- OUT_RING (chan, mt->level[dst->u.tex.level].tile_mode << 4);
- OUT_RING (chan, 0);
- BEGIN_RING(chan, tesla, NV50TCL_ZETA_ENABLE, 1);
- OUT_RING (chan, 1);
- BEGIN_RING(chan, tesla, NV50TCL_ZETA_HORIZ, 3);
- OUT_RING (chan, dst->width);
- OUT_RING (chan, dst->height);
- OUT_RING (chan, (1 << 16) | 1);
-
- BEGIN_RING(chan, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2);
- OUT_RING (chan, (width << 16) | dstx);
- OUT_RING (chan, (height << 16) | dsty);
-
- BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1);
- OUT_RING (chan, mode);
-
- nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+ unsigned i;
+ const unsigned dirty = nv50->dirty;
+ uint32_t mode = 0;
+
+ /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
+ nv50->dirty &= NV50_NEW_FRAMEBUFFER;
+ if (!nv50_state_validate(nv50))
+ return;
+
+ if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
+ BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4);
+ OUT_RINGf (chan, rgba[0]);
+ OUT_RINGf (chan, rgba[1]);
+ OUT_RINGf (chan, rgba[2]);
+ OUT_RINGf (chan, rgba[3]);
+ mode =
+ NV50_3D_CLEAR_BUFFERS_R | NV50_3D_CLEAR_BUFFERS_G |
+ NV50_3D_CLEAR_BUFFERS_B | NV50_3D_CLEAR_BUFFERS_A;
+ }
+
+ if (buffers & PIPE_CLEAR_DEPTH) {
+ BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1);
+ OUT_RING (chan, fui(depth));
+ mode |= NV50_3D_CLEAR_BUFFERS_Z;
+ }
+
+ if (buffers & PIPE_CLEAR_STENCIL) {
+ BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1);
+ OUT_RING (chan, stencil & 0xff);
+ mode |= NV50_3D_CLEAR_BUFFERS_S;
+ }
+
+ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
+ OUT_RING (chan, mode);
+
+ for (i = 1; i < fb->nr_cbufs; i++) {
+ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
+ OUT_RING (chan, (i << 6) | 0x3c);
+ }
+
+ nv50->dirty = dirty & ~NV50_NEW_FRAMEBUFFER;
}
void
nv50_init_surface_functions(struct nv50_context *nv50)
{
- nv50->pipe.resource_copy_region = nv50_surface_copy;
- nv50->pipe.clear_render_target = nv50_clear_render_target;
- nv50->pipe.clear_depth_stencil = nv50_clear_depth_stencil;
+ struct pipe_context *pipe = &nv50->base.pipe;
+
+ pipe->resource_copy_region = nv50_resource_copy_region;
+ pipe->clear_render_target = nv50_clear_render_target;
+ pipe->clear_depth_stencil = nv50_clear_depth_stencil;
}
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 9243f9edce..9192d2e259 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -21,217 +21,298 @@
*/
#include "nv50_context.h"
-#include "nv50_texture.h"
#include "nv50_resource.h"
-
-#include "nouveau/nouveau_stateobj.h"
-#include "nouveau/nouveau_reloc.h"
+#include "nv50_texture.xml.h"
+#include "nv50_defs.xml.h"
#include "util/u_format.h"
+#define NV50_TIC_0_SWIZZLE__MASK \
+ (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \
+ NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
+
static INLINE uint32_t
-nv50_tic_swizzle(uint32_t tc, unsigned swz)
+nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int)
{
- switch (swz) {
- case PIPE_SWIZZLE_RED:
- return (tc & NV50TIC_0_0_MAPR_MASK) >> NV50TIC_0_0_MAPR_SHIFT;
- case PIPE_SWIZZLE_GREEN:
- return (tc & NV50TIC_0_0_MAPG_MASK) >> NV50TIC_0_0_MAPG_SHIFT;
- case PIPE_SWIZZLE_BLUE:
- return (tc & NV50TIC_0_0_MAPB_MASK) >> NV50TIC_0_0_MAPB_SHIFT;
- case PIPE_SWIZZLE_ALPHA:
- return (tc & NV50TIC_0_0_MAPA_MASK) >> NV50TIC_0_0_MAPA_SHIFT;
- case PIPE_SWIZZLE_ONE:
- return 7;
- case PIPE_SWIZZLE_ZERO:
- default:
- return 0;
- }
+ switch (swz) {
+ case PIPE_SWIZZLE_RED:
+ return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT;
+ case PIPE_SWIZZLE_GREEN:
+ return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT;
+ case PIPE_SWIZZLE_BLUE:
+ return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT;
+ case PIPE_SWIZZLE_ALPHA:
+ return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT;
+ case PIPE_SWIZZLE_ONE:
+ return tex_int ? NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT;
+ case PIPE_SWIZZLE_ZERO:
+ default:
+ return NV50_TIC_MAP_ZERO;
+ }
}
-boolean
-nv50_tex_construct(struct nv50_sampler_view *view)
+struct pipe_sampler_view *
+nv50_create_sampler_view(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *templ)
{
- const struct util_format_description *desc;
- struct nv50_miptree *mt = nv50_miptree(view->pipe.texture);
- uint32_t swz[4], *tic = view->tic;
-
- tic[0] = nv50_format_table[view->pipe.format].tic;
-
- swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r);
- swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g);
- swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b);
- swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a);
- view->tic[0] = (tic[0] & ~NV50TIC_0_0_SWIZZLE_MASK) |
- (swz[0] << NV50TIC_0_0_MAPR_SHIFT) |
- (swz[1] << NV50TIC_0_0_MAPG_SHIFT) |
- (swz[2] << NV50TIC_0_0_MAPB_SHIFT) |
- (swz[3] << NV50TIC_0_0_MAPA_SHIFT);
-
- tic[2] = 0x50001000;
- tic[2] |= ((mt->base.bo->tile_mode & 0x0f) << 22) |
- ((mt->base.bo->tile_mode & 0xf0) << 21);
-
- desc = util_format_description(mt->base.base.format);
- if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
- tic[2] |= NV50TIC_0_2_COLORSPACE_SRGB;
-
- switch (mt->base.base.target) {
- case PIPE_TEXTURE_1D:
- tic[2] |= NV50TIC_0_2_TARGET_1D;
- break;
- case PIPE_TEXTURE_2D:
- tic[2] |= NV50TIC_0_2_TARGET_2D;
- break;
- case PIPE_TEXTURE_RECT:
- tic[2] |= NV50TIC_0_2_TARGET_RECT;
- break;
- case PIPE_TEXTURE_3D:
- tic[2] |= NV50TIC_0_2_TARGET_3D;
- break;
- case PIPE_TEXTURE_CUBE:
- tic[2] |= NV50TIC_0_2_TARGET_CUBE;
- break;
- default:
- NOUVEAU_ERR("invalid texture target: %d\n",
- mt->base.base.target);
- return FALSE;
- }
-
- tic[3] = 0x00300000;
-
- tic[4] = (1 << 31) | mt->base.base.width0;
- tic[5] = (mt->base.base.last_level << 28) |
- (mt->base.base.depth0 << 16) | mt->base.base.height0;
-
- tic[6] = 0x03000000;
-
- tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
-
- return TRUE;
+ const struct util_format_description *desc;
+ uint32_t *tic;
+ uint32_t swz[4];
+ uint32_t depth;
+ struct nv50_tic_entry *view;
+ struct nv50_miptree *mt = nv50_miptree(texture);
+ boolean tex_int;
+
+ view = MALLOC_STRUCT(nv50_tic_entry);
+ if (!view)
+ return NULL;
+
+ view->pipe = *templ;
+ view->pipe.reference.count = 1;
+ view->pipe.texture = NULL;
+ view->pipe.context = pipe;
+
+ view->id = -1;
+
+ pipe_resource_reference(&view->pipe.texture, texture);
+
+ tic = &view->tic[0];
+
+ desc = util_format_description(view->pipe.format);
+
+ /* TIC[0] */
+
+ tic[0] = nv50_format_table[view->pipe.format].tic;
+
+ tex_int = FALSE; /* XXX: integer textures */
+
+ swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int);
+ swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int);
+ swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int);
+ swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int);
+ tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) |
+ (swz[0] << NV50_TIC_0_MAPR__SHIFT) |
+ (swz[1] << NV50_TIC_0_MAPG__SHIFT) |
+ (swz[2] << NV50_TIC_0_MAPB__SHIFT) |
+ (swz[3] << NV50_TIC_0_MAPA__SHIFT);
+
+ tic[1] = /* mt->base.bo->offset; */ 0;
+ tic[2] = /* mt->base.bo->offset >> 32 */ 0;
+
+ tic[2] |= 0x10001000 | NV50_TIC_2_NO_BORDER;
+
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+ tic[2] |= NV50_TIC_2_COLORSPACE_SRGB;
+
+ if (mt->base.base.target != PIPE_TEXTURE_RECT)
+ tic[2] |= NV50_TIC_2_NORMALIZED_COORDS;
+
+ tic[2] |=
+ ((mt->base.bo->tile_mode & 0x0f) << (22 - 0)) |
+ ((mt->base.bo->tile_mode & 0xf0) << (25 - 4));
+
+ depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
+
+ if (mt->base.base.target == PIPE_TEXTURE_1D_ARRAY ||
+ mt->base.base.target == PIPE_TEXTURE_2D_ARRAY) {
+ tic[1] = view->pipe.u.tex.first_layer * mt->layer_stride;
+ depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
+ }
+
+ switch (mt->base.base.target) {
+ case PIPE_TEXTURE_1D:
+ tic[2] |= NV50_TIC_2_TARGET_1D;
+ break;
+ case PIPE_TEXTURE_2D:
+ tic[2] |= NV50_TIC_2_TARGET_2D;
+ break;
+ case PIPE_TEXTURE_RECT:
+ tic[2] |= NV50_TIC_2_TARGET_RECT;
+ break;
+ case PIPE_TEXTURE_3D:
+ tic[2] |= NV50_TIC_2_TARGET_3D;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ depth /= 6;
+ if (depth > 1)
+ tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY;
+ else
+ tic[2] |= NV50_TIC_2_TARGET_CUBE;
+ break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY;
+ break;
+ case PIPE_TEXTURE_2D_ARRAY:
+ tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY;
+ break;
+ case PIPE_BUFFER:
+ tic[2] |= NV50_TIC_2_TARGET_BUFFER | NV50_TIC_2_LINEAR;
+ break;
+ default:
+ NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target);
+ return FALSE;
+ }
+
+ if (mt->base.base.target == PIPE_BUFFER)
+ tic[3] = mt->base.base.width0;
+ else
+ tic[3] = 0x00300000;
+
+ tic[4] = (1 << 31) | mt->base.base.width0;
+
+ tic[5] = mt->base.base.height0 & 0xffff;
+ tic[5] |= depth << 16;
+ tic[5] |= mt->base.base.last_level << 28;
+
+ tic[6] = 0x03000000;
+
+ tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
+
+ return &view->pipe;
}
-static int
-nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so,
- unsigned p)
+static boolean
+nv50_validate_tic(struct nv50_context *nv50, int s)
{
- struct nouveau_grobj *eng2d = nv50->screen->eng2d;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- unsigned unit, j;
-
- const unsigned rll = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW;
- const unsigned rlh = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH
- | NOUVEAU_BO_OR;
-
- nv50_so_init_sifc(nv50, so, nv50->screen->tic, NOUVEAU_BO_VRAM,
- p * (32 * 8 * 4), nv50->sampler_view_nr[p] * 8 * 4);
-
- for (unit = 0; unit < nv50->sampler_view_nr[p]; ++unit) {
- struct nv50_sampler_view *view =
- nv50_sampler_view(nv50->sampler_views[p][unit]);
-
- so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8);
- if (view) {
- uint32_t tic2 = view->tic[2];
- struct nv50_miptree *mt =
- nv50_miptree(view->pipe.texture);
-
- tic2 &= ~NV50TIC_0_2_NORMALIZED_COORDS;
- if (nv50->sampler[p][unit]->normalized)
- tic2 |= NV50TIC_0_2_NORMALIZED_COORDS;
- view->tic[2] = tic2;
-
- so_data (so, view->tic[0]);
- so_reloc (so, mt->base.bo, 0, rll, 0, 0);
- so_reloc (so, mt->base.bo, 0, rlh, tic2, tic2);
- so_datap (so, &view->tic[3], 5);
-
- /* Set TEX insn $t src binding $unit in program type p
- * to TIC, TSC entry (32 * p + unit), mark valid (1).
- */
- so_method(so, tesla, NV50TCL_BIND_TIC(p), 1);
- so_data (so, ((32 * p + unit) << 9) | (unit << 1) | 1);
- } else {
- for (j = 0; j < 8; ++j)
- so_data(so, 0);
- so_method(so, tesla, NV50TCL_BIND_TIC(p), 1);
- so_data (so, (unit << 1) | 0);
- }
- }
-
- for (; unit < nv50->state.sampler_view_nr[p]; unit++) {
- /* Make other bindings invalid. */
- so_method(so, tesla, NV50TCL_BIND_TIC(p), 1);
- so_data (so, (unit << 1) | 0);
- }
-
- nv50->state.sampler_view_nr[p] = nv50->sampler_view_nr[p];
- return TRUE;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nouveau_bo *txc = nv50->screen->txc;
+ unsigned i;
+ boolean need_flush = FALSE;
+
+ for (i = 0; i < nv50->num_textures[s]; ++i) {
+ struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]);
+ struct nv04_resource *res;
+
+ if (!tic) {
+ BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
+ OUT_RING (chan, (i << 1) | 0);
+ continue;
+ }
+ res = &nv50_miptree(tic->pipe.texture)->base;
+
+ if (tic->id < 0) {
+ uint32_t offset = tic->tic[1];
+
+ tic->id = nv50_screen_tic_alloc(nv50->screen, tic);
+
+ MARK_RING (chan, 24 + 8, 4);
+ BEGIN_RING(chan, RING_2D(DST_FORMAT), 2);
+ OUT_RING (chan, NV50_SURFACE_FORMAT_R8_UNORM);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_2D(DST_PITCH), 5);
+ OUT_RING (chan, 262144);
+ OUT_RING (chan, 65536);
+ OUT_RING (chan, 1);
+ OUT_RELOCh(chan, txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, RING_2D(SIFC_BITMAP_ENABLE), 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, NV50_SURFACE_FORMAT_R8_UNORM);
+ BEGIN_RING(chan, RING_2D(SIFC_WIDTH), 10);
+ OUT_RING (chan, 32);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, tic->id * 32);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING_NI(chan, RING_2D(SIFC_DATA), 8);
+ OUT_RING (chan, tic->tic[0]);
+ OUT_RELOCl(chan, res->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOC (chan, res->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_HIGH | NOUVEAU_BO_OR, tic->tic[2], tic->tic[2]);
+ OUT_RINGp (chan, &tic->tic[3], 5);
+
+ need_flush = TRUE;
+ } else
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1);
+ OUT_RING (chan, 0x20); //(tic->id << 4) | 1);
+ }
+
+ nv50->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
+
+ res->status &= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ nv50_bufctx_add_resident(nv50, NV50_BUFCTX_TEXTURES, res,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+
+ BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
+ OUT_RING (chan, (tic->id << 9) | (i << 1) | 1);
+ }
+ for (; i < nv50->state.num_textures[s]; ++i) {
+ BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
+ OUT_RING (chan, (i << 1) | 0);
+ }
+ nv50->state.num_textures[s] = nv50->num_textures[s];
+
+ return need_flush;
}
-static void
-nv50_emit_texture_relocs(struct nv50_context *nv50, int prog)
+void nv50_validate_textures(struct nv50_context *nv50)
{
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_bo *tic = nv50->screen->tic;
- int unit;
-
- for (unit = 0; unit < nv50->sampler_view_nr[prog]; unit++) {
- struct nv50_sampler_view *view;
- struct nv50_miptree *mt;
- const unsigned base = ((prog * 32) + unit) * 32;
-
- view = nv50_sampler_view(nv50->sampler_views[prog][unit]);
- if (!view)
- continue;
- mt = nv50_miptree(view->pipe.texture);
-
- nouveau_reloc_emit(chan, tic, base + 4, NULL, mt->base.bo, 0, 0,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_LOW, 0, 0);
- nouveau_reloc_emit(chan, tic, base + 8, NULL, mt->base.bo, 0, 0,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_HIGH, view->tic[2], view->tic[2]);
- }
+ boolean need_flush;
+
+ need_flush = nv50_validate_tic(nv50, 0);
+ need_flush |= nv50_validate_tic(nv50, 2);
+
+ if (need_flush) {
+ BEGIN_RING(nv50->screen->base.channel, RING_3D(TIC_FLUSH), 1);
+ OUT_RING (nv50->screen->base.channel, 0);
+ }
}
-void
-nv50_tex_relocs(struct nv50_context *nv50)
+static boolean
+nv50_validate_tsc(struct nv50_context *nv50, int s)
{
- nv50_emit_texture_relocs(nv50, 2); /* FP */
- nv50_emit_texture_relocs(nv50, 0); /* VP */
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ unsigned i;
+ boolean need_flush = FALSE;
+
+ for (i = 0; i < nv50->num_samplers[s]; ++i) {
+ struct nv50_tsc_entry *tsc = nv50_tsc_entry(nv50->samplers[s][i]);
+
+ if (!tsc) {
+ BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
+ OUT_RING (chan, (i << 4) | 0);
+ continue;
+ }
+ if (tsc->id < 0) {
+ tsc->id = nv50_screen_tsc_alloc(nv50->screen, tsc);
+
+ nv50_sifc_linear_u8(&nv50->base, nv50->screen->txc,
+ 65536 + tsc->id * 32,
+ NOUVEAU_BO_VRAM, 32, tsc->tsc);
+ need_flush = TRUE;
+ }
+ nv50->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
+
+ BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
+ OUT_RING (chan, (tsc->id << 12) | (i << 4) | 1);
+ }
+ for (; i < nv50->state.num_samplers[s]; ++i) {
+ BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
+ OUT_RING (chan, (i << 4) | 0);
+ }
+ nv50->state.num_samplers[s] = nv50->num_samplers[s];
+
+ return need_flush;
}
-struct nouveau_stateobj *
-nv50_tex_validate(struct nv50_context *nv50)
+void nv50_validate_samplers(struct nv50_context *nv50)
{
- struct nouveau_stateobj *so;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- unsigned p, m = 0, d = 0, r = 0;
-
- for (p = 0; p < 3; ++p) {
- unsigned nr = MAX2(nv50->sampler_view_nr[p],
- nv50->state.sampler_view_nr[p]);
- m += nr;
- d += nr;
- r += nv50->sampler_view_nr[p];
- }
- m = m * 2 + 3 * 4 + 1;
- d = d * 9 + 3 * 19 + 1;
- r = r * 2 + 3 * 2;
-
- so = so_new(m, d, r);
-
- if (nv50_validate_textures(nv50, so, 0) == FALSE ||
- nv50_validate_textures(nv50, so, 2) == FALSE) {
- so_ref(NULL, &so);
-
- NOUVEAU_ERR("failed tex validate\n");
- return NULL;
- }
-
- so_method(so, tesla, 0x1330, 1); /* flush TIC */
- so_data (so, 0);
-
- return so;
+ boolean need_flush;
+
+ need_flush = nv50_validate_tsc(nv50, 0);
+ need_flush |= nv50_validate_tsc(nv50, 2);
+
+ if (need_flush) {
+ BEGIN_RING(nv50->screen->base.channel, RING_3D(TSC_FLUSH), 1);
+ OUT_RING (nv50->screen->base.channel, 0);
+ }
}
diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h
deleted file mode 100644
index b4939943e8..0000000000
--- a/src/gallium/drivers/nv50/nv50_texture.h
+++ /dev/null
@@ -1,197 +0,0 @@
-#ifndef __NV50_TEXTURE_H__
-#define __NV50_TEXTURE_H__
-
-/* It'd be really nice to have these in nouveau_class.h generated by
- * renouveau like the rest of the object header - but not sure it can
- * handle non-object stuff nicely - need to look into it.
- */
-
-/* Texture image control block */
-#define NV50TIC_0_0_SWIZZLE_MASK 0x3ffc0000
-#define NV50TIC_0_0_MAPA_MASK 0x38000000
-#define NV50TIC_0_0_MAPA_SHIFT 27
-#define NV50TIC_0_0_MAPA_ZERO 0x00000000
-#define NV50TIC_0_0_MAPA_C0 0x10000000
-#define NV50TIC_0_0_MAPA_C1 0x18000000
-#define NV50TIC_0_0_MAPA_C2 0x20000000
-#define NV50TIC_0_0_MAPA_C3 0x28000000
-#define NV50TIC_0_0_MAPA_ONE 0x38000000
-#define NV50TIC_0_0_MAPB_MASK 0x07000000
-#define NV50TIC_0_0_MAPB_SHIFT 24
-#define NV50TIC_0_0_MAPB_ZERO 0x00000000
-#define NV50TIC_0_0_MAPB_C0 0x02000000
-#define NV50TIC_0_0_MAPB_C1 0x03000000
-#define NV50TIC_0_0_MAPB_C2 0x04000000
-#define NV50TIC_0_0_MAPB_C3 0x05000000
-#define NV50TIC_0_0_MAPB_ONE 0x07000000
-#define NV50TIC_0_0_MAPG_MASK 0x00e00000
-#define NV50TIC_0_0_MAPG_SHIFT 21
-#define NV50TIC_0_0_MAPG_ZERO 0x00000000
-#define NV50TIC_0_0_MAPG_C0 0x00400000
-#define NV50TIC_0_0_MAPG_C1 0x00600000
-#define NV50TIC_0_0_MAPG_C2 0x00800000
-#define NV50TIC_0_0_MAPG_C3 0x00a00000
-#define NV50TIC_0_0_MAPG_ONE 0x00e00000
-#define NV50TIC_0_0_MAPR_MASK 0x001c0000
-#define NV50TIC_0_0_MAPR_SHIFT 18
-#define NV50TIC_0_0_MAPR_ZERO 0x00000000
-#define NV50TIC_0_0_MAPR_C0 0x00080000
-#define NV50TIC_0_0_MAPR_C1 0x000c0000
-#define NV50TIC_0_0_MAPR_C2 0x00100000
-#define NV50TIC_0_0_MAPR_C3 0x00140000
-#define NV50TIC_0_0_MAPR_ONE 0x001c0000
-#define NV50TIC_0_0_TYPEA_MASK 0x00038000
-#define NV50TIC_0_0_TYPEA_UNORM 0x00010000
-#define NV50TIC_0_0_TYPEA_SNORM 0x00008000
-#define NV50TIC_0_0_TYPEA_SINT 0x00018000
-#define NV50TIC_0_0_TYPEA_UINT 0x00020000
-#define NV50TIC_0_0_TYPEA_SSCALED 0x00028000
-#define NV50TIC_0_0_TYPEA_USCALED 0x00030000
-#define NV50TIC_0_0_TYPEA_FLOAT 0x00038000
-#define NV50TIC_0_0_TYPEB_MASK 0x00007000
-#define NV50TIC_0_0_TYPEB_UNORM 0x00002000
-#define NV50TIC_0_0_TYPEB_SNORM 0x00001000
-#define NV50TIC_0_0_TYPEB_SINT 0x00003000
-#define NV50TIC_0_0_TYPEB_UINT 0x00004000
-#define NV50TIC_0_0_TYPEB_SSCALED 0x00005000
-#define NV50TIC_0_0_TYPEB_USCALED 0x00006000
-#define NV50TIC_0_0_TYPEB_FLOAT 0x00007000
-#define NV50TIC_0_0_TYPEG_MASK 0x00000e00
-#define NV50TIC_0_0_TYPEG_UNORM 0x00000400
-#define NV50TIC_0_0_TYPEG_SNORM 0x00000200
-#define NV50TIC_0_0_TYPEG_SINT 0x00000600
-#define NV50TIC_0_0_TYPEG_UINT 0x00000800
-#define NV50TIC_0_0_TYPEG_SSCALED 0x00000a00
-#define NV50TIC_0_0_TYPEG_USCALED 0x00000c00
-#define NV50TIC_0_0_TYPEG_FLOAT 0x00000e00
-#define NV50TIC_0_0_TYPER_MASK 0x000001c0
-#define NV50TIC_0_0_TYPER_UNORM 0x00000080
-#define NV50TIC_0_0_TYPER_SNORM 0x00000040
-#define NV50TIC_0_0_TYPER_SINT 0x000000c0
-#define NV50TIC_0_0_TYPER_UINT 0x00000100
-#define NV50TIC_0_0_TYPER_SSCALED 0x00000140
-#define NV50TIC_0_0_TYPER_USCALED 0x00000180
-#define NV50TIC_0_0_TYPER_FLOAT 0x000001c0
-#define NV50TIC_0_0_FMT_MASK 0x0000003f
-#define NV50TIC_0_0_FMT_32_32_32_32 0x00000001
-#define NV50TIC_0_0_FMT_16_16_16_16 0x00000003
-#define NV50TIC_0_0_FMT_32_32 0x00000004
-#define NV50TIC_0_0_FMT_8_8_8_8 0x00000008
-#define NV50TIC_0_0_FMT_2_10_10_10 0x00000009
-#define NV50TIC_0_0_FMT_16_16 0x0000000c
-#define NV50TIC_0_0_FMT_32 0x0000000f
-#define NV50TIC_0_0_FMT_4_4_4_4 0x00000012
-/* #define NV50TIC_0_0_FMT_1_5_5_5 0x00000013 */
-#define NV50TIC_0_0_FMT_1_5_5_5 0x00000014
-#define NV50TIC_0_0_FMT_5_6_5 0x00000015
-#define NV50TIC_0_0_FMT_8_8 0x00000018
-#define NV50TIC_0_0_FMT_16 0x0000001b
-#define NV50TIC_0_0_FMT_8 0x0000001d
-#define NV50TIC_0_0_FMT_5_9_9_9 0x00000020
-#define NV50TIC_0_0_FMT_10_11_11 0x00000021
-#define NV50TIC_0_0_FMT_DXT1 0x00000024
-#define NV50TIC_0_0_FMT_DXT3 0x00000025
-#define NV50TIC_0_0_FMT_DXT5 0x00000026
-#define NV50TIC_0_0_FMT_RGTC1 0x00000027
-#define NV50TIC_0_0_FMT_RGTC2 0x00000028
-#define NV50TIC_0_0_FMT_24_8 0x00000029
-#define NV50TIC_0_0_FMT_8_24 0x0000002a
-#define NV50TIC_0_0_FMT_32_DEPTH 0x0000002f
-#define NV50TIC_0_0_FMT_32_8 0x00000030
-#define NV50TIC_0_0_FMT_16_DEPTH 0x0000003a
-
-#define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff
-#define NV50TIC_0_1_OFFSET_LOW_SHIFT 0
-
-#define NV50TIC_0_2_COLORSPACE_SRGB 0x00000400
-#define NV50TIC_0_2_TARGET_1D 0x00000000
-#define NV50TIC_0_2_TARGET_2D 0x00004000
-#define NV50TIC_0_2_TARGET_3D 0x00008000
-#define NV50TIC_0_2_TARGET_CUBE 0x0000c000
-#define NV50TIC_0_2_TARGET_1D_ARRAY 0x00010000
-#define NV50TIC_0_2_TARGET_2D_ARRAY 0x00014000
-#define NV50TIC_0_2_TARGET_BUFFER 0x00018000
-#define NV50TIC_0_2_TARGET_RECT 0x0001c000
-/* #define NV50TIC_0_0_TILE_MODE_LINEAR 0x00040000 */
-#define NV50TIC_0_2_TILE_MODE_Y_MASK 0x01c00000
-#define NV50TIC_0_2_TILE_MODE_Y_SHIFT 22
-#define NV50TIC_0_2_TILE_MODE_Z_MASK 0x0e000000
-#define NV50TIC_0_2_TILE_MODE_Z_SHIFT 25
-#define NV50TIC_0_2_NORMALIZED_COORDS 0x80000000
-
-#define NV50TIC_0_3_UNKNOWN_MASK 0xffffffff
-
-#define NV50TIC_0_4_WIDTH_MASK 0x0000ffff
-#define NV50TIC_0_4_WIDTH_SHIFT 0
-
-#define NV50TIC_0_5_LAST_LEVEL_MASK 0xf0000000
-#define NV50TIC_0_5_LAST_LEVEL_SHIFT 28
-#define NV50TIC_0_5_DEPTH_MASK 0x0fff0000
-#define NV50TIC_0_5_DEPTH_SHIFT 16
-#define NV50TIC_0_5_HEIGHT_MASK 0x0000ffff
-#define NV50TIC_0_5_HEIGHT_SHIFT 0
-#define NV50TIC_0_6_UNKNOWN_MASK 0xffffffff
-
-#define NV50TIC_0_7_BASE_LEVEL_MASK 0x0000000f
-#define NV50TIC_0_7_BASE_LEVEL_SHIFT 0
-#define NV50TIC_0_7_MAX_LEVEL_MASK 0x000000f0
-#define NV50TIC_0_7_MAX_LEVEL_SHIFT 4
-
-/* Texture sampler control block */
-#define NV50TSC_1_0_WRAPS_MASK 0x00000007
-#define NV50TSC_1_0_WRAPS_REPEAT 0x00000000
-#define NV50TSC_1_0_WRAPS_MIRROR_REPEAT 0x00000001
-#define NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE 0x00000002
-#define NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER 0x00000003
-#define NV50TSC_1_0_WRAPS_CLAMP 0x00000004
-#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE 0x00000005
-#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER 0x00000006
-#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP 0x00000007
-#define NV50TSC_1_0_WRAPT_MASK 0x00000038
-#define NV50TSC_1_0_WRAPT_REPEAT 0x00000000
-#define NV50TSC_1_0_WRAPT_MIRROR_REPEAT 0x00000008
-#define NV50TSC_1_0_WRAPT_CLAMP_TO_EDGE 0x00000010
-#define NV50TSC_1_0_WRAPT_CLAMP_TO_BORDER 0x00000018
-#define NV50TSC_1_0_WRAPT_CLAMP 0x00000020
-#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_EDGE 0x00000028
-#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_BORDER 0x00000030
-#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP 0x00000038
-#define NV50TSC_1_0_WRAPR_MASK 0x000001c0
-#define NV50TSC_1_0_WRAPR_REPEAT 0x00000000
-#define NV50TSC_1_0_WRAPR_MIRROR_REPEAT 0x00000040
-#define NV50TSC_1_0_WRAPR_CLAMP_TO_EDGE 0x00000080
-#define NV50TSC_1_0_WRAPR_CLAMP_TO_BORDER 0x000000c0
-#define NV50TSC_1_0_WRAPR_CLAMP 0x00000100
-#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_EDGE 0x00000140
-#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_BORDER 0x00000180
-#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP 0x000001c0
-#define NV50TSC_1_0_MAX_ANISOTROPY_MASK 0x00700000
-
-#define NV50TSC_1_1_MAGF_MASK 0x00000003
-#define NV50TSC_1_1_MAGF_NEAREST 0x00000001
-#define NV50TSC_1_1_MAGF_LINEAR 0x00000002
-#define NV50TSC_1_1_MINF_MASK 0x00000030
-#define NV50TSC_1_1_MINF_NEAREST 0x00000010
-#define NV50TSC_1_1_MINF_LINEAR 0x00000020
-#define NV50TSC_1_1_MIPF_MASK 0x000000c0
-#define NV50TSC_1_1_MIPF_NONE 0x00000040
-#define NV50TSC_1_1_MIPF_NEAREST 0x00000080
-#define NV50TSC_1_1_MIPF_LINEAR 0x000000c0
-#define NV50TSC_1_1_LOD_BIAS_MASK 0x01fff000
-#define NV50TSC_1_1_UNKN_ANISO_15 0x10000000
-#define NV50TSC_1_1_UNKN_ANISO_35 0x18000000
-
-#define NV50TSC_1_2_MIN_LOD_MASK 0x00000f00
-#define NV50TSC_1_2_MAX_LOD_MASK 0x00f00000
-
-#define NV50TSC_1_3_UNKNOWN_MASK 0xffffffff
-
-#define NV50TSC_1_4_BORDER_COLOR_RED_MASK 0xffffffff
-
-#define NV50TSC_1_5_BORDER_COLOR_GREEN_MASK 0xffffffff
-
-#define NV50TSC_1_6_BORDER_COLOR_BLUE_MASK 0xffffffff
-
-#define NV50TSC_1_7_BORDER_COLOR_ALPHA_MASK 0xffffffff
-
-#endif
diff --git a/src/gallium/drivers/nv50/nv50_texture.xml.h b/src/gallium/drivers/nv50/nv50_texture.xml.h
new file mode 100644
index 0000000000..e0cbbdf0d7
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_texture.xml.h
@@ -0,0 +1,279 @@
+#ifndef NV50_TEXTURE_XML
+#define NV50_TEXTURE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nv50_texture.xml ( 8377 bytes, from 2011-02-12 12:05:21)
+- copyright.xml ( 6452 bytes, from 2010-11-25 23:28:20)
+
+Copyright (C) 2006-2011 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define NV50_TIC_MAP_ZERO 0x00000000
+#define NV50_TIC_MAP_C0 0x00000002
+#define NV50_TIC_MAP_C1 0x00000003
+#define NV50_TIC_MAP_C2 0x00000004
+#define NV50_TIC_MAP_C3 0x00000005
+#define NV50_TIC_MAP_ONE_INT 0x00000006
+#define NV50_TIC_MAP_ONE_FLOAT 0x00000007
+#define NV50_TIC_TYPE_SNORM 0x00000001
+#define NV50_TIC_TYPE_UNORM 0x00000002
+#define NV50_TIC_TYPE_SINT 0x00000003
+#define NV50_TIC_TYPE_UINT 0x00000004
+#define NV50_TIC_TYPE_SSCALED 0x00000005
+#define NV50_TIC_TYPE_USCALED 0x00000006
+#define NV50_TIC_TYPE_FLOAT 0x00000007
+#define NV50_TSC_WRAP_REPEAT 0x00000000
+#define NV50_TSC_WRAP_MIRROR_REPEAT 0x00000001
+#define NV50_TSC_WRAP_CLAMP_TO_EDGE 0x00000002
+#define NV50_TSC_WRAP_CLAMP_TO_BORDER 0x00000003
+#define NV50_TSC_WRAP_CLAMP 0x00000004
+#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_EDGE 0x00000005
+#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_BORDER 0x00000006
+#define NV50_TSC_WRAP_MIRROR_CLAMP 0x00000007
+#define NV50_TIC__SIZE 0x00000020
+#define NV50_TIC_0 0x00000000
+#define NV50_TIC_0_MAPA__MASK 0x38000000
+#define NV50_TIC_0_MAPA__SHIFT 27
+#define NV50_TIC_0_MAPB__MASK 0x07000000
+#define NV50_TIC_0_MAPB__SHIFT 24
+#define NV50_TIC_0_MAPG__MASK 0x00e00000
+#define NV50_TIC_0_MAPG__SHIFT 21
+#define NV50_TIC_0_MAPR__MASK 0x001c0000
+#define NV50_TIC_0_MAPR__SHIFT 18
+#define NV50_TIC_0_TYPE3__MASK 0x00038000
+#define NV50_TIC_0_TYPE3__SHIFT 15
+#define NV50_TIC_0_TYPE2__MASK 0x00007000
+#define NV50_TIC_0_TYPE2__SHIFT 12
+#define NV50_TIC_0_TYPE1__MASK 0x00000e00
+#define NV50_TIC_0_TYPE1__SHIFT 9
+#define NV50_TIC_0_TYPE0__MASK 0x000001c0
+#define NV50_TIC_0_TYPE0__SHIFT 6
+#define NV50_TIC_0_FMT__MASK 0x0000003f
+#define NV50_TIC_0_FMT__SHIFT 0
+#define NV50_TIC_0_FMT_32_32_32_32 0x00000001
+#define NV50_TIC_0_FMT_16_16_16_16 0x00000003
+#define NV50_TIC_0_FMT_32_32 0x00000004
+#define NV50_TIC_0_FMT_32_8 0x00000005
+#define NV50_TIC_0_FMT_8_8_8_8 0x00000008
+#define NV50_TIC_0_FMT_2_10_10_10 0x00000009
+#define NV50_TIC_0_FMT_16_16 0x0000000c
+#define NV50_TIC_0_FMT_8_24 0x0000000d
+#define NV50_TIC_0_FMT_24_8 0x0000000e
+#define NV50_TIC_0_FMT_32 0x0000000f
+#define NV50_TIC_0_FMT_BPTC_FLOAT 0x00000010
+#define NV50_TIC_0_FMT_BPTC_UFLOAT 0x00000011
+#define NV50_TIC_0_FMT_4_4_4_4 0x00000012
+#define NV50_TIC_0_FMT_5_5_5_1 0x00000013
+#define NV50_TIC_0_FMT_1_5_5_5 0x00000014
+#define NV50_TIC_0_FMT_5_6_5 0x00000015
+#define NV50_TIC_0_FMT_6_5_5 0x00000016
+#define NV50_TIC_0_FMT_BPTC 0x00000017
+#define NV50_TIC_0_FMT_8_8 0x00000018
+#define NV50_TIC_0_FMT_16 0x0000001b
+#define NV50_TIC_0_FMT_8 0x0000001d
+#define NV50_TIC_0_FMT_4_4 0x0000001e
+#define NV50_TIC_0_FMT_BITMAP_8X8 0x0000001f
+#define NV50_TIC_0_FMT_E5_9_9_9 0x00000020
+#define NV50_TIC_0_FMT_10_11_11 0x00000021
+#define NV50_TIC_0_FMT_C1_C2_C1_C0 0x00000022
+#define NV50_TIC_0_FMT_C2_C1_C0_C1 0x00000023
+#define NV50_TIC_0_FMT_DXT1 0x00000024
+#define NV50_TIC_0_FMT_DXT3 0x00000025
+#define NV50_TIC_0_FMT_DXT5 0x00000026
+#define NV50_TIC_0_FMT_RGTC1 0x00000027
+#define NV50_TIC_0_FMT_RGTC2 0x00000028
+#define NV50_TIC_0_FMT_Z24S8 0x00000029
+#define NV50_TIC_0_FMT_S8Z24 0x0000002a
+#define NV50_TIC_0_FMT_X8Z24 0x0000002b
+#define NV50_TIC_0_FMT_C8Z24_MS4_CS4 0x0000002c
+#define NV50_TIC_0_FMT_C8Z24_MS8_CS8 0x0000002d
+#define NV50_TIC_0_FMT_C8Z24_MS4_CS12 0x0000002e
+#define NV50_TIC_0_FMT_Z32 0x0000002f
+#define NV50_TIC_0_FMT_X24S8Z32 0x00000030
+#define NV50_TIC_0_FMT_X16C8S8X8Z24_MS4_CS4 0x00000031
+#define NV50_TIC_0_FMT_X16C8S8X8Z24_MS8_CS8 0x00000032
+#define NV50_TIC_0_FMT_X16C8X8Z32_MS4_CS4 0x00000033
+#define NV50_TIC_0_FMT_X16C8X8Z32_MS8_CS8 0x00000034
+#define NV50_TIC_0_FMT_X16C8S8Z32_MS4_CS4 0x00000035
+#define NV50_TIC_0_FMT_X16C8S8Z32_MS8_CS8 0x00000036
+#define NV50_TIC_0_FMT_X16C8S8X8Z24_MS4_CS12 0x00000037
+#define NV50_TIC_0_FMT_X16C8X8Z32_MS4_CS12 0x00000038
+#define NV50_TIC_0_FMT_X16C8S8Z32_MS4_CS12 0x00000039
+#define NV50_TIC_0_FMT_Z16 0x0000003a
+
+#define NV50_TIC_1 0x00000004
+#define NV50_TIC_1_OFFSET_LOW__MASK 0xffffffff
+#define NV50_TIC_1_OFFSET_LOW__SHIFT 0
+
+#define NV50_TIC_2 0x00000008
+#define NV50_TIC_2_OFFSET_HIGH__MASK 0x000000ff
+#define NV50_TIC_2_OFFSET_HIGH__SHIFT 0
+#define NV50_TIC_2_COLORSPACE_SRGB 0x00000400
+#define NV50_TIC_2_TARGET__MASK 0x0003c000
+#define NV50_TIC_2_TARGET__SHIFT 14
+#define NV50_TIC_2_TARGET_1D 0x00000000
+#define NV50_TIC_2_TARGET_2D 0x00004000
+#define NV50_TIC_2_TARGET_3D 0x00008000
+#define NV50_TIC_2_TARGET_CUBE 0x0000c000
+#define NV50_TIC_2_TARGET_1D_ARRAY 0x00010000
+#define NV50_TIC_2_TARGET_2D_ARRAY 0x00014000
+#define NV50_TIC_2_TARGET_BUFFER 0x00018000
+#define NV50_TIC_2_TARGET_RECT 0x0001c000
+#define NV50_TIC_2_TARGET_CUBE_ARRAY 0x00020000
+#define NV50_TIC_2_LINEAR 0x00040000
+#define NV50_TIC_2_TILE_MODE_X__MASK 0x00380000
+#define NV50_TIC_2_TILE_MODE_X__SHIFT 19
+#define NV50_TIC_2_TILE_MODE_Y__MASK 0x01c00000
+#define NV50_TIC_2_TILE_MODE_Y__SHIFT 22
+#define NV50_TIC_2_TILE_MODE_Z__MASK 0x0e000000
+#define NV50_TIC_2_TILE_MODE_Z__SHIFT 25
+#define NV50_TIC_2_2D_UNK0258__MASK 0x30000000
+#define NV50_TIC_2_2D_UNK0258__SHIFT 28
+#define NV50_TIC_2_NO_BORDER 0x40000000
+#define NV50_TIC_2_NORMALIZED_COORDS 0x80000000
+
+#define NV50_TIC_3 0x0000000c
+#define NV50_TIC_3_PITCH__MASK 0xffffffff
+#define NV50_TIC_3_PITCH__SHIFT 0
+
+#define NV50_TIC_4 0x00000010
+#define NV50_TIC_4_WIDTH__MASK 0xffffffff
+#define NV50_TIC_4_WIDTH__SHIFT 0
+
+#define NV50_TIC_5 0x00000014
+#define NV50_TIC_5_LAST_LEVEL__MASK 0xf0000000
+#define NV50_TIC_5_LAST_LEVEL__SHIFT 28
+#define NV50_TIC_5_DEPTH__MASK 0x0fff0000
+#define NV50_TIC_5_DEPTH__SHIFT 16
+#define NV50_TIC_5_HEIGHT__MASK 0x0000ffff
+#define NV50_TIC_5_HEIGHT__SHIFT 0
+
+#define NV50_TIC_7 0x0000001c
+#define NV50_TIC_7_BASE_LEVEL__MASK 0x0000000f
+#define NV50_TIC_7_BASE_LEVEL__SHIFT 0
+#define NV50_TIC_7_MAX_LEVEL__MASK 0x000000f0
+#define NV50_TIC_7_MAX_LEVEL__SHIFT 4
+
+#define NV50_TSC__SIZE 0x00000020
+#define NV50_TSC_0 0x00000000
+#define NV50_TSC_0_WRAPS__MASK 0x00000007
+#define NV50_TSC_0_WRAPS__SHIFT 0
+#define NV50_TSC_0_WRAPT__MASK 0x00000038
+#define NV50_TSC_0_WRAPT__SHIFT 3
+#define NV50_TSC_0_WRAPR__MASK 0x000001c0
+#define NV50_TSC_0_WRAPR__SHIFT 6
+#define NV50_TSC_0_SHADOW_COMPARE_ENABLE 0x00000200
+#define NV50_TSC_0_SHADOW_COMPARE_FUNC__MASK 0x00001c00
+#define NV50_TSC_0_SHADOW_COMPARE_FUNC__SHIFT 10
+#define NV50_TSC_0_BOX_S__MASK 0x0001c000
+#define NV50_TSC_0_BOX_S__SHIFT 14
+#define NV50_TSC_0_BOX_T__MASK 0x000e0000
+#define NV50_TSC_0_BOX_T__SHIFT 17
+#define NV50_TSC_0_ANISOTROPY_MASK__MASK 0x00700000
+#define NV50_TSC_0_ANISOTROPY_MASK__SHIFT 20
+
+#define NV50_TSC_1 0x00000004
+#define NV50_TSC_1_UNKN_ANISO_15 0x10000000
+#define NV50_TSC_1_UNKN_ANISO_35 0x18000000
+#define NV50_TSC_1_MAGF__MASK 0x00000003
+#define NV50_TSC_1_MAGF__SHIFT 0
+#define NV50_TSC_1_MAGF_NEAREST 0x00000001
+#define NV50_TSC_1_MAGF_LINEAR 0x00000002
+#define NV50_TSC_1_MINF__MASK 0x00000030
+#define NV50_TSC_1_MINF__SHIFT 4
+#define NV50_TSC_1_MINF_NEAREST 0x00000010
+#define NV50_TSC_1_MINF_LINEAR 0x00000020
+#define NV50_TSC_1_MIPF__MASK 0x000000c0
+#define NV50_TSC_1_MIPF__SHIFT 6
+#define NV50_TSC_1_MIPF_NONE 0x00000040
+#define NV50_TSC_1_MIPF_NEAREST 0x00000080
+#define NV50_TSC_1_MIPF_LINEAR 0x000000c0
+#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000
+#define NV50_TSC_1_LOD_BIAS__SHIFT 12
+
+#define NV50_TSC_2 0x00000008
+#define NV50_TSC_2_MIN_LOD__MASK 0x00000fff
+#define NV50_TSC_2_MIN_LOD__SHIFT 0
+#define NV50_TSC_2_MAX_LOD__MASK 0x00fff000
+#define NV50_TSC_2_MAX_LOD__SHIFT 12
+
+#define NV50_TSC_4 0x00000010
+#define NV50_TSC_4_BORDER_COLOR_RED__MASK 0xffffffff
+#define NV50_TSC_4_BORDER_COLOR_RED__SHIFT 0
+
+#define NV50_TSC_5 0x00000014
+#define NV50_TSC_5_BORDER_COLOR_GREEN__MASK 0xffffffff
+#define NV50_TSC_5_BORDER_COLOR_GREEN__SHIFT 0
+
+#define NV50_TSC_6 0x00000018
+#define NV50_TSC_6_BORDER_COLOR_BLUE__MASK 0xffffffff
+#define NV50_TSC_6_BORDER_COLOR_BLUE__SHIFT 0
+
+#define NV50_TSC_7 0x0000001c
+#define NV50_TSC_7_BORDER_COLOR_ALPHA__MASK 0xffffffff
+#define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT 0
+
+
+#endif /* NV50_TEXTURE_XML */
diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
index d6b80c3ea7..1449cb04c6 100644
--- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
+++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
@@ -476,6 +476,7 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
stk = (struct bld_value_stack *)phi->target;
phi->target = NULL;
+ /* start with s == 1, src[0] is from outside the loop */
for (s = 1, n = 0; n < bb->num_in; ++n) {
if (bb->in_kind[n] != CFG_EDGE_BACK)
continue;
@@ -487,8 +488,11 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
for (i = 0; i < 4; ++i)
if (phi->src[i] && phi->src[i]->value == val)
break;
- if (i == 4)
+ if (i == 4) {
+ /* skip values we do not want to replace */
+ for (; phi->src[s] && phi->src[s]->value != phi->def[0]; ++s);
nv_reference(bld->pc, &phi->src[s++], val);
+ }
}
bld->pc->current_block = save;
@@ -1102,9 +1106,8 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
switch (src->Register.File) {
case TGSI_FILE_CONSTANT:
- dim_idx = src->Dimension.Index ? src->Dimension.Index + 2 : 1;
- assert(dim_idx < 14);
- assert(dim_idx == 1); /* for now */
+ dim_idx = src->Dimension.Index;
+ assert(dim_idx < 15);
res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), type);
SET_TYPE(res, type);
@@ -1130,7 +1133,7 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
case TGSI_FILE_INPUT:
res = bld_saved_input(bld, idx, swz);
if (res && (insn->Instruction.Opcode != TGSI_OPCODE_TXP))
- return res;
+ break;
res = new_value(bld->pc, bld->ti->input_file, type);
res->reg.id = bld->ti->input_map[idx][swz];
@@ -1156,6 +1159,13 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
case TGSI_FILE_PREDICATE:
res = bld_fetch_global(bld, &bld->pvs[idx][swz]);
break;
+ case TGSI_FILE_SYSTEM_VALUE:
+ res = new_value(bld->pc, bld->ti->input_file, NV_TYPE_U32);
+ res->reg.id = bld->ti->sysval_map[idx];
+ res = bld_insn_1(bld, NV_OP_LDA, res);
+ res = bld_insn_1(bld, NV_OP_CVT, res);
+ res->reg.type = NV_TYPE_F32;
+ break;
default:
NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File);
abort();
@@ -1468,7 +1478,7 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
uint opcode = translate_opcode(insn->Instruction.Opcode);
int arg, dim, c;
const int tic = insn->Src[1].Register.Index;
- const int tsc = 0;
+ const int tsc = tic;
const int cube = (insn->Texture.Texture == TGSI_TEXTURE_CUBE) ? 1 : 0;
get_tex_dim(insn, &dim, &arg);
@@ -1717,6 +1727,10 @@ bld_instruction(struct bld_context *bld,
{
struct nv_basic_block *b = new_basic_block(bld->pc);
+ if (bld->pc->current_block->exit &&
+ !bld->pc->current_block->exit->is_terminator)
+ bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, b, FALSE);
+
--bld->cond_lvl;
nvbb_attach_block(bld->pc->current_block, b, bld->out_kind);
nvbb_attach_block(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
@@ -1923,6 +1937,7 @@ bld_instruction(struct bld_context *bld,
dst0[c] = bld_insn_2(bld, NV_OP_XOR, temp, temp);
dst0[c]->insn->cc = NV_CC_EQ;
nv_reference(bld->pc, &dst0[c]->insn->flags_src, src1);
+ dst0[c] = bld_insn_2(bld, NV_OP_SELECT, dst0[c], temp);
}
break;
case TGSI_OPCODE_SUB:
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index bf5af4ddc6..7486977459 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -1,351 +1,347 @@
-#include "pipe/p_context.h"
-#include "util/u_inlines.h"
#include "util/u_format.h"
-#include "util/u_math.h"
#include "nv50_context.h"
#include "nv50_transfer.h"
-#include "nv50_resource.h"
+
+#include "nv50_defs.xml.h"
struct nv50_transfer {
- struct pipe_transfer base;
- struct nouveau_bo *bo;
- int map_refcnt;
- unsigned level_offset;
- unsigned level_tiling;
- int level_pitch;
- int level_width;
- int level_height;
- int level_depth;
- int level_x;
- int level_y;
- int level_z;
- unsigned nblocksx;
- unsigned nblocksy;
+ struct pipe_transfer base;
+ struct nv50_m2mf_rect rect[2];
+ uint32_t nblocksx;
+ uint32_t nblocksy;
};
static void
-nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
- struct nouveau_bo *src_bo, unsigned src_offset,
- int src_pitch, unsigned src_tile_mode,
- int sx, int sy, int sz, int sw, int sh, int sd,
- struct nouveau_bo *dst_bo, unsigned dst_offset,
- int dst_pitch, unsigned dst_tile_mode,
- int dx, int dy, int dz, int dw, int dh, int dd,
- int cpp, int width, int height,
- unsigned src_reloc, unsigned dst_reloc)
+nv50_m2mf_transfer_rect(struct pipe_screen *pscreen,
+ const struct nv50_m2mf_rect *dst,
+ const struct nv50_m2mf_rect *src,
+ uint32_t nblocksx, uint32_t nblocksy)
{
- struct nv50_screen *screen = nv50_screen(pscreen);
- struct nouveau_channel *chan = screen->m2mf->channel;
- struct nouveau_grobj *m2mf = screen->m2mf;
-
- src_reloc |= NOUVEAU_BO_RD;
- dst_reloc |= NOUVEAU_BO_WR;
-
- WAIT_RING (chan, 14);
-
- if (!nouveau_bo_tile_layout(src_bo)) {
- BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1);
- OUT_RING (chan, 1);
- BEGIN_RING(chan, m2mf,
- NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1);
- OUT_RING (chan, src_pitch);
- src_offset += (sy * src_pitch) + (sx * cpp);
- } else {
- BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 6);
- OUT_RING (chan, 0);
- OUT_RING (chan, src_tile_mode << 4);
- OUT_RING (chan, sw * cpp);
- OUT_RING (chan, sh);
- OUT_RING (chan, sd);
- OUT_RING (chan, sz); /* copying only 1 zslice per call */
- }
-
- if (!nouveau_bo_tile_layout(dst_bo)) {
- BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1);
- OUT_RING (chan, 1);
- BEGIN_RING(chan, m2mf,
- NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1);
- OUT_RING (chan, dst_pitch);
- dst_offset += (dy * dst_pitch) + (dx * cpp);
- } else {
- BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 6);
- OUT_RING (chan, 0);
- OUT_RING (chan, dst_tile_mode << 4);
- OUT_RING (chan, dw * cpp);
- OUT_RING (chan, dh);
- OUT_RING (chan, dd);
- OUT_RING (chan, dz); /* copying only 1 zslice per call */
- }
-
- while (height) {
- int line_count = height > 2047 ? 2047 : height;
-
- MARK_RING (chan, 15, 4); /* flush on lack of space or relocs */
- BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH, 2);
- OUT_RELOCh(chan, src_bo, src_offset, src_reloc);
- OUT_RELOCh(chan, dst_bo, dst_offset, dst_reloc);
- BEGIN_RING(chan, m2mf,
- NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2);
- OUT_RELOCl(chan, src_bo, src_offset, src_reloc);
- OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc);
- if (nouveau_bo_tile_layout(src_bo)) {
- BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN, 1);
- OUT_RING (chan, (sy << 16) | (sx * cpp));
- } else {
- src_offset += (line_count * src_pitch);
- }
- if (nouveau_bo_tile_layout(dst_bo)) {
- BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT, 1);
- OUT_RING (chan, (dy << 16) | (dx * cpp));
- } else {
- dst_offset += (line_count * dst_pitch);
- }
- BEGIN_RING(chan, m2mf,
- NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4);
- OUT_RING (chan, width * cpp);
- OUT_RING (chan, line_count);
- OUT_RING (chan, 0x00000101);
- OUT_RING (chan, 0);
- FIRE_RING (chan);
-
- height -= line_count;
- sy += line_count;
- dy += line_count;
- }
+ struct nouveau_channel *chan = nouveau_screen(pscreen)->channel;
+ const int cpp = dst->cpp;
+ uint32_t src_ofst = src->base;
+ uint32_t dst_ofst = dst->base;
+ uint32_t height = nblocksy;
+ uint32_t sy = src->y;
+ uint32_t dy = dst->y;
+
+ assert(dst->cpp == src->cpp);
+
+ if (nouveau_bo_tile_layout(src->bo)) {
+ BEGIN_RING(chan, RING_MF(LINEAR_IN), 6);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, src->tile_mode << 4);
+ OUT_RING (chan, src->width * cpp);
+ OUT_RING (chan, src->height);
+ OUT_RING (chan, src->depth);
+ OUT_RING (chan, src->z);
+ } else {
+ src_ofst += src->y * src->pitch + src->x * cpp;
+
+ BEGIN_RING(chan, RING_MF(LINEAR_IN), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_MF_(NV04_M2MF_PITCH_IN), 1);
+ OUT_RING (chan, src->pitch);
+ }
+
+ if (nouveau_bo_tile_layout(dst->bo)) {
+ BEGIN_RING(chan, RING_MF(LINEAR_OUT), 6);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, dst->tile_mode << 4);
+ OUT_RING (chan, dst->width * cpp);
+ OUT_RING (chan, dst->height);
+ OUT_RING (chan, dst->depth);
+ OUT_RING (chan, dst->z);
+ } else {
+ dst_ofst += dst->y * dst->pitch + dst->x * cpp;
+
+ BEGIN_RING(chan, RING_MF(LINEAR_OUT), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_MF_(NV04_M2MF_PITCH_OUT), 1);
+ OUT_RING (chan, dst->pitch);
+ }
+
+ while (height) {
+ int line_count = height > 2047 ? 2047 : height;
+
+ MARK_RING (chan, 17, 4);
+
+ BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2);
+ OUT_RELOCh(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD);
+ OUT_RELOCh(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR);
+
+ BEGIN_RING(chan, RING_MF_(NV04_M2MF_OFFSET_IN), 2);
+ OUT_RELOCl(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR);
+
+ if (nouveau_bo_tile_layout(src->bo)) {
+ BEGIN_RING(chan, RING_MF(TILING_POSITION_IN), 1);
+ OUT_RING (chan, (sy << 16) | (src->x * cpp));
+ } else {
+ src_ofst += line_count * src->pitch;
+ }
+ if (nouveau_bo_tile_layout(dst->bo)) {
+ BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT), 1);
+ OUT_RING (chan, (dy << 16) | (dst->x * cpp));
+ } else {
+ dst_ofst += line_count * dst->pitch;
+ }
+
+ BEGIN_RING(chan, RING_MF_(NV04_M2MF_LINE_LENGTH_IN), 4);
+ OUT_RING (chan, nblocksx * cpp);
+ OUT_RING (chan, line_count);
+ OUT_RING (chan, (1 << 8) | (1 << 0));
+ OUT_RING (chan, 0);
+
+ height -= line_count;
+ sy += line_count;
+ dy += line_count;
+ }
}
-struct pipe_transfer *
-nv50_miptree_transfer_new(struct pipe_context *pcontext,
- struct pipe_resource *pt,
- unsigned level,
- unsigned usage,
- const struct pipe_box *box)
+void
+nv50_sifc_linear_u8(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, void *data)
{
- struct pipe_screen *pscreen = pcontext->screen;
- struct nouveau_device *dev = nouveau_screen(pscreen)->device;
- struct nv50_miptree *mt = nv50_miptree(pt);
- struct nv50_miptree_level *lvl = &mt->level[level];
- struct nv50_transfer *tx;
- unsigned nx, ny, image = 0, boxz = 0;
- int ret;
-
- /* XXX can't unify these here? */
- if (pt->target == PIPE_TEXTURE_CUBE)
- image = box->z;
- else if (pt->target == PIPE_TEXTURE_3D)
- boxz = box->z;
-
- tx = CALLOC_STRUCT(nv50_transfer);
- if (!tx)
- return NULL;
-
- /* Don't handle 3D transfers yet.
- */
- assert(box->depth == 1);
-
-
- pipe_resource_reference(&tx->base.resource, pt);
- tx->base.level = level;
- tx->base.usage = usage;
- tx->base.box = *box;
- tx->nblocksx = util_format_get_nblocksx(pt->format, u_minify(pt->width0, level));
- tx->nblocksy = util_format_get_nblocksy(pt->format, u_minify(pt->height0, level));
- tx->base.stride = tx->nblocksx * util_format_get_blocksize(pt->format);
- tx->base.usage = usage;
-
- tx->level_pitch = lvl->pitch;
- tx->level_width = u_minify(mt->base.base.width0, level);
- tx->level_height = u_minify(mt->base.base.height0, level);
- tx->level_depth = u_minify(mt->base.base.depth0, level);
- tx->level_offset = lvl->image_offset[image];
- tx->level_tiling = lvl->tile_mode;
- tx->level_z = boxz;
- tx->level_x = util_format_get_nblocksx(pt->format, box->x);
- tx->level_y = util_format_get_nblocksy(pt->format, box->y);
- ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
- tx->nblocksy * tx->base.stride, &tx->bo);
- if (ret) {
- FREE(tx);
- return NULL;
- }
-
- if (usage & PIPE_TRANSFER_READ) {
- nx = util_format_get_nblocksx(pt->format, box->width);
- ny = util_format_get_nblocksy(pt->format, box->height);
-
- nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset,
- tx->level_pitch, tx->level_tiling,
- box->x, box->y, boxz,
- tx->nblocksx, tx->nblocksy,
- tx->level_depth,
- tx->bo, 0,
- tx->base.stride, tx->bo->tile_mode,
- 0, 0, 0,
- tx->nblocksx, tx->nblocksy, 1,
- util_format_get_blocksize(pt->format), nx, ny,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
- NOUVEAU_BO_GART);
- }
-
- return &tx->base;
+ struct nouveau_channel *chan = nv->screen->channel;
+ uint32_t *src = (uint32_t *)data;
+ unsigned count = (size + 3) / 4;
+ unsigned xcoord = offset & 0xff;
+
+ offset &= ~0xff;
+
+ MARK_RING (chan, 23, 4);
+ BEGIN_RING(chan, RING_2D(DST_FORMAT), 2);
+ OUT_RING (chan, NV50_SURFACE_FORMAT_R8_UNORM);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_2D(DST_PITCH), 5);
+ OUT_RING (chan, 262144);
+ OUT_RING (chan, 65536);
+ OUT_RING (chan, 1);
+ OUT_RELOCh(chan, dst, offset, domain | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, dst, offset, domain | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, RING_2D(SIFC_BITMAP_ENABLE), 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, NV50_SURFACE_FORMAT_R8_UNORM);
+ BEGIN_RING(chan, RING_2D(SIFC_WIDTH), 10);
+ OUT_RING (chan, size);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, xcoord);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+
+ while (count) {
+ unsigned nr = AVAIL_RING(chan);
+
+ if (nr < 9) {
+ FIRE_RING(chan);
+ nouveau_bo_validate(chan, dst, NOUVEAU_BO_WR);
+ continue;
+ }
+ nr = MIN2(count, nr - 1);
+ nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
+
+ BEGIN_RING_NI(chan, RING_2D(SIFC_DATA), nr);
+ OUT_RINGp (chan, src, nr);
+
+ src += nr;
+ count -= nr;
+ }
}
void
-nv50_miptree_transfer_del(struct pipe_context *pcontext,
- struct pipe_transfer *ptx)
+nv50_m2mf_copy_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
+ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
+ unsigned size)
{
- struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
- struct nv50_miptree *mt = nv50_miptree(ptx->resource);
- struct pipe_resource *pt = ptx->resource;
-
- unsigned nx = util_format_get_nblocksx(pt->format, tx->base.box.width);
- unsigned ny = util_format_get_nblocksy(pt->format, tx->base.box.height);
-
- if (ptx->usage & PIPE_TRANSFER_WRITE) {
- struct pipe_screen *pscreen = pcontext->screen;
-
- nv50_transfer_rect_m2mf(pscreen, tx->bo, 0,
- tx->base.stride, tx->bo->tile_mode,
- 0, 0, 0,
- tx->nblocksx, tx->nblocksy, 1,
- mt->base.bo, tx->level_offset,
- tx->level_pitch, tx->level_tiling,
- tx->level_x, tx->level_y, tx->level_z,
- tx->nblocksx, tx->nblocksy,
- tx->level_depth,
- util_format_get_blocksize(pt->format), nx, ny,
- NOUVEAU_BO_GART, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_GART);
- }
-
- nouveau_bo_ref(NULL, &tx->bo);
- pipe_resource_reference(&ptx->resource, NULL);
- FREE(ptx);
+ struct nouveau_channel *chan = nv->screen->channel;
+
+ BEGIN_RING(chan, RING_MF(LINEAR_IN), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_MF(LINEAR_OUT), 1);
+ OUT_RING (chan, 1);
+
+ while (size) {
+ unsigned bytes = MIN2(size, 1 << 17);
+
+ MARK_RING (chan, 11, 4);
+ BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2);
+ OUT_RELOCh(chan, src, srcoff, srcdom | NOUVEAU_BO_RD);
+ OUT_RELOCh(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, RING_MF_(NV04_M2MF_OFFSET_IN), 2);
+ OUT_RELOCl(chan, src, srcoff, srcdom | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, RING_MF_(NV04_M2MF_LINE_LENGTH_IN), 4);
+ OUT_RING (chan, bytes);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, (1 << 8) | (1 << 0));
+ OUT_RING (chan, 0);
+
+ srcoff += bytes;
+ dstoff += bytes;
+ size -= bytes;
+ }
}
-void *
-nv50_miptree_transfer_map(struct pipe_context *pcontext,
- struct pipe_transfer *ptx)
+struct pipe_transfer *
+nv50_miptree_transfer_new(struct pipe_context *pctx,
+ struct pipe_resource *res,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box)
{
- struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
- unsigned flags = 0;
- int ret;
-
- if (tx->map_refcnt++)
- return tx->bo->map;
-
- if (ptx->usage & PIPE_TRANSFER_WRITE)
- flags |= NOUVEAU_BO_WR;
- if (ptx->usage & PIPE_TRANSFER_READ)
- flags |= NOUVEAU_BO_RD;
-
- ret = nouveau_bo_map(tx->bo, flags);
- if (ret) {
- tx->map_refcnt = 0;
- return NULL;
- }
- return tx->bo->map;
+ struct nv50_context *nv50 = nv50_context(pctx);
+ struct pipe_screen *pscreen = pctx->screen;
+ struct nouveau_device *dev = nv50->screen->base.device;
+ struct nv50_miptree *mt = nv50_miptree(res);
+ struct nv50_miptree_level *lvl = &mt->level[level];
+ struct nv50_transfer *tx;
+ uint32_t size;
+ uint32_t w, h, d, z, layer;
+ int ret;
+
+ if (mt->layout_3d) {
+ z = box->z;
+ d = u_minify(res->depth0, level);
+ layer = 0;
+ } else {
+ z = 0;
+ d = 1;
+ layer = box->z;
+ }
+
+ tx = CALLOC_STRUCT(nv50_transfer);
+ if (!tx)
+ return NULL;
+
+ pipe_resource_reference(&tx->base.resource, res);
+
+ tx->base.level = level;
+ tx->base.usage = usage;
+ tx->base.box = *box;
+
+ tx->nblocksx = util_format_get_nblocksx(res->format, box->width);
+ tx->nblocksy = util_format_get_nblocksy(res->format, box->height);
+
+ tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format);
+ tx->base.layer_stride = tx->nblocksy * tx->base.stride;
+
+ w = u_minify(res->width0, level);
+ h = u_minify(res->height0, level);
+
+ tx->rect[0].cpp = tx->rect[1].cpp = util_format_get_blocksize(res->format);
+
+ tx->rect[0].bo = mt->base.bo;
+ tx->rect[0].base = lvl->offset + layer * mt->layer_stride;
+ tx->rect[0].tile_mode = lvl->tile_mode;
+ tx->rect[0].x = util_format_get_nblocksx(res->format, box->x);
+ tx->rect[0].y = util_format_get_nblocksy(res->format, box->y);
+ tx->rect[0].z = z;
+ tx->rect[0].width = util_format_get_nblocksx(res->format, w);
+ tx->rect[0].height = util_format_get_nblocksy(res->format, h);
+ tx->rect[0].depth = d;
+ tx->rect[0].pitch = lvl->pitch;
+ tx->rect[0].domain = NOUVEAU_BO_VRAM;
+
+ size = tx->base.layer_stride;
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
+ size * tx->base.box.depth, &tx->rect[1].bo);
+ if (ret) {
+ FREE(tx);
+ return NULL;
+ }
+
+ tx->rect[1].width = tx->nblocksx;
+ tx->rect[1].height = tx->nblocksy;
+ tx->rect[1].depth = 1;
+ tx->rect[1].pitch = tx->base.stride;
+ tx->rect[1].domain = NOUVEAU_BO_GART;
+
+ if (usage & PIPE_TRANSFER_READ) {
+ unsigned base = tx->rect[0].base;
+ unsigned i;
+ for (i = 0; i < box->depth; ++i) {
+ nv50_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0],
+ tx->nblocksx, tx->nblocksy);
+ if (mt->layout_3d)
+ tx->rect[0].z++;
+ else
+ tx->rect[0].base += mt->layer_stride;
+ tx->rect[1].base += size;
+ }
+ tx->rect[0].z = z;
+ tx->rect[0].base = base;
+ tx->rect[1].base = 0;
+ }
+
+ return &tx->base;
}
void
-nv50_miptree_transfer_unmap(struct pipe_context *pcontext,
- struct pipe_transfer *ptx)
+nv50_miptree_transfer_del(struct pipe_context *pctx,
+ struct pipe_transfer *transfer)
{
- struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
-
- if (--tx->map_refcnt)
- return;
- nouveau_bo_unmap(tx->bo);
+ struct pipe_screen *pscreen = pctx->screen;
+ struct nv50_transfer *tx = (struct nv50_transfer *)transfer;
+ struct nv50_miptree *mt = nv50_miptree(tx->base.resource);
+ unsigned i;
+
+ if (tx->base.usage & PIPE_TRANSFER_WRITE) {
+ for (i = 0; i < tx->base.box.depth; ++i) {
+ nv50_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1],
+ tx->nblocksx, tx->nblocksy);
+ if (mt->layout_3d)
+ tx->rect[0].z++;
+ else
+ tx->rect[0].base += mt->layer_stride;
+ tx->rect[1].base += tx->nblocksy * tx->base.stride;
+ }
+ }
+
+ nouveau_bo_ref(NULL, &tx->rect[1].bo);
+ pipe_resource_reference(&transfer->resource, NULL);
+
+ FREE(tx);
}
+void *
+nv50_miptree_transfer_map(struct pipe_context *pctx,
+ struct pipe_transfer *transfer)
+{
+ struct nv50_transfer *tx = (struct nv50_transfer *)transfer;
+ int ret;
+ unsigned flags = 0;
+
+ if (tx->rect[1].bo->map)
+ return tx->rect[1].bo->map;
+
+ if (transfer->usage & PIPE_TRANSFER_READ)
+ flags = NOUVEAU_BO_RD;
+ if (transfer->usage & PIPE_TRANSFER_WRITE)
+ flags |= NOUVEAU_BO_WR;
+
+ ret = nouveau_bo_map(tx->rect[1].bo, flags);
+ if (ret)
+ return NULL;
+ return tx->rect[1].bo->map;
+}
void
-nv50_upload_sifc(struct nv50_context *nv50,
- struct nouveau_bo *bo, unsigned dst_offset, unsigned reloc,
- unsigned dst_format, int dst_w, int dst_h, int dst_pitch,
- void *src, unsigned src_format, int src_pitch,
- int x, int y, int w, int h, int cpp)
+nv50_miptree_transfer_unmap(struct pipe_context *pctx,
+ struct pipe_transfer *transfer)
{
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *eng2d = nv50->screen->eng2d;
- unsigned line_dwords = (w * cpp + 3) / 4;
-
- reloc |= NOUVEAU_BO_WR;
-
- MARK_RING (chan, 32, 2); /* flush on lack of space or relocs */
-
- if (nouveau_bo_tile_layout(bo)) {
- BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5);
- OUT_RING (chan, dst_format);
- OUT_RING (chan, 0);
- OUT_RING (chan, bo->tile_mode << 4);
- OUT_RING (chan, 1);
- OUT_RING (chan, 0);
- } else {
- BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2);
- OUT_RING (chan, dst_format);
- OUT_RING (chan, 1);
- BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 1);
- OUT_RING (chan, dst_pitch);
- }
-
- BEGIN_RING(chan, eng2d, NV50_2D_DST_WIDTH, 4);
- OUT_RING (chan, dst_w);
- OUT_RING (chan, dst_h);
- OUT_RELOCh(chan, bo, dst_offset, reloc);
- OUT_RELOCl(chan, bo, dst_offset, reloc);
-
- /* NV50_2D_OPERATION_SRCCOPY assumed already set */
-
- BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2);
- OUT_RING (chan, 0);
- OUT_RING (chan, src_format);
- BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10);
- OUT_RING (chan, w);
- OUT_RING (chan, h);
- OUT_RING (chan, 0);
- OUT_RING (chan, 1);
- OUT_RING (chan, 0);
- OUT_RING (chan, 1);
- OUT_RING (chan, 0);
- OUT_RING (chan, x);
- OUT_RING (chan, 0);
- OUT_RING (chan, y);
-
- while (h--) {
- const uint32_t *p = src;
- unsigned count = line_dwords;
-
- while (count) {
- unsigned nr = MIN2(count, 1792);
-
- if (AVAIL_RING(chan) <= nr) {
- FIRE_RING (chan);
-
- BEGIN_RING(chan, eng2d,
- NV50_2D_DST_ADDRESS_HIGH, 2);
- OUT_RELOCh(chan, bo, dst_offset, reloc);
- OUT_RELOCl(chan, bo, dst_offset, reloc);
- }
- assert(AVAIL_RING(chan) > nr);
-
- BEGIN_RING(chan, eng2d,
- NV50_2D_SIFC_DATA | (2 << 29), nr);
- OUT_RINGp (chan, p, nr);
-
- p += nr;
- count -= nr;
- }
-
- src = (uint8_t *) src + src_pitch;
- }
+ struct nv50_transfer *tx = (struct nv50_transfer *)transfer;
+
+ nouveau_bo_unmap(tx->rect[1].bo);
}
+
diff --git a/src/gallium/drivers/nv50/nv50_transfer.h b/src/gallium/drivers/nv50/nv50_transfer.h
index 6699bf546e..d3259ef4a5 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.h
+++ b/src/gallium/drivers/nv50/nv50_transfer.h
@@ -1,31 +1,38 @@
-#ifndef NV50_TRANSFER_H
-#define NV50_TRANSFER_H
+#ifndef __NV50_TRANSFER_H__
+#define __NV50_TRANSFER_H__
#include "pipe/p_state.h"
-
struct pipe_transfer *
nv50_miptree_transfer_new(struct pipe_context *pcontext,
- struct pipe_resource *pt,
- unsigned level,
- unsigned usage,
- const struct pipe_box *box);
+ struct pipe_resource *pt,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box);
void
nv50_miptree_transfer_del(struct pipe_context *pcontext,
- struct pipe_transfer *ptx);
+ struct pipe_transfer *ptx);
void *
nv50_miptree_transfer_map(struct pipe_context *pcontext,
- struct pipe_transfer *ptx);
+ struct pipe_transfer *ptx);
void
nv50_miptree_transfer_unmap(struct pipe_context *pcontext,
- struct pipe_transfer *ptx);
+ struct pipe_transfer *ptx);
-extern void
-nv50_upload_sifc(struct nv50_context *nv50,
- struct nouveau_bo *bo, unsigned dst_offset, unsigned reloc,
- unsigned dst_format, int dst_w, int dst_h, int dst_pitch,
- void *src, unsigned src_format, int src_pitch,
- int x, int y, int w, int h, int cpp);
+struct nv50_m2mf_rect {
+ struct nouveau_bo *bo;
+ uint32_t base;
+ unsigned domain;
+ uint32_t pitch;
+ uint32_t width;
+ uint32_t x;
+ uint32_t height;
+ uint32_t y;
+ uint16_t depth;
+ uint16_t z;
+ uint16_t tile_mode;
+ uint16_t cpp;
+};
#endif
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index d41a59d05d..abdb9ce2f9 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Ben Skeggs
+ * Copyright 2010 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -24,540 +24,694 @@
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
-#include "util/u_split_prim.h"
+#include "translate/translate.h"
#include "nv50_context.h"
#include "nv50_resource.h"
-struct instance {
- struct nouveau_bo *bo;
- unsigned delta;
- unsigned stride;
- unsigned step;
- unsigned divisor;
-};
+#include "nv50_3d.xml.h"
-static void
-instance_init(struct nv50_context *nv50, struct instance *a, unsigned first)
+void
+nv50_vertex_state_delete(struct pipe_context *pipe,
+ void *hwcso)
{
- int i;
-
- for (i = 0; i < nv50->vtxelt->num_elements; i++) {
- struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
- struct pipe_vertex_buffer *vb;
-
- a[i].divisor = ve->instance_divisor;
- if (a[i].divisor) {
- vb = &nv50->vtxbuf[ve->vertex_buffer_index];
-
- a[i].bo = nv50_resource(vb->buffer)->bo;
- a[i].stride = vb->stride;
- a[i].step = first % a[i].divisor;
- a[i].delta = vb->buffer_offset + ve->src_offset +
- (first * a[i].stride);
- }
- }
+ struct nv50_vertex_stateobj *so = hwcso;
+
+ if (so->translate)
+ so->translate->release(so->translate);
+ FREE(hwcso);
}
-static void
-instance_step(struct nv50_context *nv50, struct instance *a)
+void *
+nv50_vertex_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements)
{
- struct nouveau_channel *chan = nv50->screen->tesla->channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- int i;
-
- for (i = 0; i < nv50->vtxelt->num_elements; i++) {
- if (!a[i].divisor)
- continue;
-
- BEGIN_RING(chan, tesla,
- NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
- OUT_RELOCh(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD |
- NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
- OUT_RELOCl(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD |
- NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
- if (++a[i].step == a[i].divisor) {
- a[i].step = 0;
- a[i].delta += a[i].stride;
- }
- }
+ struct nv50_vertex_stateobj *so;
+ struct translate_key transkey;
+ unsigned i;
+
+ so = MALLOC(sizeof(*so) +
+ num_elements * sizeof(struct nv50_vertex_element));
+ if (!so)
+ return NULL;
+ so->num_elements = num_elements;
+ so->instance_elts = 0;
+ so->instance_bufs = 0;
+ so->need_conversion = FALSE;
+
+ transkey.nr_elements = 0;
+ transkey.output_stride = 0;
+
+ for (i = 0; i < num_elements; ++i) {
+ const struct pipe_vertex_element *ve = &elements[i];
+ const unsigned vbi = ve->vertex_buffer_index;
+ enum pipe_format fmt = ve->src_format;
+
+ so->element[i].pipe = elements[i];
+ so->element[i].state = nv50_format_table[fmt].vtx;
+
+ if (!so->element[i].state) {
+ switch (util_format_get_nr_components(fmt)) {
+ case 1: fmt = PIPE_FORMAT_R32_FLOAT; break;
+ case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break;
+ case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break;
+ case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break;
+ default:
+ assert(0);
+ return NULL;
+ }
+ so->element[i].state = nv50_format_table[fmt].vtx;
+ so->need_conversion = TRUE;
+ }
+ so->element[i].state |= i;
+
+ if (1) {
+ unsigned j = transkey.nr_elements++;
+
+ transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL;
+ transkey.element[j].input_format = ve->src_format;
+ transkey.element[j].input_buffer = vbi;
+ transkey.element[j].input_offset = ve->src_offset;
+ transkey.element[j].instance_divisor = ve->instance_divisor;
+
+ transkey.element[j].output_format = fmt;
+ transkey.element[j].output_offset = transkey.output_stride;
+ transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3;
+
+ if (unlikely(ve->instance_divisor)) {
+ so->instance_elts |= 1 << i;
+ so->instance_bufs |= 1 << vbi;
+ }
+ }
+ }
+
+ so->translate = translate_create(&transkey);
+ so->vertex_size = transkey.output_stride / 4;
+ so->packet_vertex_limit = NV04_PFIFO_MAX_PACKET_LEN /
+ MAX2(so->vertex_size, 1);
+
+ return so;
}
+#define NV50_3D_VERTEX_ATTRIB_INACTIVE \
+ NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT | \
+ NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 | \
+ NV50_3D_VERTEX_ARRAY_ATTRIB_CONST
+
static void
-nv50_draw_arrays_instanced(struct pipe_context *pipe,
- unsigned mode, unsigned start, unsigned count,
- unsigned startInstance, unsigned instanceCount)
+nv50_emit_vtxattr(struct nv50_context *nv50, struct pipe_vertex_buffer *vb,
+ struct pipe_vertex_element *ve, unsigned attr)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_channel *chan = nv50->screen->tesla->channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct instance a[16];
- unsigned prim = nv50_prim(mode);
-
- instance_init(nv50, a, startInstance);
- if (!nv50_state_validate(nv50, 10 + 16*3))
- return;
-
- if (nv50->vbo_fifo) {
- nv50_push_elements_instanced(pipe, NULL, 0, 0, mode, start,
- count, startInstance,
- instanceCount);
- return;
- }
-
- BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
- OUT_RING (chan, NV50_CB_AUX | (24 << 8));
- OUT_RING (chan, startInstance);
- while (instanceCount--) {
- if (AVAIL_RING(chan) < (7 + 16*3)) {
- FIRE_RING(chan);
- if (!nv50_state_validate(nv50, 7 + 16*3)) {
- assert(0);
- return;
- }
- }
- instance_step(nv50, a);
-
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
- OUT_RING (chan, prim);
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
- OUT_RING (chan, start);
- OUT_RING (chan, count);
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
- OUT_RING (chan, 0);
-
- prim |= (1 << 28);
- }
+ const void *data;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nv04_resource *res = nv04_resource(vb->buffer);
+ float v[4];
+ const unsigned nc = util_format_get_nr_components(ve->src_format);
+
+ data = nouveau_resource_map_offset(&nv50->base, res, vb->buffer_offset +
+ ve->src_offset, NOUVEAU_BO_RD);
+
+ util_format_read_4f(ve->src_format, v, 0, data, 0, 0, 0, 1, 1);
+
+ switch (nc) {
+ case 4:
+ BEGIN_RING(chan, RING_3D(VTX_ATTR_4F_X(attr)), 4);
+ OUT_RINGf (chan, v[0]);
+ OUT_RINGf (chan, v[1]);
+ OUT_RINGf (chan, v[2]);
+ OUT_RINGf (chan, v[3]);
+ break;
+ case 3:
+ BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(attr)), 3);
+ OUT_RINGf (chan, v[0]);
+ OUT_RINGf (chan, v[1]);
+ OUT_RINGf (chan, v[2]);
+ break;
+ case 2:
+ BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(attr)), 2);
+ OUT_RINGf (chan, v[0]);
+ OUT_RINGf (chan, v[1]);
+ break;
+ case 1:
+ if (attr == nv50->vertprog->vp.edgeflag) {
+ BEGIN_RING(chan, RING_3D(EDGEFLAG_ENABLE), 1);
+ OUT_RING (chan, v[0] ? 1 : 0);
+ }
+ BEGIN_RING(chan, RING_3D(VTX_ATTR_1F(attr)), 1);
+ OUT_RINGf (chan, v[0]);
+ break;
+ default:
+ assert(0);
+ break;
+ }
}
-struct inline_ctx {
- struct nv50_context *nv50;
- void *map;
-};
+static INLINE void
+nv50_vbuf_range(struct nv50_context *nv50, int vbi,
+ uint32_t *base, uint32_t *size)
+{
+ if (unlikely(nv50->vertex->instance_bufs & (1 << vbi))) {
+ /* TODO: use min and max instance divisor to get a proper range */
+ *base = 0;
+ *size = nv50->vtxbuf[vbi].buffer->width0;
+ } else {
+ assert(nv50->vbo_max_index != ~0);
+ *base = nv50->vbo_min_index * nv50->vtxbuf[vbi].stride;
+ *size = (nv50->vbo_max_index -
+ nv50->vbo_min_index + 1) * nv50->vtxbuf[vbi].stride;
+ }
+}
static void
-inline_elt08(void *priv, unsigned start, unsigned count)
+nv50_prevalidate_vbufs(struct nv50_context *nv50)
{
- struct inline_ctx *ctx = priv;
- struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
- struct nouveau_channel *chan = tesla->channel;
- uint8_t *map = (uint8_t *)ctx->map + start;
-
- if (count & 1) {
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
- OUT_RING (chan, map[0]);
- map++;
- count &= ~1;
- }
-
- count >>= 1;
- if (!count)
- return;
-
- BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count);
- while (count--) {
- OUT_RING(chan, (map[1] << 16) | map[0]);
- map += 2;
- }
+ struct pipe_vertex_buffer *vb;
+ struct nv04_resource *buf;
+ int i;
+ uint32_t base, size;
+
+ nv50->vbo_fifo = nv50->vbo_user = 0;
+
+ nv50_bufctx_reset(nv50, NV50_BUFCTX_VERTEX);
+
+ for (i = 0; i < nv50->num_vtxbufs; ++i) {
+ vb = &nv50->vtxbuf[i];
+ if (!vb->stride)
+ continue;
+ buf = nv04_resource(vb->buffer);
+
+ /* NOTE: user buffers with temporary storage count as mapped by GPU */
+ if (!nouveau_resource_mapped_by_gpu(vb->buffer)) {
+ if (nv50->vbo_push_hint) {
+ nv50->vbo_fifo = ~0;
+ continue;
+ } else {
+ if (buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY) {
+ nv50->vbo_user |= 1 << i;
+ assert(vb->stride > vb->buffer_offset);
+ nv50_vbuf_range(nv50, i, &base, &size);
+ nouveau_user_buffer_upload(buf, base, size);
+ } else {
+ nouveau_buffer_migrate(&nv50->base, buf, NOUVEAU_BO_GART);
+ }
+ nv50->base.vbo_dirty = TRUE;
+ }
+ }
+ nv50_bufctx_add_resident(nv50, NV50_BUFCTX_VERTEX, buf, NOUVEAU_BO_RD);
+ nouveau_buffer_adjust_score(&nv50->base, buf, 1);
+ }
}
static void
-inline_elt16(void *priv, unsigned start, unsigned count)
+nv50_update_user_vbufs(struct nv50_context *nv50)
{
- struct inline_ctx *ctx = priv;
- struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
- struct nouveau_channel *chan = tesla->channel;
- uint16_t *map = (uint16_t *)ctx->map + start;
-
- if (count & 1) {
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
- OUT_RING (chan, map[0]);
- count &= ~1;
- map++;
- }
-
- count >>= 1;
- if (!count)
- return;
-
- BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count);
- while (count--) {
- OUT_RING(chan, (map[1] << 16) | map[0]);
- map += 2;
- }
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ uint32_t base, offset, size;
+ int i;
+ uint32_t written = 0;
+
+ for (i = 0; i < nv50->vertex->num_elements; ++i) {
+ struct pipe_vertex_element *ve = &nv50->vertex->element[i].pipe;
+ const int b = ve->vertex_buffer_index;
+ struct pipe_vertex_buffer *vb = &nv50->vtxbuf[b];
+ struct nv04_resource *buf = nv04_resource(vb->buffer);
+
+ if (!(nv50->vbo_user & (1 << b)))
+ continue;
+
+ if (!vb->stride) {
+ nv50_emit_vtxattr(nv50, vb, ve, i);
+ continue;
+ }
+ nv50_vbuf_range(nv50, b, &base, &size);
+
+ if (!(written & (1 << b))) {
+ written |= 1 << b;
+ nouveau_user_buffer_upload(buf, base, size);
+ }
+ offset = vb->buffer_offset + ve->src_offset;
+
+ MARK_RING (chan, 6, 4);
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
+ OUT_RESRCh(chan, buf, base + size - 1, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, buf, base + size - 1, NOUVEAU_BO_RD);
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_START_HIGH(i)), 2);
+ OUT_RESRCh(chan, buf, offset, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, buf, offset, NOUVEAU_BO_RD);
+ }
+ nv50->base.vbo_dirty = TRUE;
}
-static void
-inline_elt32(void *priv, unsigned start, unsigned count)
+static INLINE void
+nv50_release_user_vbufs(struct nv50_context *nv50)
{
- struct inline_ctx *ctx = priv;
- struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
- struct nouveau_channel *chan = tesla->channel;
+ uint32_t vbo_user = nv50->vbo_user;
- BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, count);
- OUT_RINGp (chan, (uint32_t *)ctx->map + start, count);
+ while (vbo_user) {
+ int i = ffs(vbo_user) - 1;
+ vbo_user &= ~(1 << i);
+
+ nouveau_buffer_release_gpu_storage(nv04_resource(nv50->vtxbuf[i].buffer));
+ }
}
-static void
-inline_edgeflag(void *priv, boolean enabled)
+void
+nv50_vertex_arrays_validate(struct nv50_context *nv50)
{
- struct inline_ctx *ctx = priv;
- struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
- struct nouveau_channel *chan = tesla->channel;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nv50_vertex_stateobj *vertex = nv50->vertex;
+ struct pipe_vertex_buffer *vb;
+ struct nv50_vertex_element *ve;
+ unsigned i;
+
+ if (unlikely(vertex->need_conversion)) {
+ nv50->vbo_fifo = ~0;
+ nv50->vbo_user = 0;
+ } else {
+ nv50_prevalidate_vbufs(nv50);
+ }
+
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_ATTRIB(0)), vertex->num_elements);
+ for (i = 0; i < vertex->num_elements; ++i) {
+ ve = &vertex->element[i];
+ vb = &nv50->vtxbuf[ve->pipe.vertex_buffer_index];
+
+ if (likely(vb->stride) || nv50->vbo_fifo) {
+ OUT_RING(chan, ve->state);
+ } else {
+ OUT_RING(chan, ve->state | NV50_3D_VERTEX_ARRAY_ATTRIB_CONST);
+ nv50->vbo_fifo &= ~(1 << i);
+ }
+ }
+
+ for (i = 0; i < vertex->num_elements; ++i) {
+ struct nv04_resource *res;
+ unsigned size, offset;
+
+ ve = &vertex->element[i];
+ vb = &nv50->vtxbuf[ve->pipe.vertex_buffer_index];
+
+ if (unlikely(ve->pipe.instance_divisor)) {
+ if (!(nv50->state.instance_elts & (1 << i))) {
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1);
+ OUT_RING (chan, 1);
+ }
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_DIVISOR(i)), 1);
+ OUT_RING (chan, ve->pipe.instance_divisor);
+ } else
+ if (unlikely(nv50->state.instance_elts & (1 << i))) {
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1);
+ OUT_RING (chan, 0);
+ }
+
+ res = nv04_resource(vb->buffer);
+
+ if (nv50->vbo_fifo || unlikely(vb->stride == 0)) {
+ if (!nv50->vbo_fifo)
+ nv50_emit_vtxattr(nv50, vb, &ve->pipe, i);
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ OUT_RING (chan, 0);
+ continue;
+ }
+
+ size = vb->buffer->width0;
+ offset = ve->pipe.src_offset + vb->buffer_offset;
+
+ MARK_RING (chan, 8, 4);
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ OUT_RING (chan, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
+ OUT_RESRCh(chan, res, size - 1, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, res, size - 1, NOUVEAU_BO_RD);
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_START_HIGH(i)), 2);
+ OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD);
+ }
+ for (; i < nv50->state.num_vtxelts; ++i) {
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_ATTRIB(i)), 1);
+ OUT_RING (chan, NV50_3D_VERTEX_ATTRIB_INACTIVE);
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ OUT_RING (chan, 0);
+ }
+
+ nv50->state.num_vtxelts = vertex->num_elements;
+ nv50->state.instance_elts = vertex->instance_elts;
+}
+
+#define NV50_PRIM_GL_CASE(n) \
+ case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
- BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
- OUT_RING (chan, enabled ? 1 : 0);
+static INLINE unsigned
+nv50_prim_gl(unsigned prim)
+{
+ switch (prim) {
+ NV50_PRIM_GL_CASE(POINTS);
+ NV50_PRIM_GL_CASE(LINES);
+ NV50_PRIM_GL_CASE(LINE_LOOP);
+ NV50_PRIM_GL_CASE(LINE_STRIP);
+ NV50_PRIM_GL_CASE(TRIANGLES);
+ NV50_PRIM_GL_CASE(TRIANGLE_STRIP);
+ NV50_PRIM_GL_CASE(TRIANGLE_FAN);
+ NV50_PRIM_GL_CASE(QUADS);
+ NV50_PRIM_GL_CASE(QUAD_STRIP);
+ NV50_PRIM_GL_CASE(POLYGON);
+ NV50_PRIM_GL_CASE(LINES_ADJACENCY);
+ NV50_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+ NV50_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+ NV50_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+ default:
+ return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+ break;
+ }
}
static void
-nv50_draw_elements_inline(struct pipe_context *pipe,
- struct pipe_resource *indexBuffer, unsigned indexSize,
- unsigned mode, unsigned start, unsigned count,
- unsigned startInstance, unsigned instanceCount)
+nv50_draw_vbo_flush_notify(struct nouveau_channel *chan)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_channel *chan = nv50->screen->tesla->channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct pipe_transfer *transfer;
- struct instance a[16];
- struct inline_ctx ctx;
- struct util_split_prim s;
- boolean nzi = FALSE;
- unsigned overhead;
-
- overhead = 16*3; /* potential instance adjustments */
- overhead += 4; /* Begin()/End() */
- overhead += 4; /* potential edgeflag disable/reenable */
- overhead += 3; /* potentially 3 VTX_ELT_U16/U32 packet headers */
-
- s.priv = &ctx;
- if (indexSize == 1)
- s.emit = inline_elt08;
- else
- if (indexSize == 2)
- s.emit = inline_elt16;
- else
- s.emit = inline_elt32;
- s.edge = inline_edgeflag;
-
- ctx.nv50 = nv50;
- ctx.map = pipe_buffer_map(pipe, indexBuffer, PIPE_TRANSFER_READ, &transfer);
- assert(ctx.map);
- if (!ctx.map)
- return;
-
- instance_init(nv50, a, startInstance);
- if (!nv50_state_validate(nv50, overhead + 6 + 3))
- return;
-
- BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
- OUT_RING (chan, NV50_CB_AUX | (24 << 8));
- OUT_RING (chan, startInstance);
- while (instanceCount--) {
- unsigned max_verts;
- boolean done;
-
- util_split_prim_init(&s, mode, start, count);
- do {
- if (AVAIL_RING(chan) < (overhead + 6)) {
- FIRE_RING(chan);
- if (!nv50_state_validate(nv50, (overhead + 6))) {
- assert(0);
- return;
- }
- }
-
- max_verts = AVAIL_RING(chan) - overhead;
- if (max_verts > 2047)
- max_verts = 2047;
- if (indexSize != 4)
- max_verts <<= 1;
- instance_step(nv50, a);
-
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
- OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1<<28) : 0));
- done = util_split_prim_next(&s, max_verts);
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
- OUT_RING (chan, 0);
- } while (!done);
-
- nzi = TRUE;
- }
-
- pipe_buffer_unmap(pipe, indexBuffer, transfer);
+ struct nv50_context *nv50 = chan->user_private;
+
+ nouveau_fence_update(&nv50->screen->base, TRUE);
+
+ nv50_bufctx_emit_relocs(nv50);
}
static void
-nv50_draw_elements_instanced(struct pipe_context *pipe,
- struct pipe_resource *indexBuffer,
- unsigned indexSize, int indexBias,
- unsigned mode, unsigned start, unsigned count,
- unsigned startInstance, unsigned instanceCount)
+nv50_draw_arrays(struct nv50_context *nv50,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned instance_count)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_channel *chan = nv50->screen->tesla->channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct instance a[16];
- unsigned prim = nv50_prim(mode);
-
- instance_init(nv50, a, startInstance);
- if (!nv50_state_validate(nv50, 13 + 16*3))
- return;
-
- if (nv50->vbo_fifo) {
- nv50_push_elements_instanced(pipe, indexBuffer, indexSize,
- indexBias, mode, start, count,
- startInstance, instanceCount);
- return;
- }
-
- /* indices are uint32 internally, so large indexBias means negative */
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_BASE, 1);
- OUT_RING (chan, indexBias);
-
- if (!nv50_resource_mapped_by_gpu(indexBuffer) || indexSize == 1) {
- nv50_draw_elements_inline(pipe, indexBuffer, indexSize,
- mode, start, count, startInstance,
- instanceCount);
- return;
- }
-
- BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
- OUT_RING (chan, NV50_CB_AUX | (24 << 8));
- OUT_RING (chan, startInstance);
- while (instanceCount--) {
- if (AVAIL_RING(chan) < (7 + 16*3)) {
- FIRE_RING(chan);
- if (!nv50_state_validate(nv50, 10 + 16*3)) {
- assert(0);
- return;
- }
- }
- instance_step(nv50, a);
-
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
- OUT_RING (chan, prim);
- if (indexSize == 4) {
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x30000, 0);
- OUT_RING (chan, count);
- nouveau_pushbuf_submit(chan,
- nv50_resource(indexBuffer)->bo,
- start << 2, count << 2);
- } else
- if (indexSize == 2) {
- unsigned vb_start = (start & ~1);
- unsigned vb_end = (start + count + 1) & ~1;
- unsigned dwords = (vb_end - vb_start) >> 1;
-
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
- OUT_RING (chan, ((start & 1) << 31) | count);
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x30000, 0);
- OUT_RING (chan, dwords);
- nouveau_pushbuf_submit(chan,
- nv50_resource(indexBuffer)->bo,
- vb_start << 1, dwords << 2);
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
- OUT_RING (chan, 0);
- }
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
- OUT_RING (chan, 0);
-
- prim |= (1 << 28);
- }
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ unsigned prim;
+
+ chan->flush_notify = nv50_draw_vbo_flush_notify;
+ chan->user_private = nv50;
+
+ prim = nv50_prim_gl(mode);
+
+ while (instance_count--) {
+ BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
+ OUT_RING (chan, prim);
+ BEGIN_RING(chan, RING_3D(VERTEX_BUFFER_FIRST), 2);
+ OUT_RING (chan, start);
+ OUT_RING (chan, count);
+ BEGIN_RING(chan, RING_3D(VERTEX_END_GL), 1);
+ OUT_RING (chan, 0);
+
+ prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+
+ chan->flush_notify = nv50_default_flush_notify;
}
-void
-nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+static void
+nv50_draw_elements_inline_u08(struct nouveau_channel *chan, uint8_t *map,
+ unsigned start, unsigned count)
{
- struct nv50_context *nv50 = nv50_context(pipe);
-
- if (info->indexed && nv50->idxbuf.buffer) {
- unsigned offset;
-
- assert(nv50->idxbuf.offset % nv50->idxbuf.index_size == 0);
- offset = nv50->idxbuf.offset / nv50->idxbuf.index_size;
-
- nv50_draw_elements_instanced(pipe,
- nv50->idxbuf.buffer,
- nv50->idxbuf.index_size,
- info->index_bias,
- info->mode,
- info->start + offset,
- info->count,
- info->start_instance,
- info->instance_count);
- }
- else {
- nv50_draw_arrays_instanced(pipe,
- info->mode,
- info->start,
- info->count,
- info->start_instance,
- info->instance_count);
- }
+ map += start;
+
+ if (count & 3) {
+ unsigned i;
+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), count & 3);
+ for (i = 0; i < (count & 3); ++i)
+ OUT_RING(chan, *map++);
+ count &= ~3;
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4;
+
+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U8), nr);
+ for (i = 0; i < nr; ++i) {
+ OUT_RING(chan,
+ (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]);
+ map += 4;
+ }
+ count -= nr * 4;
+ }
}
-static INLINE boolean
-nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
- struct nouveau_stateobj **pso,
- struct pipe_vertex_element *ve,
- struct pipe_vertex_buffer *vb)
-
+static void
+nv50_draw_elements_inline_u16(struct nouveau_channel *chan, uint16_t *map,
+ unsigned start, unsigned count)
{
- struct nouveau_stateobj *so;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_bo *bo = nv50_resource(vb->buffer)->bo;
- float v[4];
- int ret;
- unsigned nr_components = util_format_get_nr_components(ve->src_format);
-
- ret = nouveau_bo_map(bo, NOUVEAU_BO_RD);
- if (ret)
- return FALSE;
-
- util_format_read_4f(ve->src_format, v, 0, (uint8_t *)bo->map +
- (vb->buffer_offset + ve->src_offset), 0,
- 0, 0, 1, 1);
- so = *pso;
- if (!so)
- *pso = so = so_new(nv50->vtxelt->num_elements,
- nv50->vtxelt->num_elements * 4, 0);
-
- switch (nr_components) {
- case 4:
- so_method(so, tesla, NV50TCL_VTX_ATTR_4F_X(attrib), 4);
- so_data (so, fui(v[0]));
- so_data (so, fui(v[1]));
- so_data (so, fui(v[2]));
- so_data (so, fui(v[3]));
- break;
- case 3:
- so_method(so, tesla, NV50TCL_VTX_ATTR_3F_X(attrib), 3);
- so_data (so, fui(v[0]));
- so_data (so, fui(v[1]));
- so_data (so, fui(v[2]));
- break;
- case 2:
- so_method(so, tesla, NV50TCL_VTX_ATTR_2F_X(attrib), 2);
- so_data (so, fui(v[0]));
- so_data (so, fui(v[1]));
- break;
- case 1:
- if (attrib == nv50->vertprog->vp.edgeflag) {
- so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
- so_data (so, v[0] ? 1 : 0);
- }
- so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1);
- so_data (so, fui(v[0]));
- break;
- default:
- nouveau_bo_unmap(bo);
- return FALSE;
- }
-
- nouveau_bo_unmap(bo);
- return TRUE;
+ map += start;
+
+ if (count & 1) {
+ count &= ~1;
+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1);
+ OUT_RING (chan, *map++);
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr);
+ for (i = 0; i < nr; ++i) {
+ OUT_RING(chan, (map[1] << 16) | map[0]);
+ map += 2;
+ }
+ count -= nr * 2;
+ }
}
-void
-nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso)
+static void
+nv50_draw_elements_inline_u32(struct nouveau_channel *chan, uint32_t *map,
+ unsigned start, unsigned count)
{
- unsigned i;
+ map += start;
+
+ while (count) {
+ const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
- for (i = 0; i < cso->num_elements; ++i)
- cso->hw[i] = nv50_format_table[cso->pipe[i].src_format].vtx;
+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), nr);
+ OUT_RINGp (chan, map, nr);
+
+ map += nr;
+ count -= nr;
+ }
}
-struct nouveau_stateobj *
-nv50_vbo_validate(struct nv50_context *nv50)
+static void
+nv50_draw_elements_inline_u32_short(struct nouveau_channel *chan, uint32_t *map,
+ unsigned start, unsigned count)
{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *vtxbuf, *vtxfmt, *vtxattr;
- unsigned i, n_ve;
-
- /* don't validate if Gallium took away our buffers */
- if (nv50->vtxbuf_nr == 0)
- return NULL;
-
- nv50->vbo_fifo = 0;
- if (nv50->screen->force_push ||
- nv50->vertprog->vp.edgeflag < 16)
- nv50->vbo_fifo = 0xffff;
-
- for (i = 0; i < nv50->vtxbuf_nr; i++) {
- if (nv50->vtxbuf[i].stride &&
- !nv50_resource_mapped_by_gpu(nv50->vtxbuf[i].buffer))
- nv50->vbo_fifo = 0xffff;
- }
-
- n_ve = MAX2(nv50->vtxelt->num_elements, nv50->state.vtxelt_nr);
-
- vtxattr = NULL;
- vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt->num_elements * 4);
- vtxfmt = so_new(1, n_ve, 0);
- so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve);
-
- for (i = 0; i < nv50->vtxelt->num_elements; i++) {
- struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
- struct pipe_vertex_buffer *vb =
- &nv50->vtxbuf[ve->vertex_buffer_index];
- struct nouveau_bo *bo = nv50_resource(vb->buffer)->bo;
- uint32_t hw = nv50->vtxelt->hw[i];
-
- if (!vb->stride &&
- nv50_vbo_static_attrib(nv50, i, &vtxattr, ve, vb)) {
- so_data(vtxfmt, hw | (1 << 4));
-
- so_method(vtxbuf, tesla,
- NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
- so_data (vtxbuf, 0);
-
- nv50->vbo_fifo &= ~(1 << i);
- continue;
- }
-
- if (nv50->vbo_fifo) {
- so_data (vtxfmt, hw | (ve->instance_divisor ? (1 << 4) : i));
- so_method(vtxbuf, tesla,
- NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
- so_data (vtxbuf, 0);
- continue;
- }
-
- so_data(vtxfmt, hw | i);
-
- so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3);
- so_data (vtxbuf, 0x20000000 |
- (ve->instance_divisor ? 0 : vb->stride));
- so_reloc (vtxbuf, bo, vb->buffer_offset +
- ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
- NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (vtxbuf, bo, vb->buffer_offset +
- ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
- NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-
- /* vertex array limits */
- so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
- so_reloc (vtxbuf, bo, vb->buffer->width0 - 1,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
- NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (vtxbuf, bo, vb->buffer->width0 - 1,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
- NOUVEAU_BO_LOW, 0, 0);
- }
- for (; i < n_ve; ++i) {
- so_data (vtxfmt, 0x7e080010);
-
- so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
- so_data (vtxbuf, 0);
- }
- nv50->state.vtxelt_nr = nv50->vtxelt->num_elements;
-
- so_ref (vtxbuf, &nv50->state.vtxbuf);
- so_ref (vtxattr, &nv50->state.vtxattr);
- so_ref (NULL, &vtxbuf);
- so_ref (NULL, &vtxattr);
- return vtxfmt;
+ map += start;
+
+ if (count & 1) {
+ count--;
+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1);
+ OUT_RING (chan, *map++);
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr);
+ for (i = 0; i < nr; ++i) {
+ OUT_RING(chan, (map[1] << 16) | map[0]);
+ map += 2;
+ }
+ count -= nr * 2;
+ }
}
+static void
+nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned instance_count, int32_t index_bias)
+{
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ void *data;
+ unsigned prim;
+ const unsigned index_size = nv50->idxbuf.index_size;
+
+ chan->flush_notify = nv50_draw_vbo_flush_notify;
+ chan->user_private = nv50;
+
+ prim = nv50_prim_gl(mode);
+
+ if (index_bias != nv50->state.index_bias) {
+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_BASE), 1);
+ OUT_RING (chan, index_bias);
+ nv50->state.index_bias = index_bias;
+ }
+
+ if (nouveau_resource_mapped_by_gpu(nv50->idxbuf.buffer)) {
+ struct nv04_resource *res = nv04_resource(nv50->idxbuf.buffer);
+
+ start += nv50->idxbuf.offset >> (index_size >> 1);
+
+ nouveau_buffer_adjust_score(&nv50->base, res, 1);
+
+ while (instance_count--) {
+ BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
+ OUT_RING (chan, mode);
+
+ switch (index_size) {
+ case 4:
+ {
+ WAIT_RING (chan, 2);
+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32) | 0x30000, 0);
+ OUT_RING (chan, count);
+ nouveau_pushbuf_submit(chan, res->bo, res->offset + start * 4,
+ count * 4);
+ }
+ break;
+ case 2:
+ {
+ unsigned pb_start = (start & ~1);
+ unsigned pb_words = (((start + count + 1) & ~1) - pb_start) >> 1;
+
+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_U16_SETUP), 1);
+ OUT_RING (chan, (start << 31) | count);
+ WAIT_RING (chan, 2);
+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_U16) | 0x30000, 0);
+ OUT_RING (chan, pb_words);
+ nouveau_pushbuf_submit(chan, res->bo, res->offset + pb_start * 2,
+ pb_words * 4);
+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_U16_SETUP), 1);
+ OUT_RING (chan, 0);
+ break;
+ }
+ case 1:
+ {
+ unsigned pb_start = (start & ~3);
+ unsigned pb_words = (((start + count + 3) & ~3) - pb_start) >> 1;
+
+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_U8_SETUP), 1);
+ OUT_RING (chan, (start << 30) | count);
+ WAIT_RING (chan, 2);
+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_U8) | 0x30000, 0);
+ OUT_RING (chan, pb_words);
+ nouveau_pushbuf_submit(chan, res->bo, res->offset + pb_start,
+ pb_words * 4);
+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_U8_SETUP), 1);
+ OUT_RING (chan, 0);
+ break;
+ }
+ default:
+ assert(0);
+ return;
+ }
+ BEGIN_RING(chan, RING_3D(VERTEX_END_GL), 1);
+ OUT_RING (chan, 0);
+
+ nv50_resource_fence(res, NOUVEAU_BO_RD);
+
+ mode |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+ } else {
+ data = nouveau_resource_map_offset(&nv50->base,
+ nv04_resource(nv50->idxbuf.buffer),
+ nv50->idxbuf.offset, NOUVEAU_BO_RD);
+ if (!data)
+ return;
+
+ while (instance_count--) {
+ BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
+ OUT_RING (chan, prim);
+ switch (index_size) {
+ case 1:
+ nv50_draw_elements_inline_u08(chan, data, start, count);
+ break;
+ case 2:
+ nv50_draw_elements_inline_u16(chan, data, start, count);
+ break;
+ case 4:
+ if (shorten)
+ nv50_draw_elements_inline_u32_short(chan, data, start, count);
+ else
+ nv50_draw_elements_inline_u32(chan, data, start, count);
+ break;
+ default:
+ assert(0);
+ return;
+ }
+ BEGIN_RING(chan, RING_3D(VERTEX_END_GL), 1);
+ OUT_RING (chan, 0);
+
+ prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+ }
+
+ chan->flush_notify = nv50_default_flush_notify;
+}
+void
+nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+
+ /* For picking only a few vertices from a large user buffer, push is better,
+ * if index count is larger and we expect repeated vertices, suggest upload.
+ */
+ nv50->vbo_push_hint = /* the 64 is heuristic */
+ !(info->indexed &&
+ ((info->max_index - info->min_index + 64) < info->count));
+
+ nv50->vbo_min_index = info->min_index;
+ nv50->vbo_max_index = info->max_index;
+
+ if (nv50->vbo_push_hint != !!nv50->vbo_fifo)
+ nv50->dirty |= NV50_NEW_ARRAYS;
+
+ if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_VERTEX | NV50_NEW_ARRAYS)))
+ nv50_update_user_vbufs(nv50);
+
+ nv50_state_validate(nv50);
+
+ if (nv50->vbo_fifo) {
+ nv50_push_vbo(nv50, info);
+ return;
+ }
+
+ if (nv50->state.instance_base != info->start_instance) {
+ nv50->state.instance_base = info->start_instance;
+ /* NOTE: this does not affect the shader input, should it ? */
+ BEGIN_RING(chan, RING_3D(VB_INSTANCE_BASE), 1);
+ OUT_RING (chan, info->start_instance);
+ }
+
+ if (nv50->base.vbo_dirty) {
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FLUSH), 1);
+ OUT_RING (chan, 0);
+ nv50->base.vbo_dirty = FALSE;
+ }
+
+ if (!info->indexed) {
+ nv50_draw_arrays(nv50,
+ info->mode, info->start, info->count,
+ info->instance_count);
+ } else {
+ boolean shorten = info->max_index <= 65535;
+
+ assert(nv50->idxbuf.buffer);
+
+ if (info->primitive_restart != nv50->state.prim_restart) {
+ if (info->primitive_restart) {
+ BEGIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 2);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, info->restart_index);
+
+ if (info->restart_index > 65535)
+ shorten = FALSE;
+ } else {
+ BEGIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 1);
+ OUT_RING (chan, 0);
+ }
+ nv50->state.prim_restart = info->primitive_restart;
+ } else
+ if (info->primitive_restart) {
+ BEGIN_RING(chan, RING_3D(PRIM_RESTART_INDEX), 1);
+ OUT_RING (chan, info->restart_index);
+
+ if (info->restart_index > 65535)
+ shorten = FALSE;
+ }
+
+ nv50_draw_elements(nv50, shorten,
+ info->mode, info->start, info->count,
+ info->instance_count, info->index_bias);
+ }
+
+ nv50_release_user_vbufs(nv50);
+}
diff --git a/src/gallium/drivers/nv50/nv50_winsys.h b/src/gallium/drivers/nv50/nv50_winsys.h
new file mode 100644
index 0000000000..afa2a00c7a
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_winsys.h
@@ -0,0 +1,106 @@
+
+#ifndef __NV50_WINSYS_H__
+#define __NV50_WINSYS_H__
+
+#include <stdint.h>
+#include <unistd.h>
+
+#include "pipe/p_defines.h"
+
+#include "nouveau/nouveau_bo.h"
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_grobj.h"
+#include "nouveau/nouveau_device.h"
+#include "nouveau/nouveau_resource.h"
+#include "nouveau/nouveau_pushbuf.h"
+#include "nouveau/nouveau_reloc.h"
+#include "nouveau/nouveau_notifier.h"
+
+#include "nouveau/nouveau_buffer.h"
+
+#ifndef NV04_PFIFO_MAX_PACKET_LEN
+#define NV04_PFIFO_MAX_PACKET_LEN 2047
+#endif
+
+#define NV50_SUBCH_3D 5
+#define NV50_SUBCH_2D 6
+#define NV50_SUBCH_MF 7
+
+#define NV50_MF_(n) NV50_M2MF_##n
+
+#define RING_3D(n) ((NV50_SUBCH_3D << 13) | NV50_3D_##n)
+#define RING_2D(n) ((NV50_SUBCH_2D << 13) | NV50_2D_##n)
+#define RING_MF(n) ((NV50_SUBCH_MF << 13) | NV50_MF_(n))
+
+#define RING_3D_(m) ((NV50_SUBCH_3D << 13) | (m))
+#define RING_2D_(m) ((NV50_SUBCH_2D << 13) | (m))
+#define RING_MF_(m) ((NV50_SUBCH_MF << 13) | (m))
+
+#define RING_GR(gr, m) (((gr)->subc << 13) | (m))
+
+int nouveau_pushbuf_flush(struct nouveau_channel *, unsigned min);
+
+static inline uint32_t
+nouveau_bo_tile_layout(struct nouveau_bo *bo)
+{
+ return bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK;
+}
+
+static INLINE void
+nouveau_bo_validate(struct nouveau_channel *chan,
+ struct nouveau_bo *bo, unsigned flags)
+{
+ nouveau_reloc_emit(chan, NULL, 0, NULL, bo, 0, 0, flags, 0, 0);
+}
+
+/* incremental methods */
+static INLINE void
+BEGIN_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned size)
+{
+ WAIT_RING(chan, size + 1);
+ OUT_RING (chan, (size << 18) | mthd);
+}
+
+/* non-incremental */
+static INLINE void
+BEGIN_RING_NI(struct nouveau_channel *chan, uint32_t mthd, unsigned size)
+{
+ WAIT_RING(chan, size + 1);
+ OUT_RING (chan, (0x2 << 29) | (size << 18) | mthd);
+}
+
+static INLINE int
+OUT_RESRCh(struct nouveau_channel *chan, struct nv04_resource *res,
+ unsigned delta, unsigned flags)
+{
+ return OUT_RELOCh(chan, res->bo, res->offset + delta, res->domain | flags);
+}
+
+static INLINE int
+OUT_RESRCl(struct nouveau_channel *chan, struct nv04_resource *res,
+ unsigned delta, unsigned flags)
+{
+ if (flags & NOUVEAU_BO_WR)
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags);
+}
+
+static INLINE void
+BIND_RING(struct nouveau_channel *chan, struct nouveau_grobj *gr, unsigned s)
+{
+ struct nouveau_subchannel *subc = &gr->channel->subc[s];
+
+ assert(s < 8);
+ if (subc->gr) {
+ assert(subc->gr->bound != NOUVEAU_GROBJ_BOUND_EXPLICIT);
+ subc->gr->bound = NOUVEAU_GROBJ_UNBOUND;
+ }
+ subc->gr = gr;
+ subc->gr->subc = s;
+ subc->gr->bound = NOUVEAU_GROBJ_BOUND_EXPLICIT;
+
+ BEGIN_RING(chan, RING_GR(gr, 0x0000), 1);
+ OUT_RING (chan, gr->handle);
+}
+
+#endif
diff --git a/src/gallium/drivers/nvc0/Makefile b/src/gallium/drivers/nvc0/Makefile
new file mode 100644
index 0000000000..e1cd188eec
--- /dev/null
+++ b/src/gallium/drivers/nvc0/Makefile
@@ -0,0 +1,34 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nvc0
+
+C_SOURCES = \
+ nvc0_context.c \
+ nvc0_draw.c \
+ nvc0_formats.c \
+ nvc0_miptree.c \
+ nvc0_resource.c \
+ nvc0_screen.c \
+ nvc0_state.c \
+ nvc0_state_validate.c \
+ nvc0_surface.c \
+ nvc0_tex.c \
+ nvc0_transfer.c \
+ nvc0_vbo.c \
+ nvc0_program.c \
+ nvc0_shader_state.c \
+ nvc0_pc.c \
+ nvc0_pc_print.c \
+ nvc0_pc_emit.c \
+ nvc0_tgsi_to_nc.c \
+ nvc0_pc_optimize.c \
+ nvc0_pc_regalloc.c \
+ nvc0_push.c \
+ nvc0_push2.c \
+ nvc0_query.c
+
+LIBRARY_INCLUDES = \
+ $(LIBDRM_CFLAGS)
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/nvc0/SConscript b/src/gallium/drivers/nvc0/SConscript
new file mode 100644
index 0000000000..dbbbf663b3
--- /dev/null
+++ b/src/gallium/drivers/nvc0/SConscript
@@ -0,0 +1,33 @@
+Import('*')
+
+env = env.Clone()
+
+nvc0 = env.ConvenienceLibrary(
+ target = 'nvc0',
+ source = [
+ 'nvc0_context.c',
+ 'nvc0_draw.c',
+ 'nvc0_formats.c',
+ 'nvc0_miptree.c',
+ 'nvc0_resource.c',
+ 'nvc0_screen.c',
+ 'nvc0_state.c',
+ 'nvc0_state_validate.c',
+ 'nvc0_surface.c',
+ 'nvc0_tex.c',
+ 'nvc0_transfer.c',
+ 'nvc0_vbo.c',
+ 'nvc0_program.c',
+ 'nvc0_shader_state.c',
+ 'nvc0_pc.c',
+ 'nvc0_pc_print.c',
+ 'nvc0_pc_emit.c',
+ 'nvc0_tgsi_to_nc.c',
+ 'nvc0_pc_optimize.c',
+ 'nvc0_pc_regalloc.c',
+ 'nvc0_push.c',
+ 'nvc0_push2.c',
+ 'nvc0_query.c'
+ ])
+
+Export('nvc0')
diff --git a/src/gallium/drivers/nvc0/nvc0_2d.xml.h b/src/gallium/drivers/nvc0/nvc0_2d.xml.h
new file mode 100644
index 0000000000..aebcd510e8
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_2d.xml.h
@@ -0,0 +1,380 @@
+#ifndef NVC0_2D_XML
+#define NVC0_2D_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_2d.xml ( 9454 bytes, from 2010-10-16 16:03:11)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24)
+- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_2D_DST_FORMAT 0x00000200
+
+#define NVC0_2D_DST_LINEAR 0x00000204
+
+#define NVC0_2D_DST_TILE_MODE 0x00000208
+
+#define NVC0_2D_DST_DEPTH 0x0000020c
+
+#define NVC0_2D_DST_LAYER 0x00000210
+
+#define NVC0_2D_DST_PITCH 0x00000214
+
+#define NVC0_2D_DST_WIDTH 0x00000218
+
+#define NVC0_2D_DST_HEIGHT 0x0000021c
+
+#define NVC0_2D_DST_ADDRESS_HIGH 0x00000220
+
+#define NVC0_2D_DST_ADDRESS_LOW 0x00000224
+
+#define NVC0_2D_UNK228 0x00000228
+
+#define NVC0_2D_SRC_FORMAT 0x00000230
+
+#define NVC0_2D_SRC_LINEAR 0x00000234
+
+#define NVC0_2D_SRC_TILE_MODE 0x00000238
+
+#define NVC0_2D_SRC_DEPTH 0x0000023c
+
+#define NVC0_2D_SRC_LAYER 0x00000240
+
+#define NVC0_2D_SRC_PITCH 0x00000244
+#define NVC0_2D_SRC_PITCH__MAX 0x00040000
+
+#define NVC0_2D_SRC_WIDTH 0x00000248
+#define NVC0_2D_SRC_WIDTH__MAX 0x00010000
+
+#define NVC0_2D_SRC_HEIGHT 0x0000024c
+#define NVC0_2D_SRC_HEIGHT__MAX 0x00010000
+
+#define NVC0_2D_SRC_ADDRESS_HIGH 0x00000250
+
+#define NVC0_2D_SRC_ADDRESS_LOW 0x00000254
+
+#define NVC0_2D_UNK258 0x00000258
+
+#define NVC0_2D_UNK260 0x00000260
+
+#define NVC0_2D_COND_ADDRESS_HIGH 0x00000264
+
+#define NVC0_2D_COND_ADDRESS_LOW 0x00000268
+
+#define NVC0_2D_COND_MODE 0x0000026c
+#define NVC0_2D_COND_MODE_NEVER 0x00000000
+#define NVC0_2D_COND_MODE_ALWAYS 0x00000001
+#define NVC0_2D_COND_MODE_RES_NON_ZERO 0x00000002
+#define NVC0_2D_COND_MODE_EQUAL 0x00000003
+#define NVC0_2D_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NVC0_2D_CLIP_X 0x00000280
+
+#define NVC0_2D_CLIP_Y 0x00000284
+
+#define NVC0_2D_CLIP_W 0x00000288
+
+#define NVC0_2D_CLIP_H 0x0000028c
+
+#define NVC0_2D_CLIP_ENABLE 0x00000290
+
+#define NVC0_2D_COLOR_KEY_FORMAT 0x00000294
+#define NVC0_2D_COLOR_KEY_FORMAT_16BPP 0x00000000
+#define NVC0_2D_COLOR_KEY_FORMAT_15BPP 0x00000001
+#define NVC0_2D_COLOR_KEY_FORMAT_24BPP 0x00000002
+#define NVC0_2D_COLOR_KEY_FORMAT_30BPP 0x00000003
+#define NVC0_2D_COLOR_KEY_FORMAT_8BPP 0x00000004
+#define NVC0_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005
+#define NVC0_2D_COLOR_KEY_FORMAT_32BPP 0x00000006
+
+#define NVC0_2D_COLOR_KEY 0x00000298
+
+#define NVC0_2D_COLOR_KEY_ENABLE 0x0000029c
+
+#define NVC0_2D_ROP 0x000002a0
+
+#define NVC0_2D_BETA1 0x000002a4
+
+#define NVC0_2D_BETA4 0x000002a8
+
+#define NVC0_2D_OPERATION 0x000002ac
+#define NVC0_2D_OPERATION_SRCCOPY_AND 0x00000000
+#define NVC0_2D_OPERATION_ROP_AND 0x00000001
+#define NVC0_2D_OPERATION_BLEND_AND 0x00000002
+#define NVC0_2D_OPERATION_SRCCOPY 0x00000003
+#define NVC0_2D_OPERATION_UNK4 0x00000004
+#define NVC0_2D_OPERATION_SRCCOPY_PREMULT 0x00000005
+#define NVC0_2D_OPERATION_BLEND_PREMULT 0x00000006
+
+#define NVC0_2D_UNK2B0 0x000002b0
+#define NVC0_2D_UNK2B0_UNK0__MASK 0x0000003f
+#define NVC0_2D_UNK2B0_UNK0__SHIFT 0
+#define NVC0_2D_UNK2B0_UNK1__MASK 0x00003f00
+#define NVC0_2D_UNK2B0_UNK1__SHIFT 8
+
+#define NVC0_2D_PATTERN_SELECT 0x000002b4
+#define NVC0_2D_PATTERN_SELECT_MONO_8X8 0x00000000
+#define NVC0_2D_PATTERN_SELECT_MONO_64X1 0x00000001
+#define NVC0_2D_PATTERN_SELECT_MONO_1X64 0x00000002
+#define NVC0_2D_PATTERN_SELECT_COLOR 0x00000003
+
+#define NVC0_2D_PATTERN_COLOR_FORMAT 0x000002e8
+#define NVC0_2D_PATTERN_COLOR_FORMAT_16BPP 0x00000000
+#define NVC0_2D_PATTERN_COLOR_FORMAT_15BPP 0x00000001
+#define NVC0_2D_PATTERN_COLOR_FORMAT_32BPP 0x00000002
+#define NVC0_2D_PATTERN_COLOR_FORMAT_8BPP 0x00000003
+#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK4 0x00000004
+#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK5 0x00000005
+
+#define NVC0_2D_PATTERN_MONO_FORMAT 0x000002ec
+#define NVC0_2D_PATTERN_MONO_FORMAT_CGA6 0x00000000
+#define NVC0_2D_PATTERN_MONO_FORMAT_LE 0x00000001
+
+#define NVC0_2D_PATTERN_COLOR(i0) (0x000002f0 + 0x4*(i0))
+#define NVC0_2D_PATTERN_COLOR__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_COLOR__LEN 0x00000002
+
+#define NVC0_2D_PATTERN_BITMAP(i0) (0x000002f8 + 0x4*(i0))
+#define NVC0_2D_PATTERN_BITMAP__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_BITMAP__LEN 0x00000002
+
+#define NVC0_2D_PATTERN_X8R8G8B8(i0) (0x00000300 + 0x4*(i0))
+#define NVC0_2D_PATTERN_X8R8G8B8__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_X8R8G8B8__LEN 0x00000040
+#define NVC0_2D_PATTERN_X8R8G8B8_B__MASK 0x000000ff
+#define NVC0_2D_PATTERN_X8R8G8B8_B__SHIFT 0
+#define NVC0_2D_PATTERN_X8R8G8B8_G__MASK 0x0000ff00
+#define NVC0_2D_PATTERN_X8R8G8B8_G__SHIFT 8
+#define NVC0_2D_PATTERN_X8R8G8B8_R__MASK 0x00ff0000
+#define NVC0_2D_PATTERN_X8R8G8B8_R__SHIFT 16
+
+#define NVC0_2D_PATTERN_R5G6B5(i0) (0x00000400 + 0x4*(i0))
+#define NVC0_2D_PATTERN_R5G6B5__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_R5G6B5__LEN 0x00000020
+#define NVC0_2D_PATTERN_R5G6B5_B0__MASK 0x0000001f
+#define NVC0_2D_PATTERN_R5G6B5_B0__SHIFT 0
+#define NVC0_2D_PATTERN_R5G6B5_G0__MASK 0x000007e0
+#define NVC0_2D_PATTERN_R5G6B5_G0__SHIFT 5
+#define NVC0_2D_PATTERN_R5G6B5_R0__MASK 0x0000f800
+#define NVC0_2D_PATTERN_R5G6B5_R0__SHIFT 11
+#define NVC0_2D_PATTERN_R5G6B5_B1__MASK 0x001f0000
+#define NVC0_2D_PATTERN_R5G6B5_B1__SHIFT 16
+#define NVC0_2D_PATTERN_R5G6B5_G1__MASK 0x07e00000
+#define NVC0_2D_PATTERN_R5G6B5_G1__SHIFT 21
+#define NVC0_2D_PATTERN_R5G6B5_R1__MASK 0xf8000000
+#define NVC0_2D_PATTERN_R5G6B5_R1__SHIFT 27
+
+#define NVC0_2D_PATTERN_X1R5G5B5(i0) (0x00000480 + 0x4*(i0))
+#define NVC0_2D_PATTERN_X1R5G5B5__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_X1R5G5B5__LEN 0x00000020
+#define NVC0_2D_PATTERN_X1R5G5B5_B0__MASK 0x0000001f
+#define NVC0_2D_PATTERN_X1R5G5B5_B0__SHIFT 0
+#define NVC0_2D_PATTERN_X1R5G5B5_G0__MASK 0x000003e0
+#define NVC0_2D_PATTERN_X1R5G5B5_G0__SHIFT 5
+#define NVC0_2D_PATTERN_X1R5G5B5_R0__MASK 0x00007c00
+#define NVC0_2D_PATTERN_X1R5G5B5_R0__SHIFT 10
+#define NVC0_2D_PATTERN_X1R5G5B5_B1__MASK 0x001f0000
+#define NVC0_2D_PATTERN_X1R5G5B5_B1__SHIFT 16
+#define NVC0_2D_PATTERN_X1R5G5B5_G1__MASK 0x03e00000
+#define NVC0_2D_PATTERN_X1R5G5B5_G1__SHIFT 21
+#define NVC0_2D_PATTERN_X1R5G5B5_R1__MASK 0x7c000000
+#define NVC0_2D_PATTERN_X1R5G5B5_R1__SHIFT 26
+
+#define NVC0_2D_PATTERN_Y8(i0) (0x00000500 + 0x4*(i0))
+#define NVC0_2D_PATTERN_Y8__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_Y8__LEN 0x00000010
+#define NVC0_2D_PATTERN_Y8_Y0__MASK 0x000000ff
+#define NVC0_2D_PATTERN_Y8_Y0__SHIFT 0
+#define NVC0_2D_PATTERN_Y8_Y1__MASK 0x0000ff00
+#define NVC0_2D_PATTERN_Y8_Y1__SHIFT 8
+#define NVC0_2D_PATTERN_Y8_Y2__MASK 0x00ff0000
+#define NVC0_2D_PATTERN_Y8_Y2__SHIFT 16
+#define NVC0_2D_PATTERN_Y8_Y3__MASK 0xff000000
+#define NVC0_2D_PATTERN_Y8_Y3__SHIFT 24
+
+#define NVC0_2D_DRAW_SHAPE 0x00000580
+#define NVC0_2D_DRAW_SHAPE_POINTS 0x00000000
+#define NVC0_2D_DRAW_SHAPE_LINES 0x00000001
+#define NVC0_2D_DRAW_SHAPE_LINE_STRIP 0x00000002
+#define NVC0_2D_DRAW_SHAPE_TRIANGLES 0x00000003
+#define NVC0_2D_DRAW_SHAPE_RECTANGLES 0x00000004
+
+#define NVC0_2D_DRAW_COLOR_FORMAT 0x00000584
+
+#define NVC0_2D_DRAW_COLOR 0x00000588
+
+#define NVC0_2D_UNK58C 0x0000058c
+#define NVC0_2D_UNK58C_0 0x00000001
+#define NVC0_2D_UNK58C_1 0x00000010
+#define NVC0_2D_UNK58C_2 0x00000100
+#define NVC0_2D_UNK58C_3 0x00001000
+
+#define NVC0_2D_DRAW_POINT16 0x000005e0
+#define NVC0_2D_DRAW_POINT16_X__MASK 0x0000ffff
+#define NVC0_2D_DRAW_POINT16_X__SHIFT 0
+#define NVC0_2D_DRAW_POINT16_Y__MASK 0xffff0000
+#define NVC0_2D_DRAW_POINT16_Y__SHIFT 16
+
+#define NVC0_2D_DRAW_POINT32_X(i0) (0x00000600 + 0x8*(i0))
+#define NVC0_2D_DRAW_POINT32_X__ESIZE 0x00000008
+#define NVC0_2D_DRAW_POINT32_X__LEN 0x00000040
+
+#define NVC0_2D_DRAW_POINT32_Y(i0) (0x00000604 + 0x8*(i0))
+#define NVC0_2D_DRAW_POINT32_Y__ESIZE 0x00000008
+#define NVC0_2D_DRAW_POINT32_Y__LEN 0x00000040
+
+#define NVC0_2D_SIFC_BITMAP_ENABLE 0x00000800
+
+#define NVC0_2D_SIFC_FORMAT 0x00000804
+
+#define NVC0_2D_SIFC_BITMAP_FORMAT 0x00000808
+#define NVC0_2D_SIFC_BITMAP_FORMAT_I1 0x00000000
+#define NVC0_2D_SIFC_BITMAP_FORMAT_I4 0x00000001
+#define NVC0_2D_SIFC_BITMAP_FORMAT_I8 0x00000002
+
+#define NVC0_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c
+
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002
+
+#define NVC0_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814
+
+#define NVC0_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818
+
+#define NVC0_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c
+
+#define NVC0_2D_SIFC_WIDTH 0x00000838
+
+#define NVC0_2D_SIFC_HEIGHT 0x0000083c
+
+#define NVC0_2D_SIFC_DX_DU_FRACT 0x00000840
+
+#define NVC0_2D_SIFC_DX_DU_INT 0x00000844
+
+#define NVC0_2D_SIFC_DY_DV_FRACT 0x00000848
+
+#define NVC0_2D_SIFC_DY_DV_INT 0x0000084c
+
+#define NVC0_2D_SIFC_DST_X_FRACT 0x00000850
+
+#define NVC0_2D_SIFC_DST_X_INT 0x00000854
+
+#define NVC0_2D_SIFC_DST_Y_FRACT 0x00000858
+
+#define NVC0_2D_SIFC_DST_Y_INT 0x0000085c
+
+#define NVC0_2D_SIFC_DATA 0x00000860
+
+#define NVC0_2D_UNK0870 0x00000870
+
+#define NVC0_2D_UNK0880 0x00000880
+
+#define NVC0_2D_UNK0884 0x00000884
+
+#define NVC0_2D_UNK0888 0x00000888
+
+#define NVC0_2D_BLIT_CONTROL 0x0000088c
+#define NVC0_2D_BLIT_CONTROL_ORIGIN__MASK 0x00000001
+#define NVC0_2D_BLIT_CONTROL_ORIGIN__SHIFT 0
+#define NVC0_2D_BLIT_CONTROL_ORIGIN_CENTER 0x00000000
+#define NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER 0x00000001
+#define NVC0_2D_BLIT_CONTROL_FILTER__MASK 0x00000010
+#define NVC0_2D_BLIT_CONTROL_FILTER__SHIFT 4
+#define NVC0_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE 0x00000000
+#define NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR 0x00000010
+
+#define NVC0_2D_BLIT_DST_X 0x000008b0
+
+#define NVC0_2D_BLIT_DST_Y 0x000008b4
+
+#define NVC0_2D_BLIT_DST_W 0x000008b8
+
+#define NVC0_2D_BLIT_DST_H 0x000008bc
+
+#define NVC0_2D_BLIT_DU_DX_FRACT 0x000008c0
+
+#define NVC0_2D_BLIT_DU_DX_INT 0x000008c4
+
+#define NVC0_2D_BLIT_DV_DY_FRACT 0x000008c8
+
+#define NVC0_2D_BLIT_DV_DY_INT 0x000008cc
+
+#define NVC0_2D_BLIT_SRC_X_FRACT 0x000008d0
+
+#define NVC0_2D_BLIT_SRC_X_INT 0x000008d4
+
+#define NVC0_2D_BLIT_SRC_Y_FRACT 0x000008d8
+
+#define NVC0_2D_BLIT_SRC_Y_INT 0x000008dc
+
+
+#endif /* NVC0_2D_XML */
diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
new file mode 100644
index 0000000000..94fa081ad7
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
@@ -0,0 +1,1244 @@
+#ifndef NVC0_3D_XML
+#define NVC0_3D_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_3d.xml ( 30827 bytes, from 2011-01-13 18:23:07)
+- copyright.xml ( 6452 bytes, from 2010-11-25 23:28:20)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+- nv_3ddefs.xml ( 16394 bytes, from 2010-12-17 15:10:40)
+- nv_object.xml ( 11898 bytes, from 2010-12-23 14:14:20)
+- nvchipsets.xml ( 3074 bytes, from 2010-11-07 00:36:28)
+- nv50_defs.xml ( 4487 bytes, from 2010-12-10 00:37:17)
+
+Copyright (C) 2006-2011 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_3D_NOTIFY_ADDRESS_HIGH 0x00000104
+#define NVC0_3D_NOTIFY_ADDRESS_LOW 0x00000108
+#define NVC0_3D_NOTIFY 0x0000010c
+
+#define NVC0_3D_SERIALIZE 0x00000110
+
+#define NVC0_3D_EARLY_FRAGMENT_TESTS 0x00000210
+
+#define NVC0_3D_MEM_BARRIER 0x0000021c
+#define NVC0_3D_MEM_BARRIER_UNK0 0x00000001
+#define NVC0_3D_MEM_BARRIER_UNK1 0x00000002
+#define NVC0_3D_MEM_BARRIER_UNK2 0x00000004
+#define NVC0_3D_MEM_BARRIER_UNK4 0x00000010
+#define NVC0_3D_MEM_BARRIER_UNK8 0x00000100
+#define NVC0_3D_MEM_BARRIER_UNK12 0x00001000
+
+#define NVC0_3D_TESS_MODE 0x00000320
+#define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f
+#define NVC0_3D_TESS_MODE_PRIM__SHIFT 0
+#define NVC0_3D_TESS_MODE_PRIM_ISOLINES 0x00000000
+#define NVC0_3D_TESS_MODE_PRIM_TRIANGLES 0x00000001
+#define NVC0_3D_TESS_MODE_PRIM_QUADS 0x00000002
+#define NVC0_3D_TESS_MODE_SPACING__MASK 0x000000f0
+#define NVC0_3D_TESS_MODE_SPACING__SHIFT 4
+#define NVC0_3D_TESS_MODE_SPACING_EQUAL 0x00000000
+#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD 0x00000010
+#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN 0x00000020
+#define NVC0_3D_TESS_MODE_CW 0x00000100
+#define NVC0_3D_TESS_MODE_CONNECTED 0x00000200
+
+#define NVC0_3D_TESS_LEVEL_OUTER(i0) (0x00000324 + 0x4*(i0))
+#define NVC0_3D_TESS_LEVEL_OUTER__ESIZE 0x00000004
+#define NVC0_3D_TESS_LEVEL_OUTER__LEN 0x00000004
+
+#define NVC0_3D_TESS_LEVEL_INNER(i0) (0x00000334 + 0x4*(i0))
+#define NVC0_3D_TESS_LEVEL_INNER__ESIZE 0x00000004
+#define NVC0_3D_TESS_LEVEL_INNER__LEN 0x00000002
+
+#define NVC0_3D_RASTERIZE_ENABLE 0x0000037c
+
+#define NVC0_3D_TFB(i0) (0x00000380 + 0x20*(i0))
+#define NVC0_3D_TFB__ESIZE 0x00000020
+#define NVC0_3D_TFB__LEN 0x00000004
+
+#define NVC0_3D_TFB_BUFFER_ENABLE(i0) (0x00000380 + 0x20*(i0))
+
+#define NVC0_3D_TFB_ADDRESS_HIGH(i0) (0x00000384 + 0x20*(i0))
+
+#define NVC0_3D_TFB_ADDRESS_LOW(i0) (0x00000388 + 0x20*(i0))
+
+#define NVC0_3D_TFB_BUFFER_SIZE(i0) (0x0000038c + 0x20*(i0))
+
+#define NVC0_3D_TFB_PRIMITIVE_ID(i0) (0x00000390 + 0x20*(i0))
+
+#define NVC0_3D_TFB_UNK07X0(i0) (0x00000700 + 0x10*(i0))
+#define NVC0_3D_TFB_UNK07X0__ESIZE 0x00000010
+#define NVC0_3D_TFB_UNK07X0__LEN 0x00000004
+
+#define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0))
+#define NVC0_3D_TFB_VARYING_COUNT__ESIZE 0x00000010
+#define NVC0_3D_TFB_VARYING_COUNT__LEN 0x00000004
+
+#define NVC0_3D_TFB_BUFFER_STRIDE(i0) (0x00000708 + 0x10*(i0))
+#define NVC0_3D_TFB_BUFFER_STRIDE__ESIZE 0x00000010
+#define NVC0_3D_TFB_BUFFER_STRIDE__LEN 0x00000004
+
+#define NVC0_3D_TFB_ENABLE 0x00000744
+
+#define NVC0_3D_LOCAL_BASE 0x0000077c
+
+#define NVC0_3D_LOCAL_ADDRESS_HIGH 0x00000790
+
+#define NVC0_3D_LOCAL_ADDRESS_LOW 0x00000794
+
+#define NVC0_3D_LOCAL_SIZE_HIGH 0x00000798
+
+#define NVC0_3D_LOCAL_SIZE_LOW 0x0000079c
+
+#define NVC0_3D_RT(i0) (0x00000800 + 0x40*(i0))
+#define NVC0_3D_RT__ESIZE 0x00000040
+#define NVC0_3D_RT__LEN 0x00000008
+
+#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x40*(i0))
+
+#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x40*(i0))
+
+#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x40*(i0))
+
+#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x40*(i0))
+
+#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x40*(i0))
+
+#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x40*(i0))
+#define NVC0_3D_RT_TILE_MODE_X 0x00000001
+#define NVC0_3D_RT_TILE_MODE_Y__MASK 0x00000070
+#define NVC0_3D_RT_TILE_MODE_Y__SHIFT 4
+#define NVC0_3D_RT_TILE_MODE_Z__MASK 0x00000700
+#define NVC0_3D_RT_TILE_MODE_Z__SHIFT 8
+#define NVC0_3D_RT_TILE_MODE_LINEAR 0x00001000
+#define NVC0_3D_RT_TILE_MODE_UNK16 0x00010000
+
+#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x40*(i0))
+#define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff
+#define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0
+#define NVC0_3D_RT_ARRAY_MODE_VOLUME 0x00010000
+
+#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x40*(i0))
+
+#define NVC0_3D_RT_BASE_LAYER(i0) (0x00000820 + 0x40*(i0))
+
+#define NVC0_3D_RT_UNK14(i0) (0x00000824 + 0x40*(i0))
+
+#define NVC0_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_SCALE_X__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_SCALE_Y(i0) (0x00000a04 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_SCALE_Y__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_SCALE_Y__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_SCALE_Z(i0) (0x00000a08 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_SCALE_Z__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_SCALE_Z__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_TRANSLATE_X(i0) (0x00000a0c + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_TRANSLATE_X__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_TRANSLATE_X__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_TRANSLATE_Y(i0) (0x00000a10 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_TRANSLATE_Y__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_TRANSLATE_Y__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_TRANSLATE_Z(i0) (0x00000a14 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_TRANSLATE_Z__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_TRANSLATE_Z__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_HORIZ(i0) (0x00000c00 + 0x10*(i0))
+#define NVC0_3D_VIEWPORT_HORIZ__ESIZE 0x00000010
+#define NVC0_3D_VIEWPORT_HORIZ__LEN 0x00000010
+#define NVC0_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff
+#define NVC0_3D_VIEWPORT_HORIZ_X__SHIFT 0
+#define NVC0_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000
+#define NVC0_3D_VIEWPORT_HORIZ_W__SHIFT 16
+
+#define NVC0_3D_VIEWPORT_VERT(i0) (0x00000c04 + 0x10*(i0))
+#define NVC0_3D_VIEWPORT_VERT__ESIZE 0x00000010
+#define NVC0_3D_VIEWPORT_VERT__LEN 0x00000010
+#define NVC0_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff
+#define NVC0_3D_VIEWPORT_VERT_Y__SHIFT 0
+#define NVC0_3D_VIEWPORT_VERT_H__MASK 0xffff0000
+#define NVC0_3D_VIEWPORT_VERT_H__SHIFT 16
+
+#define NVC0_3D_DEPTH_RANGE_NEAR(i0) (0x00000c08 + 0x10*(i0))
+#define NVC0_3D_DEPTH_RANGE_NEAR__ESIZE 0x00000010
+#define NVC0_3D_DEPTH_RANGE_NEAR__LEN 0x00000010
+
+#define NVC0_3D_DEPTH_RANGE_FAR(i0) (0x00000c0c + 0x10*(i0))
+#define NVC0_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010
+#define NVC0_3D_DEPTH_RANGE_FAR__LEN 0x00000010
+
+#define NVC0_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0))
+#define NVC0_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008
+#define NVC0_3D_CLIP_RECT_HORIZ__LEN 0x00000008
+#define NVC0_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff
+#define NVC0_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0
+#define NVC0_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000
+#define NVC0_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16
+
+#define NVC0_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0))
+#define NVC0_3D_CLIP_RECT_VERT__ESIZE 0x00000008
+#define NVC0_3D_CLIP_RECT_VERT__LEN 0x00000008
+#define NVC0_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff
+#define NVC0_3D_CLIP_RECT_VERT_MIN__SHIFT 0
+#define NVC0_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000
+#define NVC0_3D_CLIP_RECT_VERT_MAX__SHIFT 16
+
+#define NVC0_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0))
+#define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008
+#define NVC0_3D_CLIPID_REGION_HORIZ__LEN 0x00000004
+#define NVC0_3D_CLIPID_REGION_HORIZ_X__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_REGION_HORIZ_X__SHIFT 0
+#define NVC0_3D_CLIPID_REGION_HORIZ_W__MASK 0xffff0000
+#define NVC0_3D_CLIPID_REGION_HORIZ_W__SHIFT 16
+
+#define NVC0_3D_CLIPID_REGION_VERT(i0) (0x00000d44 + 0x8*(i0))
+#define NVC0_3D_CLIPID_REGION_VERT__ESIZE 0x00000008
+#define NVC0_3D_CLIPID_REGION_VERT__LEN 0x00000004
+#define NVC0_3D_CLIPID_REGION_VERT_Y__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_REGION_VERT_Y__SHIFT 0
+#define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000
+#define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16
+
+#define NVC0_3D_COUNTER_ENABLE 0x00000d68
+#define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001
+#define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002
+#define NVC0_3D_COUNTER_ENABLE_UNK02 0x00000004
+#define NVC0_3D_COUNTER_ENABLE_UNK03 0x00000008
+#define NVC0_3D_COUNTER_ENABLE_UNK04 0x00000010
+#define NVC0_3D_COUNTER_ENABLE_EMITTED_PRIMITIVES 0x00000020
+#define NVC0_3D_COUNTER_ENABLE_UNK06 0x00000040
+#define NVC0_3D_COUNTER_ENABLE_UNK07 0x00000080
+#define NVC0_3D_COUNTER_ENABLE_UNK08 0x00000100
+#define NVC0_3D_COUNTER_ENABLE_UNK09 0x00000200
+#define NVC0_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000400
+#define NVC0_3D_COUNTER_ENABLE_UNK0B 0x00000800
+#define NVC0_3D_COUNTER_ENABLE_UNK0C 0x00001000
+#define NVC0_3D_COUNTER_ENABLE_UNK0D 0x00002000
+#define NVC0_3D_COUNTER_ENABLE_UNK0E 0x00004000
+#define NVC0_3D_COUNTER_ENABLE_UNK0F 0x00008000
+
+#define NVC0_3D_VERTEX_BUFFER_FIRST 0x00000d74
+
+#define NVC0_3D_VERTEX_BUFFER_COUNT 0x00000d78
+
+#define NVC0_3D_CLEAR_COLOR(i0) (0x00000d80 + 0x4*(i0))
+#define NVC0_3D_CLEAR_COLOR__ESIZE 0x00000004
+#define NVC0_3D_CLEAR_COLOR__LEN 0x00000004
+
+#define NVC0_3D_CLEAR_DEPTH 0x00000d90
+
+#define NVC0_3D_CLEAR_STENCIL 0x00000da0
+
+#define NVC0_3D_POLYGON_SMOOTH_ENABLE 0x00000db4
+
+#define NVC0_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0
+
+#define NVC0_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4
+
+#define NVC0_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8
+
+#define NVC0_3D_PATCH_VERTICES 0x00000dcc
+
+#define NVC0_3D_WINDOW_OFFSET_X 0x00000df8
+
+#define NVC0_3D_WINDOW_OFFSET_Y 0x00000dfc
+
+#define NVC0_3D_SCISSOR_ENABLE(i0) (0x00000e00 + 0x10*(i0))
+#define NVC0_3D_SCISSOR_ENABLE__ESIZE 0x00000010
+#define NVC0_3D_SCISSOR_ENABLE__LEN 0x00000010
+
+#define NVC0_3D_SCISSOR_HORIZ(i0) (0x00000e04 + 0x10*(i0))
+#define NVC0_3D_SCISSOR_HORIZ__ESIZE 0x00000010
+#define NVC0_3D_SCISSOR_HORIZ__LEN 0x00000010
+#define NVC0_3D_SCISSOR_HORIZ_MIN__MASK 0x0000ffff
+#define NVC0_3D_SCISSOR_HORIZ_MIN__SHIFT 0
+#define NVC0_3D_SCISSOR_HORIZ_MAX__MASK 0xffff0000
+#define NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT 16
+
+#define NVC0_3D_SCISSOR_VERT(i0) (0x00000e08 + 0x10*(i0))
+#define NVC0_3D_SCISSOR_VERT__ESIZE 0x00000010
+#define NVC0_3D_SCISSOR_VERT__LEN 0x00000010
+#define NVC0_3D_SCISSOR_VERT_MIN__MASK 0x0000ffff
+#define NVC0_3D_SCISSOR_VERT_MIN__SHIFT 0
+#define NVC0_3D_SCISSOR_VERT_MAX__MASK 0xffff0000
+#define NVC0_3D_SCISSOR_VERT_MAX__SHIFT 16
+
+#define NVC0_3D_STENCIL_BACK_FUNC_REF 0x00000f54
+
+#define NVC0_3D_STENCIL_BACK_MASK 0x00000f58
+
+#define NVC0_3D_STENCIL_BACK_FUNC_MASK 0x00000f5c
+
+#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_HIGH 0x00000f84
+
+#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88
+
+#define NVC0_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0))
+#define NVC0_3D_DEPTH_BOUNDS__ESIZE 0x00000004
+#define NVC0_3D_DEPTH_BOUNDS__LEN 0x00000002
+
+#define NVC0_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0))
+#define NVC0_3D_MSAA_MASK__ESIZE 0x00000004
+#define NVC0_3D_MSAA_MASK__LEN 0x00000004
+
+#define NVC0_3D_CLIPID_ADDRESS_HIGH 0x00000fcc
+
+#define NVC0_3D_CLIPID_ADDRESS_LOW 0x00000fd0
+
+#define NVC0_3D_ZETA_ADDRESS_HIGH 0x00000fe0
+
+#define NVC0_3D_ZETA_ADDRESS_LOW 0x00000fe4
+
+#define NVC0_3D_ZETA_FORMAT 0x00000fe8
+
+#define NVC0_3D_ZETA_TILE_MODE 0x00000fec
+
+#define NVC0_3D_ZETA_LAYER_STRIDE 0x00000ff0
+
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ 0x00000ff4
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__MASK 0xffff0000
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT 16
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__MASK 0x0000ffff
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT 0
+
+#define NVC0_3D_SCREEN_SCISSOR_VERT 0x00000ff8
+#define NVC0_3D_SCREEN_SCISSOR_VERT_H__MASK 0xffff0000
+#define NVC0_3D_SCREEN_SCISSOR_VERT_H__SHIFT 16
+#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff
+#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0
+
+#define NVC0_3D_CLEAR_FLAGS 0x000010f8
+#define NVC0_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001
+#define NVC0_3D_CLEAR_FLAGS_UNK4 0x00000010
+#define NVC0_3D_CLEAR_FLAGS_SCISSOR 0x00000100
+#define NVC0_3D_CLEAR_FLAGS_VIEWPORT 0x00001000
+
+#define NVC0_3D_VERTEX_ID 0x00001118
+
+#define NVC0_3D_VTX_ATTR_DEFINE 0x0000114c
+#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__MASK 0x000000ff
+#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT 0
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MASK 0x00000700
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT 8
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MIN 0x00000001
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MAX 0x00000004
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__MASK 0x00007000
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT 12
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_8 0x00001000
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_16 0x00002000
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_32 0x00004000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__MASK 0x00070000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT 16
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SNORM 0x00010000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000
+
+#define NVC0_3D_VTX_ATTR_DATA(i0) (0x00001150 + 0x4*(i0))
+#define NVC0_3D_VTX_ATTR_DATA__ESIZE 0x00000004
+#define NVC0_3D_VTX_ATTR_DATA__LEN 0x00000004
+
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT(i0) (0x00001160 + 0x4*(i0))
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT__ESIZE 0x00000004
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT__LEN 0x00000020
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK 0x0000003f
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT 0
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST 0x00000040
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__MASK 0x001fff80
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT 7
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__MASK 0x07e00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__SHIFT 21
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32_32 0x00200000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32 0x00400000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16_16 0x00600000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32 0x00800000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16 0x00a00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8_8 0x01400000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16 0x01e00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 0x02400000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8 0x02600000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8 0x03000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16 0x03600000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8 0x03a00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_2_10_10_10 0x06000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__MASK 0x78000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__SHIFT 27
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SNORM 0x08000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UNORM 0x10000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SINT 0x18000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT 0x20000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_USCALED 0x28000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SSCALED 0x30000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT 0x38000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BGRA 0x80000000
+
+#define NVC0_3D_RT_CONTROL 0x0000121c
+#define NVC0_3D_RT_CONTROL_COUNT__MASK 0x0000000f
+#define NVC0_3D_RT_CONTROL_COUNT__SHIFT 0
+#define NVC0_3D_RT_CONTROL_MAP0__MASK 0x00000070
+#define NVC0_3D_RT_CONTROL_MAP0__SHIFT 4
+#define NVC0_3D_RT_CONTROL_MAP1__MASK 0x00000380
+#define NVC0_3D_RT_CONTROL_MAP1__SHIFT 7
+#define NVC0_3D_RT_CONTROL_MAP2__MASK 0x00001c00
+#define NVC0_3D_RT_CONTROL_MAP2__SHIFT 10
+#define NVC0_3D_RT_CONTROL_MAP3__MASK 0x0000e000
+#define NVC0_3D_RT_CONTROL_MAP3__SHIFT 13
+#define NVC0_3D_RT_CONTROL_MAP4__MASK 0x00070000
+#define NVC0_3D_RT_CONTROL_MAP4__SHIFT 16
+#define NVC0_3D_RT_CONTROL_MAP5__MASK 0x00380000
+#define NVC0_3D_RT_CONTROL_MAP5__SHIFT 19
+#define NVC0_3D_RT_CONTROL_MAP6__MASK 0x01c00000
+#define NVC0_3D_RT_CONTROL_MAP6__SHIFT 22
+#define NVC0_3D_RT_CONTROL_MAP7__MASK 0x0e000000
+#define NVC0_3D_RT_CONTROL_MAP7__SHIFT 25
+
+#define NVC0_3D_ZETA_HORIZ 0x00001228
+
+#define NVC0_3D_ZETA_VERT 0x0000122c
+
+#define NVC0_3D_ZETA_ARRAY_MODE 0x00001230
+#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__MASK 0x0000ffff
+#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT 0
+#define NVC0_3D_ZETA_ARRAY_MODE_UNK 0x00010000
+
+#define NVC0_3D_LINKED_TSC 0x00001234
+
+#define NVC0_3D_DRAW_TFB_BYTES 0x0000123c
+
+#define NVC0_3D_FP_RESULT_COUNT 0x00001298
+
+#define NVC0_3D_DEPTH_TEST_ENABLE 0x000012cc
+
+#define NVC0_3D_D3D_FILL_MODE 0x000012d0
+#define NVC0_3D_D3D_FILL_MODE_POINT 0x00000001
+#define NVC0_3D_D3D_FILL_MODE_WIREFRAME 0x00000002
+#define NVC0_3D_D3D_FILL_MODE_SOLID 0x00000003
+
+#define NVC0_3D_SHADE_MODEL 0x000012d4
+#define NVC0_3D_SHADE_MODEL_FLAT 0x00001d00
+#define NVC0_3D_SHADE_MODEL_SMOOTH 0x00001d01
+
+#define NVC0_3D_BLEND_INDEPENDENT 0x000012e4
+
+#define NVC0_3D_DEPTH_WRITE_ENABLE 0x000012e8
+
+#define NVC0_3D_ALPHA_TEST_ENABLE 0x000012ec
+
+#define NVC0_3D_VB_ELEMENT_U8_SETUP 0x00001300
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK 0xc0000000
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT 30
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK 0x3fffffff
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT 0
+
+#define NVC0_3D_VB_ELEMENT_U8 0x00001304
+#define NVC0_3D_VB_ELEMENT_U8_I0__MASK 0x000000ff
+#define NVC0_3D_VB_ELEMENT_U8_I0__SHIFT 0
+#define NVC0_3D_VB_ELEMENT_U8_I1__MASK 0x0000ff00
+#define NVC0_3D_VB_ELEMENT_U8_I1__SHIFT 8
+#define NVC0_3D_VB_ELEMENT_U8_I2__MASK 0x00ff0000
+#define NVC0_3D_VB_ELEMENT_U8_I2__SHIFT 16
+#define NVC0_3D_VB_ELEMENT_U8_I3__MASK 0xff000000
+#define NVC0_3D_VB_ELEMENT_U8_I3__SHIFT 24
+
+#define NVC0_3D_D3D_CULL_MODE 0x00001308
+#define NVC0_3D_D3D_CULL_MODE_NONE 0x00000001
+#define NVC0_3D_D3D_CULL_MODE_FRONT 0x00000002
+#define NVC0_3D_D3D_CULL_MODE_BACK 0x00000003
+
+#define NVC0_3D_DEPTH_TEST_FUNC 0x0000130c
+#define NVC0_3D_DEPTH_TEST_FUNC_NEVER 0x00000200
+#define NVC0_3D_DEPTH_TEST_FUNC_LESS 0x00000201
+#define NVC0_3D_DEPTH_TEST_FUNC_EQUAL 0x00000202
+#define NVC0_3D_DEPTH_TEST_FUNC_LEQUAL 0x00000203
+#define NVC0_3D_DEPTH_TEST_FUNC_GREATER 0x00000204
+#define NVC0_3D_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205
+#define NVC0_3D_DEPTH_TEST_FUNC_GEQUAL 0x00000206
+#define NVC0_3D_DEPTH_TEST_FUNC_ALWAYS 0x00000207
+
+#define NVC0_3D_ALPHA_TEST_REF 0x00001310
+
+#define NVC0_3D_ALPHA_TEST_FUNC 0x00001314
+#define NVC0_3D_ALPHA_TEST_FUNC_NEVER 0x00000200
+#define NVC0_3D_ALPHA_TEST_FUNC_LESS 0x00000201
+#define NVC0_3D_ALPHA_TEST_FUNC_EQUAL 0x00000202
+#define NVC0_3D_ALPHA_TEST_FUNC_LEQUAL 0x00000203
+#define NVC0_3D_ALPHA_TEST_FUNC_GREATER 0x00000204
+#define NVC0_3D_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205
+#define NVC0_3D_ALPHA_TEST_FUNC_GEQUAL 0x00000206
+#define NVC0_3D_ALPHA_TEST_FUNC_ALWAYS 0x00000207
+
+#define NVC0_3D_DRAW_TFB_STRIDE 0x00001318
+#define NVC0_3D_DRAW_TFB_STRIDE__MIN 0x00000001
+#define NVC0_3D_DRAW_TFB_STRIDE__MAX 0x00000fff
+
+#define NVC0_3D_BLEND_COLOR(i0) (0x0000131c + 0x4*(i0))
+#define NVC0_3D_BLEND_COLOR__ESIZE 0x00000004
+#define NVC0_3D_BLEND_COLOR__LEN 0x00000004
+
+#define NVC0_3D_TSC_FLUSH 0x00001330
+#define NVC0_3D_TSC_FLUSH_SPECIFIC 0x00000001
+#define NVC0_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVC0_3D_TSC_FLUSH_ENTRY__SHIFT 4
+
+#define NVC0_3D_TIC_FLUSH 0x00001334
+#define NVC0_3D_TIC_FLUSH_SPECIFIC 0x00000001
+#define NVC0_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVC0_3D_TIC_FLUSH_ENTRY__SHIFT 4
+
+#define NVC0_3D_TEX_CACHE_CTL 0x00001338
+#define NVC0_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030
+#define NVC0_3D_TEX_CACHE_CTL_UNK1__SHIFT 4
+
+#define NVC0_3D_BLEND_EQUATION_RGB 0x00001340
+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NVC0_3D_BLEND_EQUATION_RGB_MIN 0x00008007
+#define NVC0_3D_BLEND_EQUATION_RGB_MAX 0x00008008
+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVC0_3D_BLEND_FUNC_SRC_RGB 0x00001344
+
+#define NVC0_3D_BLEND_FUNC_DST_RGB 0x00001348
+
+#define NVC0_3D_BLEND_EQUATION_ALPHA 0x0000134c
+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
+#define NVC0_3D_BLEND_EQUATION_ALPHA_MIN 0x00008007
+#define NVC0_3D_BLEND_EQUATION_ALPHA_MAX 0x00008008
+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVC0_3D_BLEND_FUNC_SRC_ALPHA 0x00001350
+
+#define NVC0_3D_BLEND_FUNC_DST_ALPHA 0x00001358
+
+#define NVC0_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0))
+#define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004
+#define NVC0_3D_BLEND_ENABLE__LEN 0x00000008
+
+#define NVC0_3D_STENCIL_ENABLE 0x00001380
+
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL 0x00001384
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL 0x00001388
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS 0x0000138c
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC 0x00001390
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207
+
+#define NVC0_3D_STENCIL_FRONT_FUNC_REF 0x00001394
+
+#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x00001398
+
+#define NVC0_3D_STENCIL_FRONT_MASK 0x0000139c
+
+#define NVC0_3D_DRAW_TFB_BASE 0x000013a4
+
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN 0x000013a8
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_0 0x00000001
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_1 0x00000010
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_2 0x00000100
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_3 0x00001000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_4 0x00010000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_5 0x00100000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_6 0x01000000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_7 0x10000000
+
+#define NVC0_3D_SCREEN_Y_CONTROL 0x000013ac
+#define NVC0_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001
+#define NVC0_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010
+
+#define NVC0_3D_LINE_WIDTH 0x000013b0
+
+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420
+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001
+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MAX 0x00000400
+
+#define NVC0_3D_VERTEX_ARRAY_FLUSH 0x0000142c
+
+#define NVC0_3D_VB_ELEMENT_BASE 0x00001434
+
+#define NVC0_3D_VB_INSTANCE_BASE 0x00001438
+
+#define NVC0_3D_CODE_CB_FLUSH 0x00001440
+
+#define NVC0_3D_CLIPID_HEIGHT 0x00001504
+#define NVC0_3D_CLIPID_HEIGHT__MAX 0x00002000
+
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ 0x00001508
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16
+
+#define NVC0_3D_CLIPID_FILL_RECT_VERT 0x0000150c
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16
+
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE 0x00001510
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_0 0x00000001
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_1 0x00000002
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_2 0x00000004
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_3 0x00000008
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_4 0x00000010
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_5 0x00000020
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_6 0x00000040
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_7 0x00000080
+
+#define NVC0_3D_SAMPLECNT_ENABLE 0x00001514
+
+#define NVC0_3D_POINT_SIZE 0x00001518
+
+#define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520
+
+#define NVC0_3D_COUNTER_RESET 0x00001530
+#define NVC0_3D_COUNTER_RESET_SAMPLECNT 0x00000001
+#define NVC0_3D_COUNTER_RESET_UNK02 0x00000002
+#define NVC0_3D_COUNTER_RESET_UNK03 0x00000003
+#define NVC0_3D_COUNTER_RESET_UNK04 0x00000004
+#define NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES 0x00000010
+#define NVC0_3D_COUNTER_RESET_UNK11 0x00000011
+#define NVC0_3D_COUNTER_RESET_UNK12 0x00000012
+#define NVC0_3D_COUNTER_RESET_UNK13 0x00000013
+#define NVC0_3D_COUNTER_RESET_UNK15 0x00000015
+#define NVC0_3D_COUNTER_RESET_UNK16 0x00000016
+#define NVC0_3D_COUNTER_RESET_UNK17 0x00000017
+#define NVC0_3D_COUNTER_RESET_UNK18 0x00000018
+#define NVC0_3D_COUNTER_RESET_UNK1A 0x0000001a
+#define NVC0_3D_COUNTER_RESET_UNK1B 0x0000001b
+#define NVC0_3D_COUNTER_RESET_UNK1C 0x0000001c
+#define NVC0_3D_COUNTER_RESET_UNK1D 0x0000001d
+#define NVC0_3D_COUNTER_RESET_UNK1E 0x0000001e
+#define NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x0000001f
+
+#define NVC0_3D_MULTISAMPLE_ENABLE 0x00001534
+
+#define NVC0_3D_ZETA_ENABLE 0x00001538
+
+#define NVC0_3D_MULTISAMPLE_CTRL 0x0000153c
+#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE 0x00000001
+#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE 0x00000010
+
+#define NVC0_3D_COND_ADDRESS_HIGH 0x00001550
+
+#define NVC0_3D_COND_ADDRESS_LOW 0x00001554
+
+#define NVC0_3D_COND_MODE 0x00001558
+#define NVC0_3D_COND_MODE_NEVER 0x00000000
+#define NVC0_3D_COND_MODE_ALWAYS 0x00000001
+#define NVC0_3D_COND_MODE_RES_NON_ZERO 0x00000002
+#define NVC0_3D_COND_MODE_EQUAL 0x00000003
+#define NVC0_3D_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NVC0_3D_TSC_ADDRESS_HIGH 0x0000155c
+
+#define NVC0_3D_TSC_ADDRESS_LOW 0x00001560
+#define NVC0_3D_TSC_ADDRESS_LOW__ALIGN 0x00000020
+
+#define NVC0_3D_TSC_LIMIT 0x00001564
+#define NVC0_3D_TSC_LIMIT__MAX 0x00001fff
+
+#define NVC0_3D_POLYGON_OFFSET_FACTOR 0x0000156c
+
+#define NVC0_3D_LINE_SMOOTH_ENABLE 0x00001570
+
+#define NVC0_3D_TIC_ADDRESS_HIGH 0x00001574
+
+#define NVC0_3D_TIC_ADDRESS_LOW 0x00001578
+
+#define NVC0_3D_TIC_LIMIT 0x0000157c
+
+#define NVC0_3D_STENCIL_TWO_SIDE_ENABLE 0x00001594
+
+#define NVC0_3D_STENCIL_BACK_OP_FAIL 0x00001598
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_ZERO 0x00000000
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR 0x00001e02
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR 0x00001e03
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL 0x0000159c
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS 0x000015a0
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC 0x000015a4
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207
+
+#define NVC0_3D_CSAA_ENABLE 0x000015b4
+
+#define NVC0_3D_FRAMEBUFFER_SRGB 0x000015b8
+
+#define NVC0_3D_POLYGON_OFFSET_UNITS 0x000015bc
+
+#define NVC0_3D_LAYER 0x000015cc
+#define NVC0_3D_LAYER_IDX__MASK 0x0000ffff
+#define NVC0_3D_LAYER_IDX__SHIFT 0
+#define NVC0_3D_LAYER_USE_GP 0x00010000
+
+#define NVC0_3D_MULTISAMPLE_MODE 0x000015d0
+#define NVC0_3D_MULTISAMPLE_MODE_1X 0x00000000
+#define NVC0_3D_MULTISAMPLE_MODE_2XMS 0x00000001
+#define NVC0_3D_MULTISAMPLE_MODE_4XMS 0x00000002
+#define NVC0_3D_MULTISAMPLE_MODE_8XMS 0x00000003
+#define NVC0_3D_MULTISAMPLE_MODE_4XMS_4XCS 0x00000008
+#define NVC0_3D_MULTISAMPLE_MODE_4XMS_12XCS 0x00000009
+#define NVC0_3D_MULTISAMPLE_MODE_8XMS_8XCS 0x0000000a
+
+#define NVC0_3D_VERTEX_BEGIN_D3D 0x000015d4
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT 0
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS 0x00000001
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES 0x00000002
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP 0x00000003
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES 0x00000004
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP 0x00000005
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY 0x0000000a
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
+#define NVC0_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT 0x10000000
+
+#define NVC0_3D_VERTEX_END_D3D 0x000015d8
+#define NVC0_3D_VERTEX_END_D3D_UNK0 0x00000001
+#define NVC0_3D_VERTEX_END_D3D_UNK1 0x00000002
+
+#define NVC0_3D_EDGEFLAG_ENABLE 0x000015e4
+
+#define NVC0_3D_VB_ELEMENT_U32 0x000015e8
+
+#define NVC0_3D_VB_ELEMENT_U16_SETUP 0x000015ec
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK 0xc0000000
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT 30
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK 0x3fffffff
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT 0
+
+#define NVC0_3D_VB_ELEMENT_U16 0x000015f0
+#define NVC0_3D_VB_ELEMENT_U16_I0__MASK 0x0000ffff
+#define NVC0_3D_VB_ELEMENT_U16_I0__SHIFT 0
+#define NVC0_3D_VB_ELEMENT_U16_I1__MASK 0xffff0000
+#define NVC0_3D_VB_ELEMENT_U16_I1__SHIFT 16
+
+#define NVC0_3D_VERTEX_BASE_HIGH 0x000015f4
+
+#define NVC0_3D_VERTEX_BASE_LOW 0x000015f8
+
+#define NVC0_3D_POINT_COORD_REPLACE 0x00001604
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__MASK 0x00000004
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__SHIFT 2
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT 0x00000000
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT 0x00000004
+#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__MASK 0x000007f8
+#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__SHIFT 3
+
+#define NVC0_3D_CODE_ADDRESS_HIGH 0x00001608
+
+#define NVC0_3D_CODE_ADDRESS_LOW 0x0000160c
+
+#define NVC0_3D_VERTEX_END_GL 0x00001614
+#define NVC0_3D_VERTEX_END_GL_UNK0 0x00000001
+#define NVC0_3D_VERTEX_END_GL_UNK1 0x00000002
+
+#define NVC0_3D_VERTEX_BEGIN_GL 0x00001618
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK 0x0fffffff
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT 0
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS 0x00000000
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES 0x00000001
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP 0x00000002
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP 0x00000003
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES 0x00000004
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP 0x00000005
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN 0x00000006
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS 0x00000007
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP 0x00000008
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON 0x00000009
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY 0x0000000a
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_PATCHES 0x0000000e
+#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x04000000
+#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT 0x08000000
+
+#define NVC0_3D_VERTEX_DATA 0x00001640
+
+#define NVC0_3D_PRIM_RESTART_ENABLE 0x00001644
+
+#define NVC0_3D_PRIM_RESTART_INDEX 0x00001648
+
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN 0x0000164c
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID 0x00000001
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID 0x00000010
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID 0x00000100
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_UNK12 0x00001000
+
+#define NVC0_3D_POINT_SMOOTH_ENABLE 0x00001658
+
+#define NVC0_3D_POINT_RASTER_RULES 0x0000165c
+#define NVC0_3D_POINT_RASTER_RULES_OGL 0x00000000
+#define NVC0_3D_POINT_RASTER_RULES_D3D 0x00000001
+
+#define NVC0_3D_TEX_MISC 0x00001664
+#define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004
+
+#define NVC0_3D_LINE_STIPPLE_ENABLE 0x0000166c
+
+#define NVC0_3D_LINE_STIPPLE_PATTERN 0x00001680
+
+#define NVC0_3D_PROVOKING_VERTEX_LAST 0x00001684
+
+#define NVC0_3D_VERTEX_TWO_SIDE_ENABLE 0x00001688
+
+#define NVC0_3D_POLYGON_STIPPLE_ENABLE 0x0000168c
+
+#define NVC0_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001700 + 0x4*(i0))
+#define NVC0_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004
+#define NVC0_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020
+
+#define NVC0_3D_ZETA_BASE_LAYER 0x0000179c
+
+#define NVC0_3D_VERTEX_QUARANTINE_ADDRESS_HIGH 0x000017bc
+
+#define NVC0_3D_VERTEX_QUARANTINE_ADDRESS_LOW 0x000017c0
+
+#define NVC0_3D_VERTEX_QUARANTINE_SIZE 0x000017c4
+#define NVC0_3D_VERTEX_QUARANTINE_SIZE_16K 0x00000001
+#define NVC0_3D_VERTEX_QUARANTINE_SIZE_32K 0x00000002
+#define NVC0_3D_VERTEX_QUARANTINE_SIZE_64K 0x00000003
+
+#define NVC0_3D_STRMOUT_UNK1780(i0) (0x00001780 + 0x4*(i0))
+#define NVC0_3D_STRMOUT_UNK1780__ESIZE 0x00000004
+#define NVC0_3D_STRMOUT_UNK1780__LEN 0x00000004
+
+#define NVC0_3D_UNK17BC_ADDRESS_HIGH 0x000017bc
+
+#define NVC0_3D_UNK17BC_ADDRESS_LOW 0x000017c0
+
+#define NVC0_3D_UNK17BC_LIMIT 0x000017c4
+
+#define NVC0_3D_INDEX_ARRAY_START_HIGH 0x000017c8
+
+#define NVC0_3D_INDEX_ARRAY_START_LOW 0x000017cc
+
+#define NVC0_3D_INDEX_ARRAY_LIMIT_HIGH 0x000017d0
+
+#define NVC0_3D_INDEX_ARRAY_LIMIT_LOW 0x000017d4
+
+#define NVC0_3D_INDEX_LOG2_SIZE 0x000017d8
+
+#define NVC0_3D_INDEX_BATCH_FIRST 0x000017dc
+
+#define NVC0_3D_INDEX_BATCH_COUNT 0x000017e0
+
+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(i0) (0x00001880 + 0x4*(i0))
+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE 0x00000004
+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__LEN 0x00000020
+
+#define NVC0_3D_VP_POINT_SIZE_EN 0x00001910
+
+#define NVC0_3D_CULL_FACE_ENABLE 0x00001918
+
+#define NVC0_3D_FRONT_FACE 0x0000191c
+#define NVC0_3D_FRONT_FACE_CW 0x00000900
+#define NVC0_3D_FRONT_FACE_CCW 0x00000901
+
+#define NVC0_3D_CULL_FACE 0x00001920
+#define NVC0_3D_CULL_FACE_FRONT 0x00000404
+#define NVC0_3D_CULL_FACE_BACK 0x00000405
+#define NVC0_3D_CULL_FACE_FRONT_AND_BACK 0x00000408
+
+#define NVC0_3D_VIEWPORT_TRANSFORM_EN 0x0000192c
+
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__MASK 0x00000006
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__SHIFT 1
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK0 0x00000000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 0x00000002
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK2 0x00000004
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000
+
+#define NVC0_3D_CLIP_RECTS_EN 0x0000194c
+
+#define NVC0_3D_CLIP_RECTS_MODE 0x00001950
+#define NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000
+#define NVC0_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001
+#define NVC0_3D_CLIP_RECTS_MODE_NEVER 0x00000002
+
+#define NVC0_3D_FP_ZORDER_CTRL 0x0000196c
+#define NVC0_3D_FP_ZORDER_CTRL_0 0x00000001
+#define NVC0_3D_FP_ZORDER_CTRL_1 0x00000010
+
+#define NVC0_3D_CLIPID_ENABLE 0x0000197c
+
+#define NVC0_3D_CLIPID_WIDTH 0x00001980
+#define NVC0_3D_CLIPID_WIDTH__MAX 0x00002000
+#define NVC0_3D_CLIPID_WIDTH__ALIGN 0x00000040
+
+#define NVC0_3D_CLIPID_ID 0x00001984
+
+#define NVC0_3D_FP_CONTROL 0x000019a8
+#define NVC0_3D_FP_CONTROL_MULTIPLE_RESULTS 0x00000001
+#define NVC0_3D_FP_CONTROL_EXPORTS_Z 0x00000100
+#define NVC0_3D_FP_CONTROL_USES_KIL 0x00100000
+
+#define NVC0_3D_DEPTH_BOUNDS_EN 0x000019bc
+
+#define NVC0_3D_LOGIC_OP_ENABLE 0x000019c4
+
+#define NVC0_3D_LOGIC_OP 0x000019c8
+#define NVC0_3D_LOGIC_OP_CLEAR 0x00001500
+#define NVC0_3D_LOGIC_OP_AND 0x00001501
+#define NVC0_3D_LOGIC_OP_AND_REVERSE 0x00001502
+#define NVC0_3D_LOGIC_OP_COPY 0x00001503
+#define NVC0_3D_LOGIC_OP_AND_INVERTED 0x00001504
+#define NVC0_3D_LOGIC_OP_NOOP 0x00001505
+#define NVC0_3D_LOGIC_OP_XOR 0x00001506
+#define NVC0_3D_LOGIC_OP_OR 0x00001507
+#define NVC0_3D_LOGIC_OP_NOR 0x00001508
+#define NVC0_3D_LOGIC_OP_EQUIV 0x00001509
+#define NVC0_3D_LOGIC_OP_INVERT 0x0000150a
+#define NVC0_3D_LOGIC_OP_OR_REVERSE 0x0000150b
+#define NVC0_3D_LOGIC_OP_COPY_INVERTED 0x0000150c
+#define NVC0_3D_LOGIC_OP_OR_INVERTED 0x0000150d
+#define NVC0_3D_LOGIC_OP_NAND 0x0000150e
+#define NVC0_3D_LOGIC_OP_SET 0x0000150f
+
+#define NVC0_3D_CLEAR_BUFFERS 0x000019d0
+#define NVC0_3D_CLEAR_BUFFERS_Z 0x00000001
+#define NVC0_3D_CLEAR_BUFFERS_S 0x00000002
+#define NVC0_3D_CLEAR_BUFFERS_R 0x00000004
+#define NVC0_3D_CLEAR_BUFFERS_G 0x00000008
+#define NVC0_3D_CLEAR_BUFFERS_B 0x00000010
+#define NVC0_3D_CLEAR_BUFFERS_A 0x00000020
+#define NVC0_3D_CLEAR_BUFFERS_RT__MASK 0x000003c0
+#define NVC0_3D_CLEAR_BUFFERS_RT__SHIFT 6
+#define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00
+#define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT 10
+
+#define NVC0_3D_CLIPID_FILL 0x000019d4
+
+#define NVC0_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0))
+#define NVC0_3D_COLOR_MASK__ESIZE 0x00000004
+#define NVC0_3D_COLOR_MASK__LEN 0x00000008
+#define NVC0_3D_COLOR_MASK_R 0x0000000f
+#define NVC0_3D_COLOR_MASK_G 0x000000f0
+#define NVC0_3D_COLOR_MASK_B 0x00000f00
+#define NVC0_3D_COLOR_MASK_A 0x0000f000
+
+#define NVC0_3D_QUERY_ADDRESS_HIGH 0x00001b00
+
+#define NVC0_3D_QUERY_ADDRESS_LOW 0x00001b04
+
+#define NVC0_3D_QUERY_SEQUENCE 0x00001b08
+
+#define NVC0_3D_QUERY_GET 0x00001b0c
+#define NVC0_3D_QUERY_GET_MODE__MASK 0x00000003
+#define NVC0_3D_QUERY_GET_MODE__SHIFT 0
+#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000
+#define NVC0_3D_QUERY_GET_MODE_SYNC 0x00000001
+#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002
+#define NVC0_3D_QUERY_GET_FENCE 0x00000010
+#define NVC0_3D_QUERY_GET_STREAM__MASK 0x000000e0
+#define NVC0_3D_QUERY_GET_STREAM__SHIFT 5
+#define NVC0_3D_QUERY_GET_UNK8 0x00000100
+#define NVC0_3D_QUERY_GET_UNIT__MASK 0x0000f000
+#define NVC0_3D_QUERY_GET_UNIT__SHIFT 12
+#define NVC0_3D_QUERY_GET_SYNC_COND__MASK 0x00010000
+#define NVC0_3D_QUERY_GET_SYNC_COND__SHIFT 16
+#define NVC0_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000
+#define NVC0_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000
+#define NVC0_3D_QUERY_GET_INTR 0x00100000
+#define NVC0_3D_QUERY_GET_UNK21 0x00200000
+#define NVC0_3D_QUERY_GET_SELECT__MASK 0x0f800000
+#define NVC0_3D_QUERY_GET_SELECT__SHIFT 23
+#define NVC0_3D_QUERY_GET_SELECT_ZERO 0x00000000
+#define NVC0_3D_QUERY_GET_SELECT_SAMPLECNT 0x01000000
+#define NVC0_3D_QUERY_GET_SELECT_EMITTED_PRIMS 0x05800000
+#define NVC0_3D_QUERY_GET_SELECT_GENERATED_PRIMS 0x09000000
+#define NVC0_3D_QUERY_GET_SHORT 0x10000000
+
+#define NVC0_3D_VERTEX_ARRAY_FETCH(i0) (0x00001c00 + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010
+#define NVC0_3D_VERTEX_ARRAY_FETCH__LEN 0x00000020
+#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK 0x00000fff
+#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT 0
+#define NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE 0x00001000
+
+#define NVC0_3D_VERTEX_ARRAY_START_HIGH(i0) (0x00001c04 + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_START_HIGH__ESIZE 0x00000010
+#define NVC0_3D_VERTEX_ARRAY_START_HIGH__LEN 0x00000020
+
+#define NVC0_3D_VERTEX_ARRAY_START_LOW(i0) (0x00001c08 + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_START_LOW__ESIZE 0x00000010
+#define NVC0_3D_VERTEX_ARRAY_START_LOW__LEN 0x00000020
+
+#define NVC0_3D_VERTEX_ARRAY_DIVISOR(i0) (0x00001c0c + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_DIVISOR__ESIZE 0x00000010
+#define NVC0_3D_VERTEX_ARRAY_DIVISOR__LEN 0x00000020
+
+#define NVC0_3D_IBLEND(i0) (0x00001e00 + 0x20*(i0))
+#define NVC0_3D_IBLEND__ESIZE 0x00000020
+#define NVC0_3D_IBLEND__LEN 0x00000008
+
+#define NVC0_3D_IBLEND_EQUATION_RGB(i0) (0x00001e04 + 0x20*(i0))
+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NVC0_3D_IBLEND_EQUATION_RGB_MIN 0x00008007
+#define NVC0_3D_IBLEND_EQUATION_RGB_MAX 0x00008008
+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVC0_3D_IBLEND_FUNC_SRC_RGB(i0) (0x00001e08 + 0x20*(i0))
+
+#define NVC0_3D_IBLEND_FUNC_DST_RGB(i0) (0x00001e0c + 0x20*(i0))
+
+#define NVC0_3D_IBLEND_EQUATION_ALPHA(i0) (0x00001e10 + 0x20*(i0))
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_MIN 0x00008007
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_MAX 0x00008008
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVC0_3D_IBLEND_FUNC_SRC_ALPHA(i0) (0x00001e14 + 0x20*(i0))
+
+#define NVC0_3D_IBLEND_FUNC_DST_ALPHA(i0) (0x00001e18 + 0x20*(i0))
+
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00001f00 + 0x8*(i0))
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__ESIZE 0x00000008
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__LEN 0x00000020
+
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00001f04 + 0x8*(i0))
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__ESIZE 0x00000008
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__LEN 0x00000020
+
+#define NVC0_3D_SP(i0) (0x00002000 + 0x40*(i0))
+#define NVC0_3D_SP__ESIZE 0x00000040
+#define NVC0_3D_SP__LEN 0x00000006
+
+#define NVC0_3D_SP_SELECT(i0) (0x00002000 + 0x40*(i0))
+#define NVC0_3D_SP_SELECT_ENABLE 0x00000001
+#define NVC0_3D_SP_SELECT_PROGRAM__MASK 0x00000070
+#define NVC0_3D_SP_SELECT_PROGRAM__SHIFT 4
+#define NVC0_3D_SP_SELECT_PROGRAM_VP_A 0x00000000
+#define NVC0_3D_SP_SELECT_PROGRAM_VP_B 0x00000010
+#define NVC0_3D_SP_SELECT_PROGRAM_TCP 0x00000020
+#define NVC0_3D_SP_SELECT_PROGRAM_TEP 0x00000030
+#define NVC0_3D_SP_SELECT_PROGRAM_GP 0x00000040
+#define NVC0_3D_SP_SELECT_PROGRAM_FP 0x00000050
+
+#define NVC0_3D_SP_START_ID(i0) (0x00002004 + 0x40*(i0))
+
+#define NVC0_3D_SP_GPR_ALLOC(i0) (0x0000200c + 0x40*(i0))
+
+#define NVC0_3D_TEX_LIMITS(i0) (0x00002200 + 0x10*(i0))
+#define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010
+#define NVC0_3D_TEX_LIMITS__LEN 0x00000005
+
+#define NVC0_3D_FIRMWARE(i0) (0x00002300 + 0x4*(i0))
+#define NVC0_3D_FIRMWARE__ESIZE 0x00000004
+#define NVC0_3D_FIRMWARE__LEN 0x00000020
+
+#define NVC0_3D_CB_SIZE 0x00002380
+
+#define NVC0_3D_CB_ADDRESS_HIGH 0x00002384
+
+#define NVC0_3D_CB_ADDRESS_LOW 0x00002388
+
+#define NVC0_3D_CB_POS 0x0000238c
+
+#define NVC0_3D_CB_DATA(i0) (0x00002390 + 0x4*(i0))
+#define NVC0_3D_CB_DATA__ESIZE 0x00000004
+#define NVC0_3D_CB_DATA__LEN 0x00000010
+
+#define NVC0_3D_BIND_TSC(i0) (0x00002400 + 0x20*(i0))
+#define NVC0_3D_BIND_TSC__ESIZE 0x00000020
+#define NVC0_3D_BIND_TSC__LEN 0x00000005
+#define NVC0_3D_BIND_TSC_ACTIVE 0x00000001
+#define NVC0_3D_BIND_TSC_SAMPLER__MASK 0x00000ff0
+#define NVC0_3D_BIND_TSC_SAMPLER__SHIFT 4
+#define NVC0_3D_BIND_TSC_TSC__MASK 0x01fff000
+#define NVC0_3D_BIND_TSC_TSC__SHIFT 12
+
+#define NVC0_3D_BIND_TIC(i0) (0x00002404 + 0x20*(i0))
+#define NVC0_3D_BIND_TIC__ESIZE 0x00000020
+#define NVC0_3D_BIND_TIC__LEN 0x00000005
+#define NVC0_3D_BIND_TIC_ACTIVE 0x00000001
+#define NVC0_3D_BIND_TIC_TEXTURE__MASK 0x000001fe
+#define NVC0_3D_BIND_TIC_TEXTURE__SHIFT 1
+#define NVC0_3D_BIND_TIC_TIC__MASK 0x7ffffe00
+#define NVC0_3D_BIND_TIC_TIC__SHIFT 9
+
+#define NVC0_3D_CB_BIND(i0) (0x00002410 + 0x20*(i0))
+#define NVC0_3D_CB_BIND__ESIZE 0x00000020
+#define NVC0_3D_CB_BIND__LEN 0x00000005
+#define NVC0_3D_CB_BIND_VALID 0x00000001
+#define NVC0_3D_CB_BIND_INDEX__MASK 0x000000f0
+#define NVC0_3D_CB_BIND_INDEX__SHIFT 4
+
+#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600
+
+#define NVC0_3D_TFB_VARYING_LOCS(i0, i1) (0x00002800 + 0x80*(i0) + 0x4*(i1))
+#define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004
+#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020
+
+#define NVC0_3D_COLOR_MASK_BROADCAST 0x00003808
+
+#define NVC0_3D_VERTEX_ARRAY_SELECT 0x00003820
+
+#define NVC0_3D_BLEND_ENABLES 0x00003858
+
+#define NVC0_3D_POLYGON_MODE_FRONT 0x00003868
+#define NVC0_3D_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NVC0_3D_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NVC0_3D_POLYGON_MODE_FRONT_FILL 0x00001b02
+
+#define NVC0_3D_POLYGON_MODE_BACK 0x00003870
+#define NVC0_3D_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NVC0_3D_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NVC0_3D_POLYGON_MODE_BACK_FILL 0x00001b02
+
+#define NVC0_3D_GP_SELECT 0x00003878
+
+#define NVC0_3D_TEP_SELECT 0x00003880
+
+
+#endif /* NVC0_3D_XML */
diff --git a/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h b/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h
new file mode 100644
index 0000000000..84b152213a
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h
@@ -0,0 +1,98 @@
+#ifndef NV_3DDEFS_XML
+#define NV_3DDEFS_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_3d.xml ( 26312 bytes, from 2010-10-08 10:10:01)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+- nv_3ddefs.xml ( 16397 bytes, from 2010-10-08 13:30:38)
+- nv_object.xml ( 11249 bytes, from 2010-10-07 15:31:28)
+- nvchipsets.xml ( 2824 bytes, from 2010-07-07 13:41:20)
+- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define NV50_3D_BLEND_FACTOR_ZERO 0x00004000
+#define NV50_3D_BLEND_FACTOR_ONE 0x00004001
+#define NV50_3D_BLEND_FACTOR_SRC_COLOR 0x00004300
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x00004301
+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA 0x00004302
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NV50_3D_BLEND_FACTOR_DST_ALPHA 0x00004304
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x00004305
+#define NV50_3D_BLEND_FACTOR_DST_COLOR 0x00004306
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x00004307
+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE 0x00004308
+#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR 0x0000c001
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA 0x0000c003
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NV50_3D_BLEND_FACTOR_SRC1_COLOR 0x0000c900
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA 0x0000c902
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA 0x0000c903
+
+#endif /* NV_3DDEFS_XML */
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
new file mode 100644
index 0000000000..5d2168e600
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_context.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+
+#include "nvc0_context.h"
+#include "nvc0_screen.h"
+#include "nvc0_resource.h"
+
+#include "nouveau/nouveau_reloc.h"
+
+static void
+nvc0_flush(struct pipe_context *pipe,
+ struct pipe_fence_handle **fence)
+{
+ struct nouveau_screen *screen = &nvc0_context(pipe)->screen->base;
+
+ if (fence)
+ nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence);
+
+ /* Try to emit before firing to avoid having to flush again right after
+ * in case we have to wait on this fence.
+ */
+ nouveau_fence_emit(screen->fence.current);
+
+ FIRE_RING(screen->channel);
+}
+
+static void
+nvc0_context_unreference_resources(struct nvc0_context *nvc0)
+{
+ unsigned s, i;
+
+ for (i = 0; i < NVC0_BUFCTX_COUNT; ++i)
+ nvc0_bufctx_reset(nvc0, i);
+
+ for (i = 0; i < nvc0->num_vtxbufs; ++i)
+ pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL);
+
+ pipe_resource_reference(&nvc0->idxbuf.buffer, NULL);
+
+ for (s = 0; s < 5; ++s) {
+ for (i = 0; i < nvc0->num_textures[s]; ++i)
+ pipe_sampler_view_reference(&nvc0->textures[s][i], NULL);
+
+ for (i = 0; i < 16; ++i)
+ pipe_resource_reference(&nvc0->constbuf[s][i], NULL);
+ }
+
+ for (i = 0; i < nvc0->num_tfbbufs; ++i)
+ pipe_resource_reference(&nvc0->tfbbuf[i], NULL);
+}
+
+static void
+nvc0_destroy(struct pipe_context *pipe)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0_context_unreference_resources(nvc0);
+
+ draw_destroy(nvc0->draw);
+
+ if (nvc0->screen->cur_ctx == nvc0) {
+ nvc0->screen->base.channel->user_private = NULL;
+ nvc0->screen->cur_ctx = NULL;
+ }
+
+ FREE(nvc0);
+}
+
+void
+nvc0_default_flush_notify(struct nouveau_channel *chan)
+{
+ struct nvc0_context *nvc0 = chan->user_private;
+
+ if (!nvc0)
+ return;
+
+ nouveau_fence_update(&nvc0->screen->base, TRUE);
+ nouveau_fence_next(&nvc0->screen->base);
+}
+
+struct pipe_context *
+nvc0_create(struct pipe_screen *pscreen, void *priv)
+{
+ struct pipe_winsys *pipe_winsys = pscreen->winsys;
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+ struct nvc0_context *nvc0;
+ struct pipe_context *pipe;
+
+ nvc0 = CALLOC_STRUCT(nvc0_context);
+ if (!nvc0)
+ return NULL;
+ pipe = &nvc0->base.pipe;
+
+ nvc0->screen = screen;
+ nvc0->base.screen = &screen->base;
+ nvc0->base.copy_data = nvc0_m2mf_copy_linear;
+ nvc0->base.push_data = nvc0_m2mf_push_linear;
+
+ pipe->winsys = pipe_winsys;
+ pipe->screen = pscreen;
+ pipe->priv = priv;
+
+ pipe->destroy = nvc0_destroy;
+
+ pipe->draw_vbo = nvc0_draw_vbo;
+ pipe->clear = nvc0_clear;
+
+ pipe->flush = nvc0_flush;
+
+ if (!screen->cur_ctx)
+ screen->cur_ctx = nvc0;
+ screen->base.channel->user_private = nvc0;
+ screen->base.channel->flush_notify = nvc0_default_flush_notify;
+
+ nvc0_init_query_functions(nvc0);
+ nvc0_init_surface_functions(nvc0);
+ nvc0_init_state_functions(nvc0);
+ nvc0_init_resource_functions(pipe);
+
+ nvc0->draw = draw_create(pipe);
+ assert(nvc0->draw);
+ draw_set_rasterize_stage(nvc0->draw, nvc0_draw_render_stage(nvc0));
+
+ return pipe;
+}
+
+struct resident {
+ struct nv04_resource *res;
+ uint32_t flags;
+};
+
+void
+nvc0_bufctx_add_resident(struct nvc0_context *nvc0, int ctx,
+ struct nv04_resource *resource, uint32_t flags)
+{
+ struct resident rsd = { resource, flags };
+
+ if (!resource->bo)
+ return;
+
+ /* We don't need to reference the resource here, it will be referenced
+ * in the context/state, and bufctx will be reset when state changes.
+ */
+ util_dynarray_append(&nvc0->residents[ctx], struct resident, rsd);
+}
+
+void
+nvc0_bufctx_del_resident(struct nvc0_context *nvc0, int ctx,
+ struct nv04_resource *resource)
+{
+ struct resident *rsd, *top;
+ unsigned i;
+
+ for (i = 0; i < nvc0->residents[ctx].size / sizeof(struct resident); ++i) {
+ rsd = util_dynarray_element(&nvc0->residents[ctx], struct resident, i);
+
+ if (rsd->res == resource) {
+ top = util_dynarray_pop_ptr(&nvc0->residents[ctx], struct resident);
+ if (rsd != top)
+ *rsd = *top;
+ break;
+ }
+ }
+}
+
+void
+nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0)
+{
+ struct resident *rsd;
+ struct util_dynarray *array;
+ unsigned ctx, i, n;
+
+ for (ctx = 0; ctx < NVC0_BUFCTX_COUNT; ++ctx) {
+ array = &nvc0->residents[ctx];
+
+ n = array->size / sizeof(struct resident);
+ MARK_RING(nvc0->screen->base.channel, n, n);
+ for (i = 0; i < n; ++i) {
+ rsd = util_dynarray_element(array, struct resident, i);
+
+ nvc0_resource_validate(rsd->res, rsd->flags);
+ }
+ }
+
+ nvc0_screen_make_buffers_resident(nvc0->screen);
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
new file mode 100644
index 0000000000..102997e4fc
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -0,0 +1,244 @@
+#ifndef __NVC0_CONTEXT_H__
+#define __NVC0_CONTEXT_H__
+
+#include <stdio.h>
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_inlines.h"
+#include "util/u_dynarray.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nvc0_winsys.h"
+#include "nvc0_stateobj.h"
+#include "nvc0_screen.h"
+#include "nvc0_program.h"
+#include "nvc0_resource.h"
+
+#include "nouveau/nouveau_context.h"
+
+#include "nvc0_3ddefs.xml.h"
+#include "nvc0_3d.xml.h"
+#include "nvc0_2d.xml.h"
+#include "nvc0_m2mf.xml.h"
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args);
+
+#ifdef NOUVEAU_DEBUG
+# define NOUVEAU_DBG(args...) printf(args);
+#else
+# define NOUVEAU_DBG(args...)
+#endif
+
+#define NVC0_NEW_BLEND (1 << 0)
+#define NVC0_NEW_RASTERIZER (1 << 1)
+#define NVC0_NEW_ZSA (1 << 2)
+#define NVC0_NEW_VERTPROG (1 << 3)
+#define NVC0_NEW_TCTLPROG (1 << 4)
+#define NVC0_NEW_TEVLPROG (1 << 5)
+#define NVC0_NEW_GMTYPROG (1 << 6)
+#define NVC0_NEW_FRAGPROG (1 << 7)
+#define NVC0_NEW_BLEND_COLOUR (1 << 8)
+#define NVC0_NEW_STENCIL_REF (1 << 9)
+#define NVC0_NEW_CLIP (1 << 10)
+#define NVC0_NEW_SAMPLE_MASK (1 << 11)
+#define NVC0_NEW_FRAMEBUFFER (1 << 12)
+#define NVC0_NEW_STIPPLE (1 << 13)
+#define NVC0_NEW_SCISSOR (1 << 14)
+#define NVC0_NEW_VIEWPORT (1 << 15)
+#define NVC0_NEW_ARRAYS (1 << 16)
+#define NVC0_NEW_VERTEX (1 << 17)
+#define NVC0_NEW_CONSTBUF (1 << 18)
+#define NVC0_NEW_TEXTURES (1 << 19)
+#define NVC0_NEW_SAMPLERS (1 << 20)
+#define NVC0_NEW_TFB (1 << 21)
+#define NVC0_NEW_TFB_BUFFERS (1 << 22)
+
+#define NVC0_BUFCTX_CONSTANT 0
+#define NVC0_BUFCTX_FRAME 1
+#define NVC0_BUFCTX_VERTEX 2
+#define NVC0_BUFCTX_TEXTURES 3
+#define NVC0_BUFCTX_COUNT 4
+
+struct nvc0_context {
+ struct nouveau_context base;
+
+ struct nvc0_screen *screen;
+
+ struct util_dynarray residents[NVC0_BUFCTX_COUNT];
+
+ uint32_t dirty;
+
+ struct {
+ uint32_t instance_elts; /* bitmask of per-instance elements */
+ uint32_t instance_base;
+ int32_t index_bias;
+ boolean prim_restart;
+ boolean early_z;
+ uint8_t num_vtxbufs;
+ uint8_t num_vtxelts;
+ uint8_t num_textures[5];
+ uint8_t num_samplers[5];
+ uint8_t tls_required; /* bitmask of shader types using l[] */
+ uint16_t scissor;
+ uint32_t uniform_buffer_bound[5];
+ } state;
+
+ struct nvc0_blend_stateobj *blend;
+ struct nvc0_rasterizer_stateobj *rast;
+ struct nvc0_zsa_stateobj *zsa;
+ struct nvc0_vertex_stateobj *vertex;
+
+ struct nvc0_program *vertprog;
+ struct nvc0_program *tctlprog;
+ struct nvc0_program *tevlprog;
+ struct nvc0_program *gmtyprog;
+ struct nvc0_program *fragprog;
+
+ struct pipe_resource *constbuf[5][16];
+ uint16_t constbuf_dirty[5];
+
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ unsigned num_vtxbufs;
+ struct pipe_index_buffer idxbuf;
+ uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */
+ uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
+ unsigned vbo_min_index; /* from pipe_draw_info, for vertex upload */
+ unsigned vbo_max_index;
+
+ struct pipe_sampler_view *textures[5][PIPE_MAX_SAMPLERS];
+ unsigned num_textures[5];
+ struct nv50_tsc_entry *samplers[5][PIPE_MAX_SAMPLERS];
+ unsigned num_samplers[5];
+
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_blend_color blend_colour;
+ struct pipe_stencil_ref stencil_ref;
+ struct pipe_poly_stipple stipple;
+ struct pipe_scissor_state scissor;
+ struct pipe_viewport_state viewport;
+ struct pipe_clip_state clip;
+
+ unsigned sample_mask;
+
+ boolean vbo_push_hint;
+
+ struct nvc0_transform_feedback_state *tfb;
+ struct pipe_resource *tfbbuf[4];
+ unsigned num_tfbbufs;
+ unsigned tfb_offset[4];
+
+ struct draw_context *draw;
+};
+
+#define NVC0_USING_EDGEFLAG(ctx) \
+ ((ctx)->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS)
+
+static INLINE struct nvc0_context *
+nvc0_context(struct pipe_context *pipe)
+{
+ return (struct nvc0_context *)pipe;
+}
+
+struct nvc0_surface {
+ struct pipe_surface base;
+ uint32_t offset;
+ uint32_t width;
+ uint16_t height;
+ uint16_t depth;
+};
+
+static INLINE struct nvc0_surface *
+nvc0_surface(struct pipe_surface *ps)
+{
+ return (struct nvc0_surface *)ps;
+}
+
+/* nvc0_context.c */
+struct pipe_context *nvc0_create(struct pipe_screen *, void *);
+
+void nvc0_default_flush_notify(struct nouveau_channel *);
+
+void nvc0_bufctx_emit_relocs(struct nvc0_context *);
+void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx,
+ struct nv04_resource *, uint32_t flags);
+void nvc0_bufctx_del_resident(struct nvc0_context *, int ctx,
+ struct nv04_resource *);
+static INLINE void
+nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx)
+{
+ util_dynarray_resize(&nvc0->residents[ctx], 0);
+}
+
+/* nvc0_draw.c */
+extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
+
+/* nvc0_program.c */
+boolean nvc0_program_translate(struct nvc0_program *);
+void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
+
+/* nvc0_query.c */
+void nvc0_init_query_functions(struct nvc0_context *);
+
+/* nvc0_shader_state.c */
+void nvc0_vertprog_validate(struct nvc0_context *);
+void nvc0_tctlprog_validate(struct nvc0_context *);
+void nvc0_tevlprog_validate(struct nvc0_context *);
+void nvc0_gmtyprog_validate(struct nvc0_context *);
+void nvc0_fragprog_validate(struct nvc0_context *);
+
+void nvc0_tfb_validate(struct nvc0_context *);
+
+/* nvc0_state.c */
+extern void nvc0_init_state_functions(struct nvc0_context *);
+
+/* nvc0_state_validate.c */
+extern boolean nvc0_state_validate(struct nvc0_context *);
+
+/* nvc0_surface.c */
+extern void nvc0_clear(struct pipe_context *, unsigned buffers,
+ const float *rgba, double depth, unsigned stencil);
+extern void nvc0_init_surface_functions(struct nvc0_context *);
+
+/* nvc0_tex.c */
+void nvc0_validate_textures(struct nvc0_context *);
+void nvc0_validate_samplers(struct nvc0_context *);
+
+struct pipe_sampler_view *
+nvc0_create_sampler_view(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_sampler_view *);
+
+/* nvc0_transfer.c */
+void
+nvc0_m2mf_push_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, void *data);
+void
+nvc0_m2mf_copy_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
+ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
+ unsigned size);
+
+/* nvc0_vbo.c */
+void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
+
+void *
+nvc0_vertex_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements);
+void
+nvc0_vertex_state_delete(struct pipe_context *pipe, void *hwcso);
+
+void nvc0_vertex_arrays_validate(struct nvc0_context *nvc0);
+
+/* nvc0_push.c */
+void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *);
+void nvc0_push_vbo2(struct nvc0_context *, const struct pipe_draw_info *);
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_draw.c b/src/gallium/drivers/nvc0/nvc0_draw.c
new file mode 100644
index 0000000000..ac7e9f66a1
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_draw.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "draw/draw_pipe.h"
+
+#include "nvc0_context.h"
+
+struct nvc0_render_stage {
+ struct draw_stage stage;
+ struct nvc0_context *nvc0;
+};
+
+static INLINE struct nvc0_render_stage *
+nvc0_render_stage(struct draw_stage *stage)
+{
+ return (struct nvc0_render_stage *)stage;
+}
+
+static void
+nvc0_render_point(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nvc0_render_line(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nvc0_render_tri(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nvc0_render_flush(struct draw_stage *stage, unsigned flags)
+{
+}
+
+static void
+nvc0_render_reset_stipple_counter(struct draw_stage *stage)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nvc0_render_destroy(struct draw_stage *stage)
+{
+ FREE(stage);
+}
+
+struct draw_stage *
+nvc0_draw_render_stage(struct nvc0_context *nvc0)
+{
+ struct nvc0_render_stage *rs = CALLOC_STRUCT(nvc0_render_stage);
+
+ rs->nvc0 = nvc0;
+ rs->stage.draw = nvc0->draw;
+ rs->stage.destroy = nvc0_render_destroy;
+ rs->stage.point = nvc0_render_point;
+ rs->stage.line = nvc0_render_line;
+ rs->stage.tri = nvc0_render_tri;
+ rs->stage.flush = nvc0_render_flush;
+ rs->stage.reset_stipple_counter = nvc0_render_reset_stipple_counter;
+
+ return &rs->stage;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_formats.c b/src/gallium/drivers/nvc0/nvc0_formats.c
new file mode 100644
index 0000000000..454c744063
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_formats.c
@@ -0,0 +1,523 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_screen.h"
+#include "nvc0_3d.xml.h"
+#include "nv50/nv50_defs.xml.h"
+#include "nv50/nv50_texture.xml.h"
+#include "pipe/p_defines.h"
+
+#define A_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \
+ (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \
+ (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
+ (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \
+ (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
+ (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \
+ (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
+ (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \
+ (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
+ NV50_TIC_0_FMT_##sz, \
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_##sz | \
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_##t0 | \
+ (r << 31)
+
+#define B_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \
+ (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \
+ (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
+ (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \
+ (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
+ (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \
+ (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
+ (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \
+ (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
+ NV50_TIC_0_FMT_##sz, 0
+
+#define VERTEX_BUFFER PIPE_BIND_VERTEX_BUFFER
+#define SAMPLER_VIEW PIPE_BIND_SAMPLER_VIEW
+#define RENDER_TARGET PIPE_BIND_RENDER_TARGET
+#define DEPTH_STENCIL PIPE_BIND_DEPTH_STENCIL
+#define SCANOUT PIPE_BIND_SCANOUT
+
+/* for vertex buffers: */
+#define NV50_TIC_0_FMT_8_8_8 NV50_TIC_0_FMT_8_8_8_8
+#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16
+#define NV50_TIC_0_FMT_32_32_32 NV50_TIC_0_FMT_32_32_32_32
+
+const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] =
+{
+ /* COMMON FORMATS */
+
+ [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50_SURFACE_FORMAT_A8R8G8B8_UNORM,
+ A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+ [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50_SURFACE_FORMAT_X8R8G8B8_UNORM,
+ A_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+ [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50_SURFACE_FORMAT_A8R8G8B8_SRGB,
+ A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50_SURFACE_FORMAT_X8R8G8B8_SRGB,
+ A_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_B5G6R5_UNORM] = { NV50_SURFACE_FORMAT_R5G6B5_UNORM,
+ B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1),
+ SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+ [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50_SURFACE_FORMAT_A1R5G5B5_UNORM,
+ B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1),
+ SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+ [PIPE_FORMAT_B5G5R5X1_UNORM] = { 0,
+ B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1),
+ SAMPLER_VIEW | SCANOUT },
+
+ [PIPE_FORMAT_B4G4R4A4_UNORM] = { 0,
+ B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_B4G4R4X4_UNORM] = { 0,
+ B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_A2B10G10R10_UNORM,
+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0),
+ SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT },
+
+ [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50_SURFACE_FORMAT_A2R10G10B10_UNORM,
+ A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 1),
+ SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER },
+
+ /* DEPTH/STENCIL FORMATS */
+
+ [PIPE_FORMAT_Z16_UNORM] = { NV50_ZETA_FORMAT_Z16_UNORM,
+ B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z16, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_S8Z24_UNORM,
+ B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, S8Z24, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_X8Z24_UNORM,
+ B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, X8Z24, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_Z24S8_UNORM,
+ B_(C1, C1, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, Z24S8, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT,
+ B_(C0, C0, C0, ONE_FLOAT, FLOAT, UINT, UINT, UINT, Z32, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ [PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = {
+ NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM,
+ B_(C0, C0, C0, ONE_FLOAT, FLOAT, UINT, UINT, UINT, X24S8Z32, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ /* LUMINANCE, ALPHA, INTENSITY */
+
+ [PIPE_FORMAT_L8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
+ A_(C0, C0, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_L8_SRGB] = { NV50_SURFACE_FORMAT_R8_UNORM,
+ A_(C0, C0, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_I8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
+ A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_I16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM,
+ A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 16, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_A8_UNORM] = { NV50_SURFACE_FORMAT_A8_UNORM,
+ A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_A16_UNORM] = { 0,
+ A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 16, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_L8A8_UNORM] = { 0,
+ A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_L8A8_SRGB] = { 0,
+ A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_L16A16_UNORM] = { 0,
+ A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 16_16, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_L4A4_UNORM] = { 0,
+ B_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 4_4, 0),
+ SAMPLER_VIEW },
+
+ /* DXT, RGTC */
+
+ [PIPE_FORMAT_DXT1_RGB] = { 0,
+ B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_DXT1_RGBA] = { 0,
+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_DXT3_RGBA] = { 0,
+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT3, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_DXT5_RGBA] = { 0,
+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_DXT1_SRGB] = { 0,
+ B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_DXT1_SRGBA] = { 0,
+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_DXT3_SRGBA] = { 0,
+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT3, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_DXT5_SRGBA] = { 0,
+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_RGTC1_UNORM] = { 0,
+ B_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, RGTC1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_RGTC1_SNORM] = { 0,
+ B_(C0, ZERO, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, RGTC1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_RGTC2_UNORM] = { 0,
+ B_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, RGTC2, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_RGTC2_SNORM] = { 0,
+ B_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, RGTC2, 0),
+ SAMPLER_VIEW },
+
+ /* FLOAT 16 */
+
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT,
+ A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT,
+ A_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16G16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16_FLOAT,
+ A_(C0, C1, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ /* FLOAT 32 */
+
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT,
+ A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT,
+ A_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R32G32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32_FLOAT,
+ A_(C0, C1, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ /* ODD FORMATS */
+
+ [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50_SURFACE_FORMAT_B10G11R11_FLOAT,
+ B_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0),
+ SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R9G9B9E5_FLOAT] = { 0,
+ B_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, E5_9_9_9, 0),
+ SAMPLER_VIEW },
+
+ /* SNORM 32 */
+
+ [PIPE_FORMAT_R32G32B32A32_SNORM] = { 0,
+ A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 32_32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32B32_SNORM] = { 0,
+ A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32_SNORM] = { 0,
+ A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32_SNORM] = { 0,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* UNORM 32 */
+
+ [PIPE_FORMAT_R32G32B32A32_UNORM] = { 0,
+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 32_32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32B32_UNORM] = { 0,
+ A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32_UNORM] = { 0,
+ A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32_UNORM] = { 0,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* SNORM 16 */
+
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_SNORM,
+ A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16G16B16_SNORM] = { 0,
+ A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_R16G16_SNORM,
+ A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ /* UNORM 16 */
+
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_UNORM,
+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16G16B16_UNORM] = { 0,
+ A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_R16G16_UNORM,
+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ /* SNORM 8 */
+
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_SNORM,
+ A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 8_8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8G8B8_SNORM] = { 0,
+ A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R8G8_SNORM] = { NV50_SURFACE_FORMAT_R8G8_SNORM,
+ A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8_SNORM] = { NV50_SURFACE_FORMAT_R8_SNORM,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ /* UNORM 8 */
+
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_UNORM,
+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50_SURFACE_FORMAT_A8B8G8R8_SRGB,
+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8G8B8_UNORM] = { NV50_SURFACE_FORMAT_X8B8G8R8_UNORM,
+ A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8G8B8_SRGB] = { NV50_SURFACE_FORMAT_X8B8G8R8_SRGB,
+ A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8G8_UNORM] = { NV50_SURFACE_FORMAT_R8G8_UNORM,
+ A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ /* SSCALED 32 (not integer, converted to float on fetch !) */
+
+ [PIPE_FORMAT_R32G32B32A32_SSCALED] = { 0,
+ A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32_32, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R32G32B32_SSCALED] = { 0,
+ A_(C0, C1, C2, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R32G32_SSCALED] = { 0,
+ A_(C0, C1, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 32_32, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R32_SSCALED] = { 0,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 32, 0),
+ VERTEX_BUFFER },
+
+ /* USCALED 32 */
+
+ [PIPE_FORMAT_R32G32B32A32_USCALED] = { 0,
+ A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 32_32_32_32, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R32G32B32_USCALED] = { 0,
+ A_(C0, C1, C2, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 32_32_32, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R32G32_USCALED] = { 0,
+ A_(C0, C1, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 32_32, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R32_USCALED] = { 0,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 32, 0),
+ VERTEX_BUFFER },
+
+ /* SSCALED 16 */
+
+ [PIPE_FORMAT_R16G16B16A16_SSCALED] = { 0,
+ A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16_16, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R16G16B16_SSCALED] = { 0,
+ A_(C0, C1, C2, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R16G16_SSCALED] = { 0,
+ A_(C0, C1, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 16_16, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R16_SSCALED] = { 0,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 16, 0),
+ VERTEX_BUFFER },
+
+ /* USCALED 16 */
+
+ [PIPE_FORMAT_R16G16B16A16_USCALED] = { 0,
+ A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 16_16_16_16, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R16G16B16_USCALED] = { 0,
+ A_(C0, C1, C2, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 16_16_16, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R16G16_USCALED] = { 0,
+ A_(C0, C1, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 16_16, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R16_USCALED] = { 0,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 16, 0),
+ VERTEX_BUFFER },
+
+ /* SSCALED 8 */
+
+ [PIPE_FORMAT_R8G8B8A8_SSCALED] = { 0,
+ A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8_8, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R8G8B8_SSCALED] = { 0,
+ A_(C0, C1, C2, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R8G8_SSCALED] = { 0,
+ A_(C0, C1, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 8_8, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R8_SSCALED] = { 0,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 8, 0),
+ VERTEX_BUFFER },
+
+ /* USCALED 8 */
+
+ [PIPE_FORMAT_R8G8B8A8_USCALED] = { 0,
+ A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 8_8_8_8, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R8G8B8_USCALED] = { 0,
+ A_(C0, C1, C2, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 8_8_8, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R8G8_USCALED] = { 0,
+ A_(C0, C1, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 8_8, 0),
+ VERTEX_BUFFER },
+
+ [PIPE_FORMAT_R8_USCALED] = { 0,
+ A_(C0, ZERO, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 8, 0),
+ VERTEX_BUFFER },
+
+ /* OTHER FORMATS */
+
+ [PIPE_FORMAT_R8G8_B8G8_UNORM] = { 0,
+ B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, C1_C2_C1_C0, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_G8R8_G8B8_UNORM] = { 0,
+ B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, C2_C1_C0_C1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R8SG8SB8UX8U_NORM] = { 0,
+ B_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 8_8_8_8, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R5SG5SB6U_NORM] = { 0,
+ B_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 6_5_5, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R1_UNORM] = { 0,
+ B_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, BITMAP_8X8, 0),
+ SAMPLER_VIEW },
+};
diff --git a/src/gallium/drivers/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nvc0/nvc0_graph_macros.h
new file mode 100644
index 0000000000..b7d0d3eafa
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_graph_macros.h
@@ -0,0 +1,235 @@
+
+#ifndef __NVC0_PGRAPH_MACROS_H__
+#define __NVC0_PGRAPH_MACROS_H__
+
+/* extrinsrt r1, r2, src, size, dst: replace bits [dst:dst+size) in r1
+ * with bits [src:src+size) in r2
+ *
+ * bra(n)z annul: no delay slot
+ */
+
+/* The comments above the macros describe what they *should* be doing,
+ * but we use less functionality for now.
+ */
+
+/*
+ * for (i = 0; i < 8; ++i)
+ * [NVC0_3D_BLEND_ENABLE(i)] = BIT(i of arg);
+ *
+ * [3428] = arg;
+ *
+ * if (arg == 0 || [NVC0_3D_MULTISAMPLE_ENABLE] == 0)
+ * [0d9c] = 0;
+ * else
+ * [0d9c] = [342c];
+ */
+static const uint32_t nvc0_9097_blend_enables[] =
+{
+ 0x05360021, /* 0x00: maddr [NVC0_3D_BLEND_ENABLE(0), increment = 4] */
+ 0x00404042, /* 0x01: send extrinsrt 0 $r1 0 0x1 0 */
+ 0x00424042, /* 0x02: send extrinsrt 0 $r1 0x1 0x1 0 */
+ 0x00444042, /* 0x03: send extrinsrt 0 $r1 0x2 0x1 0 */
+ 0x00464042, /* 0x04: send extrinsrt 0 $r1 0x3 0x1 0 */
+ 0x00484042, /* 0x05: send extrinsrt 0 $r1 0x4 0x1 0 */
+ 0x004a4042, /* 0x06: send extrinsrt 0 $r1 0x5 0x1 0 */
+ 0x004c40c2, /* 0x07: exit send extrinsrt 0 $r1 0x6 0x1 0 */
+ 0x004e4042, /* 0x08: send extrinsrt 0 $r1 0x7 0x1 0 */
+};
+
+/*
+ * uint64 limit = (parm(0) << 32) | parm(1);
+ * uint64 start = (parm(2) << 32);
+ *
+ * if (limit) {
+ * start |= parm(3);
+ * --limit;
+ * } else {
+ * start |= 1;
+ * }
+ *
+ * [0x1c04 + (arg & 0xf) * 16 + 0] = (start >> 32) & 0xff;
+ * [0x1c04 + (arg & 0xf) * 16 + 4] = start & 0xffffffff;
+ * [0x1f00 + (arg & 0xf) * 8 + 0] = (limit >> 32) & 0xff;
+ * [0x1f00 + (arg & 0xf) * 8 + 4] = limit & 0xffffffff;
+ */
+static const uint32_t nvc0_9097_vertex_array_select[] =
+{
+ 0x00000201, /* 0x00: parm $r2 */
+ 0x00000301, /* 0x01: parm $r3 */
+ 0x00000401, /* 0x02: parm $r4 */
+ 0x00000501, /* 0x03: parm $r5 */
+ 0x11004612, /* 0x04: mov $r6 extrinsrt 0 $r1 0 4 2 */
+ 0x09004712, /* 0x05: mov $r7 extrinsrt 0 $r1 0 4 1 */
+ 0x05c07621, /* 0x06: maddr $r6 add $6 0x1701 */
+ 0x00002041, /* 0x07: send $r4 */
+ 0x00002841, /* 0x08: send $r5 */
+ 0x05f03f21, /* 0x09: maddr $r7 add $7 0x17c0 */
+ 0x000010c1, /* 0x0a: exit send $r2 */
+ 0x00001841, /* 0x0b: send $r3 */
+};
+
+static const uint32_t nvc0_9097_color_mask_brdc[] =
+{
+ 0x05a00021, /* maddr [NVC0_3D_COLOR_MASK(0), increment = 4] */
+ 0x00000841, /* send $r1 */
+ 0x00000841, /* send $r1 */
+ 0x00000841, /* send $r1 */
+ 0x00000841, /* send $r1 */
+ 0x00000841, /* send $r1 */
+ 0x00000841, /* send $r1 */
+ 0x000008c1, /* exit send $r1 */
+ 0x00000841, /* send $r1 */
+};
+
+/*
+ * [GL_POLYGON_MODE_FRONT] = arg;
+ *
+ * if (BIT(31 of [0x3410]))
+ * [1a24] = 0x7353;
+ *
+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
+ * [02ec] = 0;
+ * else
+ * if ([GL_POLYGON_MODE_BACK] == GL_LINE || arg == GL_LINE)
+ * [02ec] = BYTE(1 of [0x3410]) << 4;
+ * else
+ * [02ec] = BYTE(0 of [0x3410]) << 4;
+ */
+static const uint32_t nvc0_9097_poly_mode_front[] =
+{
+ 0x00db0215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_BACK] */
+ 0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */
+ 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */
+ 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
+ 0x00004211, /* 0x04: mov $r2 0x1 */
+ 0x00180611, /* 0x05: mov $r6 0x60 */
+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x07: braz $r7 0xa */
+ 0x00dac021, /* 0x08: maddr 0x36b */
+ 0x00800611, /* 0x09: mov $r6 0x200 */
+ 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */
+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x0c: braz $r7 0xf */
+ 0x00000841, /* 0x0d: send $r1 */
+ 0x00000611, /* 0x0e: mov $r6 0 */
+ 0x002ec0a1, /* 0x0f: exit maddr [02ec] */
+ 0x00003041 /* 0x10: send $r6 */
+};
+
+/*
+ * [GL_POLYGON_MODE_BACK] = arg;
+ *
+ * if (BIT(31 of [0x3410]))
+ * [1a24] = 0x7353;
+ *
+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
+ * [02ec] = 0;
+ * else
+ * if ([GL_POLYGON_MODE_FRONT] == GL_LINE || arg == GL_LINE)
+ * [02ec] = BYTE(1 of [0x3410]) << 4;
+ * else
+ * [02ec] = BYTE(0 of [0x3410]) << 4;
+ */
+/* NOTE: 0x3410 = 0x80002006 by default,
+ * POLYGON_MODE == GL_LINE check replaced by (MODE & 1)
+ * SP_SELECT(i) == (i << 4) | 1 check replaced by SP_SELECT(i) & 1
+ */
+static const uint32_t nvc0_9097_poly_mode_back[] =
+{
+ 0x00dac215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_FRONT] */
+ 0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */
+ 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */
+ 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
+ 0x00004211, /* 0x04: mov $r2 0x1 */
+ 0x00180611, /* 0x05: mov $r6 0x60 */
+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x07: braz $r7 0xa */
+ 0x00db0021, /* 0x08: maddr 0x36c */
+ 0x00800611, /* 0x09: mov $r6 0x200 */
+ 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */
+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x0c: braz $r7 0xf */
+ 0x00000841, /* 0x0d: send $r1 */
+ 0x00000611, /* 0x0e: mov $r6 0 */
+ 0x002ec0a1, /* 0x0f: exit maddr [02ec] */
+ 0x00003041 /* 0x10: send $r6 */
+};
+
+/*
+ * [NVC0_3D_SP_SELECT(4)] = arg
+ *
+ * if BIT(31 of [0x3410]) == 0
+ * [1a24] = 0x7353;
+ *
+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || arg == 0x41)
+ * [02ec] = 0
+ * else
+ * if (any POLYGON MODE == LINE)
+ * [02ec] = BYTE(1 of [3410]) << 4;
+ * else
+ * [02ec] = BYTE(0 of [3410]) << 4; // 02ec valid bits are 0xff1
+ */
+static const uint32_t nvc0_9097_gp_select[] = /* 0x0f */
+{
+ 0x00dac215, /* 0x00: read $r2 0x36b */
+ 0x00db0315, /* 0x01: read $r3 0x36c */
+ 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */
+ 0x020c0415, /* 0x03: read $r4 0x830 */
+ 0x00004211, /* 0x04: mov $r2 0x1 */
+ 0x00180611, /* 0x05: mov $r6 0x60 */
+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x07: braz $r7 0xa */
+ 0x02100021, /* 0x08: maddr 0x840 */
+ 0x00800611, /* 0x09: mov $r6 0x200 */
+ 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */
+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x0c: braz $r7 0xf */
+ 0x00000841, /* 0x0d: send $r1 */
+ 0x00000611, /* 0x0e: mov $r6 0 */
+ 0x002ec0a1, /* 0x0f: exit maddr 0xbb */
+ 0x00003041, /* 0x10: send $r6 */
+};
+
+/*
+ * [NVC0_3D_SP_SELECT(3)] = arg
+ *
+ * if BIT(31 of [0x3410]) == 0
+ * [1a24] = 0x7353;
+ *
+ * if (arg == 0x31) {
+ * if (BIT(2 of [0x3430])) {
+ * int i = 15; do { --i; } while(i);
+ * [0x1a2c] = 0;
+ * }
+ * }
+ *
+ * if ([NVC0_3D_SP_SELECT(4)] == 0x41 || arg == 0x31)
+ * [02ec] = 0
+ * else
+ * if ([any POLYGON_MODE] == GL_LINE)
+ * [02ec] = BYTE(1 of [3410]) << 4;
+ * else
+ * [02ec] = BYTE(0 of [3410]) << 4;
+ */
+static const uint32_t nvc0_9097_tep_select[] = /* 0x10 */
+{
+ 0x00dac215, /* 0x00: read $r2 0x36b */
+ 0x00db0315, /* 0x01: read $r3 0x36c */
+ 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */
+ 0x02100415, /* 0x03: read $r4 0x840 */
+ 0x00004211, /* 0x04: mov $r2 0x1 */
+ 0x00180611, /* 0x05: mov $r6 0x60 */
+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x07: braz $r7 0xa */
+ 0x020c0021, /* 0x08: maddr 0x830 */
+ 0x00800611, /* 0x09: mov $r6 0x200 */
+ 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */
+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x0c: braz $r7 0xf */
+ 0x00000841, /* 0x0d: send $r1 */
+ 0x00000611, /* 0x0e: mov $r6 0 */
+ 0x002ec0a1, /* 0x0f: exit maddr 0xbb */
+ 0x00003041, /* 0x10: send $r6 */
+};
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h b/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h
new file mode 100644
index 0000000000..3bf628d425
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h
@@ -0,0 +1,138 @@
+#ifndef NVC0_M2MF_XML
+#define NVC0_M2MF_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_m2mf.xml ( 2227 bytes, from 2010-10-16 16:10:29)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24)
+- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_M2MF_TILING_MODE_IN 0x00000204
+
+#define NVC0_M2MF_TILING_PITCH_IN 0x00000208
+
+#define NVC0_M2MF_TILING_HEIGHT_IN 0x0000020c
+
+#define NVC0_M2MF_TILING_DEPTH_IN 0x00000210
+
+#define NVC0_M2MF_TILING_POSITION_IN_Z 0x00000214
+
+#define NVC0_M2MF_TILING_MODE_OUT 0x00000220
+
+#define NVC0_M2MF_TILING_PITCH_OUT 0x00000224
+
+#define NVC0_M2MF_TILING_HEIGHT_OUT 0x00000228
+
+#define NVC0_M2MF_TILING_DEPTH_OUT 0x0000022c
+
+#define NVC0_M2MF_TILING_POSITION_OUT_Z 0x00000230
+
+#define NVC0_M2MF_OFFSET_OUT_HIGH 0x00000238
+
+#define NVC0_M2MF_OFFSET_OUT_LOW 0x0000023c
+
+#define NVC0_M2MF_EXEC 0x00000300
+#define NVC0_M2MF_EXEC_PUSH 0x00000001
+#define NVC0_M2MF_EXEC_LINEAR_IN 0x00000010
+#define NVC0_M2MF_EXEC_LINEAR_OUT 0x00000100
+#define NVC0_M2MF_EXEC_NOTIFY 0x00002000
+#define NVC0_M2MF_EXEC_INC__MASK 0x00f00000
+#define NVC0_M2MF_EXEC_INC__SHIFT 20
+
+#define NVC0_M2MF_DATA 0x00000304
+
+#define NVC0_M2MF_OFFSET_IN_HIGH 0x0000030c
+
+#define NVC0_M2MF_OFFSET_IN_LOW 0x00000310
+
+#define NVC0_M2MF_PITCH_IN 0x00000314
+
+#define NVC0_M2MF_PITCH_OUT 0x00000318
+
+#define NVC0_M2MF_LINE_LENGTH_IN 0x0000031c
+
+#define NVC0_M2MF_LINE_COUNT 0x00000320
+
+#define NVC0_M2MF_NOTIFY_ADDRESS_HIGH 0x0000032c
+
+#define NVC0_M2MF_NOTIFY_ADDRESS_LOW 0x00000330
+
+#define NVC0_M2MF_NOTIFY 0x00000334
+
+#define NVC0_M2MF_TILING_POSITION_IN_X 0x00000344
+
+#define NVC0_M2MF_TILING_POSITION_IN_Y 0x00000348
+
+#define NVC0_M2MF_TILING_POSITION_OUT_X 0x0000034c
+
+#define NVC0_M2MF_TILING_POSITION_OUT_Y 0x00000350
+
+
+#endif /* NVC0_M2MF_XML */
diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c
new file mode 100644
index 0000000000..bced324552
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_miptree.c
@@ -0,0 +1,319 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+#include "nvc0_transfer.h"
+
+static INLINE uint32_t
+get_tile_dims(unsigned nx, unsigned ny, unsigned nz)
+{
+ uint32_t tile_mode = 0x000;
+
+ if (ny > 64) tile_mode = 0x040; /* height 128 tiles */
+ else
+ if (ny > 32) tile_mode = 0x030; /* height 64 tiles */
+ else
+ if (ny > 16) tile_mode = 0x020; /* height 32 tiles */
+ else
+ if (ny > 8) tile_mode = 0x010; /* height 16 tiles */
+
+ if (nz == 1)
+ return tile_mode;
+ else
+ if (tile_mode > 0x020)
+ tile_mode = 0x020;
+
+ if (nz > 16 && tile_mode < 0x020)
+ return tile_mode | 0x500; /* depth 32 tiles */
+ if (nz > 8) return tile_mode | 0x400; /* depth 16 tiles */
+ if (nz > 4) return tile_mode | 0x300; /* depth 8 tiles */
+ if (nz > 2) return tile_mode | 0x200; /* depth 4 tiles */
+
+ return tile_mode | 0x100;
+}
+
+uint32_t
+nvc0_miptree_zslice_offset(struct nvc0_miptree *mt, unsigned l, unsigned z)
+{
+ unsigned nblocksy; /* height of texture level aligned to tile height */
+
+ unsigned stride_2d; /* to next slice within a 3D tile */
+ unsigned stride_3d; /* to slice in the next (in z direction !) 3D tile */
+
+ unsigned tile_d_shift = NVC0_TILE_DIM_SHIFT(mt->level[l].tile_mode, 2);
+ unsigned tile_d = 1 << tile_d_shift;
+
+ nblocksy = util_format_get_nblocksy(mt->base.base.format,
+ u_minify(mt->base.base.height0, l));
+
+ nblocksy = align(nblocksy, NVC0_TILE_HEIGHT(mt->level[l].tile_mode));
+
+ stride_2d = NVC0_TILE_SIZE_2D(mt->level[l].tile_mode);
+
+ stride_3d = (nblocksy * mt->level[l].pitch) << tile_d_shift;
+
+ return (z & (tile_d - 1)) * stride_2d + (z >> tile_d_shift) * stride_3d;
+}
+
+static void
+nvc0_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt)
+{
+ struct nvc0_miptree *mt = nvc0_miptree(pt);
+
+ nouveau_screen_bo_release(pscreen, mt->base.bo);
+
+ FREE(mt);
+}
+
+static boolean
+nvc0_miptree_get_handle(struct pipe_screen *pscreen,
+ struct pipe_resource *pt,
+ struct winsys_handle *whandle)
+{
+ struct nvc0_miptree *mt = nvc0_miptree(pt);
+ unsigned stride;
+
+ if (!mt || !mt->base.bo)
+ return FALSE;
+
+ stride = util_format_get_stride(mt->base.base.format,
+ mt->base.base.width0);
+
+ return nouveau_screen_bo_get_handle(pscreen,
+ mt->base.bo,
+ stride,
+ whandle);
+}
+
+const struct u_resource_vtbl nvc0_miptree_vtbl =
+{
+ nvc0_miptree_get_handle, /* get_handle */
+ nvc0_miptree_destroy, /* resource_destroy */
+ nvc0_miptree_transfer_new, /* get_transfer */
+ nvc0_miptree_transfer_del, /* transfer_destroy */
+ nvc0_miptree_transfer_map, /* transfer_map */
+ u_default_transfer_flush_region, /* transfer_flush_region */
+ nvc0_miptree_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
+};
+
+struct pipe_resource *
+nvc0_miptree_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ)
+{
+ struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+ struct nvc0_miptree *mt = CALLOC_STRUCT(nvc0_miptree);
+ struct pipe_resource *pt = &mt->base.base;
+ int ret;
+ unsigned w, h, d, l, alloc_size;
+ uint32_t tile_flags;
+
+ if (!mt)
+ return NULL;
+
+ mt->base.vtbl = &nvc0_miptree_vtbl;
+ *pt = *templ;
+ pipe_reference_init(&pt->reference, 1);
+ pt->screen = pscreen;
+
+ mt->layout_3d = pt->target == PIPE_TEXTURE_3D;
+
+ w = pt->width0;
+ h = pt->height0;
+ d = mt->layout_3d ? pt->depth0 : 1;
+
+ switch (pt->format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ tile_flags = 0x0700; /* COMPRESSED */
+ tile_flags = 0x0100; /* NORMAL */
+ break;
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ tile_flags = 0x5300; /* MSAA 4, COMPRESSED */
+ tile_flags = 0x4600; /* NORMAL */
+ break;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ tile_flags = 0x1100; /* NORMAL */
+ if (w * h >= 128 * 128 && 0)
+ tile_flags = 0x1700; /* COMPRESSED, requires magic */
+ break;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ tile_flags = 0xf500; /* COMPRESSED */
+ tile_flags = 0xf700; /* MSAA 2 */
+ tile_flags = 0xf900; /* MSAA 4 */
+ tile_flags = 0xfe00; /* NORMAL */
+ break;
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
+ tile_flags = 0xce00; /* COMPRESSED */
+ tile_flags = 0xcf00; /* MSAA 2, COMPRESSED */
+ tile_flags = 0xd000; /* MSAA 4, COMPRESSED */
+ tile_flags = 0xc300; /* NORMAL */
+ break;
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ tile_flags = 0xe900; /* COMPRESSED */
+ tile_flags = 0xfe00; /* NORMAL */
+ break;
+ default:
+ tile_flags = 0xe000; /* MSAA 4, COMPRESSED 32 BIT */
+ tile_flags = 0xfe00; /* NORMAL 32 BIT */
+ if (w * h >= 128 * 128 && 0)
+ tile_flags = 0xdb00; /* COMPRESSED 32 BIT, requires magic */
+ break;
+ }
+
+ /* For 3D textures, a mipmap is spanned by all the layers, for array
+ * textures and cube maps, each layer contains its own mipmaps.
+ */
+ for (l = 0; l <= pt->last_level; ++l) {
+ struct nvc0_miptree_level *lvl = &mt->level[l];
+ unsigned nbx = util_format_get_nblocksx(pt->format, w);
+ unsigned nby = util_format_get_nblocksy(pt->format, h);
+ unsigned blocksize = util_format_get_blocksize(pt->format);
+
+ lvl->offset = mt->total_size;
+ lvl->tile_mode = get_tile_dims(nbx, nby, d);
+ lvl->pitch = align(nbx * blocksize, NVC0_TILE_PITCH(lvl->tile_mode));
+
+ mt->total_size += lvl->pitch *
+ align(nby, NVC0_TILE_HEIGHT(lvl->tile_mode)) *
+ align(d, NVC0_TILE_DEPTH(lvl->tile_mode));
+
+ w = u_minify(w, 1);
+ h = u_minify(h, 1);
+ d = u_minify(d, 1);
+ }
+
+ if (pt->array_size > 1) {
+ mt->layer_stride = align(mt->total_size,
+ NVC0_TILE_SIZE(mt->level[0].tile_mode));
+ mt->total_size = mt->layer_stride * pt->array_size;
+ }
+
+ alloc_size = mt->total_size;
+ if (tile_flags == 0x1700)
+ alloc_size *= 3; /* HiZ, XXX: correct size */
+
+ ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, alloc_size,
+ mt->level[0].tile_mode, tile_flags,
+ &mt->base.bo);
+ if (ret) {
+ FREE(mt);
+ return NULL;
+ }
+ mt->base.domain = NOUVEAU_BO_VRAM;
+
+ return pt;
+}
+
+struct pipe_resource *
+nvc0_miptree_from_handle(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ,
+ struct winsys_handle *whandle)
+{
+ struct nvc0_miptree *mt;
+ unsigned stride;
+
+ /* only supports 2D, non-mipmapped textures for the moment */
+ if ((templ->target != PIPE_TEXTURE_2D &&
+ templ->target != PIPE_TEXTURE_RECT) ||
+ templ->last_level != 0 ||
+ templ->depth0 != 1 ||
+ templ->array_size > 1)
+ return NULL;
+
+ mt = CALLOC_STRUCT(nvc0_miptree);
+ if (!mt)
+ return NULL;
+
+ mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride);
+ if (mt->base.bo == NULL) {
+ FREE(mt);
+ return NULL;
+ }
+
+ mt->base.base = *templ;
+ mt->base.vtbl = &nvc0_miptree_vtbl;
+ pipe_reference_init(&mt->base.base.reference, 1);
+ mt->base.base.screen = pscreen;
+ mt->level[0].pitch = stride;
+ mt->level[0].offset = 0;
+ mt->level[0].tile_mode = mt->base.bo->tile_mode;
+
+ /* no need to adjust bo reference count */
+ return &mt->base.base;
+}
+
+
+/* Surface functions.
+ */
+
+struct pipe_surface *
+nvc0_miptree_surface_new(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ const struct pipe_surface *templ)
+{
+ struct nvc0_miptree *mt = nvc0_miptree(pt); /* guaranteed */
+ struct nvc0_surface *ns;
+ struct pipe_surface *ps;
+ struct nvc0_miptree_level *lvl = &mt->level[templ->u.tex.level];
+
+ ns = CALLOC_STRUCT(nvc0_surface);
+ if (!ns)
+ return NULL;
+ ps = &ns->base;
+
+ pipe_reference_init(&ps->reference, 1);
+ pipe_resource_reference(&ps->texture, pt);
+ ps->context = pipe;
+ ps->format = templ->format;
+ ps->usage = templ->usage;
+ ps->u.tex.level = templ->u.tex.level;
+ ps->u.tex.first_layer = templ->u.tex.first_layer;
+ ps->u.tex.last_layer = templ->u.tex.last_layer;
+
+ ns->width = u_minify(pt->width0, ps->u.tex.level);
+ ns->height = u_minify(pt->height0, ps->u.tex.level);
+ ns->depth = ps->u.tex.last_layer - ps->u.tex.first_layer + 1;
+ ns->offset = lvl->offset;
+
+ /* comment says there are going to be removed, but they're used by the st */
+ ps->width = ns->width;
+ ps->height = ns->height;
+
+ return ps;
+}
+
+void
+nvc0_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps)
+{
+ struct nvc0_surface *s = nvc0_surface(ps);
+
+ pipe_resource_reference(&ps->texture, NULL);
+
+ FREE(s);
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c
new file mode 100644
index 0000000000..bd85a7f1ff
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_pc.c
@@ -0,0 +1,716 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define NOUVEAU_DEBUG 1
+
+#include "nvc0_pc.h"
+#include "nvc0_program.h"
+
+uint8_t
+nvc0_ir_reverse_cc(uint8_t cc)
+{
+ static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
+
+ return cc_swapped[cc & 7] | (cc & ~7);
+}
+
+boolean
+nvc0_insn_can_load(struct nv_instruction *nvi, int s,
+ struct nv_instruction *ld)
+{
+ int i;
+
+ if (ld->opcode == NV_OP_MOV && ld->src[0]->value->reg.file == NV_FILE_IMM) {
+ if (s > 1 || !(nvc0_op_info_table[nvi->opcode].immediate & (1 << s)))
+ return FALSE;
+ if (!(nvc0_op_info_table[nvi->opcode].immediate & 4))
+ if (ld->src[0]->value->reg.imm.u32 & 0xfff)
+ return FALSE;
+ } else
+ if (!(nvc0_op_info_table[nvi->opcode].memory & (1 << s)))
+ return FALSE;
+
+ if (ld->indirect >= 0)
+ return FALSE;
+
+ /* a few ops can use g[] sources directly, but we don't support g[] yet */
+ if (ld->src[0]->value->reg.file == NV_FILE_MEM_L ||
+ ld->src[0]->value->reg.file == NV_FILE_MEM_G)
+ return FALSE;
+
+ for (i = 0; i < 3 && nvi->src[i]; ++i)
+ if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
+ return FALSE;
+
+ return TRUE;
+}
+
+/* Return whether this instruction can be executed conditionally. */
+boolean
+nvc0_insn_is_predicateable(struct nv_instruction *nvi)
+{
+ if (nvi->predicate >= 0) /* already predicated */
+ return FALSE;
+ if (!nvc0_op_info_table[nvi->opcode].predicate &&
+ !nvc0_op_info_table[nvi->opcode].pseudo)
+ return FALSE;
+ return TRUE;
+}
+
+int
+nvc0_insn_refcount(struct nv_instruction *nvi)
+{
+ int rc = 0;
+ int i;
+ for (i = 0; i < 5 && nvi->def[i]; ++i) {
+ if (!nvi->def[i])
+ return rc;
+ rc += nvi->def[i]->refc;
+ }
+ return rc;
+}
+
+int
+nvc0_pc_replace_value(struct nv_pc *pc,
+ struct nv_value *old_val,
+ struct nv_value *new_val)
+{
+ int i, n, s;
+
+ if (old_val == new_val)
+ return old_val->refc;
+
+ for (i = 0, n = 0; i < pc->num_refs; ++i) {
+ if (pc->refs[i]->value == old_val) {
+ ++n;
+ for (s = 0; s < 6 && pc->refs[i]->insn->src[s]; ++s)
+ if (pc->refs[i]->insn->src[s] == pc->refs[i])
+ break;
+ assert(s < 6);
+ nv_reference(pc, pc->refs[i]->insn, s, new_val);
+ }
+ }
+ return n;
+}
+
+static INLINE boolean
+is_gpr63(struct nv_value *val)
+{
+ return (val->reg.file == NV_FILE_GPR && val->reg.id == 63);
+}
+
+struct nv_value *
+nvc0_pc_find_constant(struct nv_ref *ref)
+{
+ struct nv_value *src;
+
+ if (!ref)
+ return NULL;
+
+ src = ref->value;
+ while (src->insn && src->insn->opcode == NV_OP_MOV) {
+ assert(!src->insn->src[0]->mod);
+ src = src->insn->src[0]->value;
+ }
+ if ((src->reg.file == NV_FILE_IMM) || is_gpr63(src) ||
+ (src->insn &&
+ src->insn->opcode == NV_OP_LD &&
+ src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
+ src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15)))
+ return src;
+ return NULL;
+}
+
+struct nv_value *
+nvc0_pc_find_immediate(struct nv_ref *ref)
+{
+ struct nv_value *src = nvc0_pc_find_constant(ref);
+
+ return (src && (src->reg.file == NV_FILE_IMM || is_gpr63(src))) ? src : NULL;
+}
+
+static void
+nv_pc_free_refs(struct nv_pc *pc)
+{
+ int i;
+ for (i = 0; i < pc->num_refs; i += 64)
+ FREE(pc->refs[i]);
+ FREE(pc->refs);
+}
+
+static const char *
+edge_name(ubyte type)
+{
+ switch (type) {
+ case CFG_EDGE_FORWARD: return "forward";
+ case CFG_EDGE_BACK: return "back";
+ case CFG_EDGE_LOOP_ENTER: return "loop";
+ case CFG_EDGE_LOOP_LEAVE: return "break";
+ case CFG_EDGE_FAKE: return "fake";
+ default:
+ return "?";
+ }
+}
+
+void
+nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f,
+ void *priv)
+{
+ struct nv_basic_block *bb[64], *bbb[16], *b;
+ int j, p, pp;
+
+ bb[0] = root;
+ p = 1;
+ pp = 0;
+
+ while (p > 0) {
+ b = bb[--p];
+ b->priv = 0;
+
+ for (j = 1; j >= 0; --j) {
+ if (!b->out[j])
+ continue;
+
+ switch (b->out_kind[j]) {
+ case CFG_EDGE_BACK:
+ continue;
+ case CFG_EDGE_FORWARD:
+ case CFG_EDGE_FAKE:
+ if (++b->out[j]->priv == b->out[j]->num_in)
+ bb[p++] = b->out[j];
+ break;
+ case CFG_EDGE_LOOP_ENTER:
+ bb[p++] = b->out[j];
+ break;
+ case CFG_EDGE_LOOP_LEAVE:
+ if (!b->out[j]->priv) {
+ bbb[pp++] = b->out[j];
+ b->out[j]->priv = 1;
+ }
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ f(priv, b);
+
+ if (!p) {
+ p = pp;
+ for (; pp > 0; --pp)
+ bb[pp - 1] = bbb[pp - 1];
+ }
+ }
+}
+
+static void
+nv_do_print_function(void *priv, struct nv_basic_block *b)
+{
+ struct nv_instruction *i;
+
+ debug_printf("=== BB %i ", b->id);
+ if (b->out[0])
+ debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id);
+ if (b->out[1])
+ debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id);
+ debug_printf("===\n");
+
+ i = b->phi;
+ if (!i)
+ i = b->entry;
+ for (; i; i = i->next)
+ nvc0_print_instruction(i);
+}
+
+void
+nvc0_print_function(struct nv_basic_block *root)
+{
+ if (root->subroutine)
+ debug_printf("SUBROUTINE %i\n", root->subroutine);
+ else
+ debug_printf("MAIN\n");
+
+ nvc0_pc_pass_in_order(root, nv_do_print_function, root);
+}
+
+void
+nvc0_print_program(struct nv_pc *pc)
+{
+ int i;
+ for (i = 0; i < pc->num_subroutines + 1; ++i)
+ if (pc->root[i])
+ nvc0_print_function(pc->root[i]);
+}
+
+#if NOUVEAU_DEBUG > 1
+static void
+nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b)
+{
+ int i;
+
+ b->pass_seq = pc->pass_seq;
+
+ fprintf(f, "\t%i [shape=box]\n", b->id);
+
+ for (i = 0; i < 2; ++i) {
+ if (!b->out[i])
+ continue;
+ switch (b->out_kind[i]) {
+ case CFG_EDGE_FORWARD:
+ fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
+ break;
+ case CFG_EDGE_LOOP_ENTER:
+ fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id);
+ break;
+ case CFG_EDGE_LOOP_LEAVE:
+ fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id);
+ break;
+ case CFG_EDGE_BACK:
+ fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
+ continue;
+ case CFG_EDGE_FAKE:
+ fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ if (b->out[i]->pass_seq < pc->pass_seq)
+ nv_do_print_cfgraph(pc, f, b->out[i]);
+ }
+}
+
+/* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */
+static void
+nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr)
+{
+ FILE *f;
+
+ f = fopen(filepath, "a");
+ if (!f)
+ return;
+
+ fprintf(f, "digraph G {\n");
+
+ ++pc->pass_seq;
+
+ nv_do_print_cfgraph(pc, f, pc->root[subr]);
+
+ fprintf(f, "}\n");
+
+ fclose(f);
+}
+#endif
+
+static INLINE void
+nvc0_pc_print_binary(struct nv_pc *pc)
+{
+ unsigned i;
+
+ NOUVEAU_DBG("nvc0_pc_print_binary(%u ops)\n", pc->emit_size / 8);
+
+ for (i = 0; i < pc->emit_size / 4; i += 2) {
+ debug_printf("0x%08x ", pc->emit[i + 0]);
+ debug_printf("0x%08x ", pc->emit[i + 1]);
+ if ((i % 16) == 15)
+ debug_printf("\n");
+ }
+ debug_printf("\n");
+}
+
+static int
+nvc0_emit_program(struct nv_pc *pc)
+{
+ uint32_t *code = pc->emit;
+ int n;
+
+ NOUVEAU_DBG("emitting program: size = %u\n", pc->emit_size);
+
+ pc->emit_pos = 0;
+ for (n = 0; n < pc->num_blocks; ++n) {
+ struct nv_instruction *i;
+ struct nv_basic_block *b = pc->bb_list[n];
+
+ for (i = b->entry; i; i = i->next) {
+ nvc0_emit_instruction(pc, i);
+ pc->emit += 2;
+ pc->emit_pos += 8;
+ }
+ }
+ assert(pc->emit == &code[pc->emit_size / 4]);
+
+ pc->emit[0] = 0x00001de7;
+ pc->emit[1] = 0x80000000;
+ pc->emit_size += 8;
+
+ pc->emit = code;
+
+#ifdef NOUVEAU_DEBUG
+ nvc0_pc_print_binary(pc);
+#else
+ debug_printf("not printing binary\n");
+#endif
+ return 0;
+}
+
+int
+nvc0_generate_code(struct nvc0_translation_info *ti)
+{
+ struct nv_pc *pc;
+ int ret;
+ int i;
+
+ pc = CALLOC_STRUCT(nv_pc);
+ if (!pc)
+ return 1;
+
+ pc->is_fragprog = ti->prog->type == PIPE_SHADER_FRAGMENT;
+
+ pc->root = CALLOC(ti->num_subrs + 1, sizeof(pc->root[0]));
+ if (!pc->root) {
+ FREE(pc);
+ return 1;
+ }
+ pc->num_subroutines = ti->num_subrs;
+
+ ret = nvc0_tgsi_to_nc(pc, ti);
+ if (ret)
+ goto out;
+#if NOUVEAU_DEBUG > 1
+ nvc0_print_program(pc);
+#endif
+
+ pc->opt_reload_elim = ti->require_stores ? FALSE : TRUE;
+
+ /* optimization */
+ ret = nvc0_pc_exec_pass0(pc);
+ if (ret)
+ goto out;
+#ifdef NOUVEAU_DEBUG
+ nvc0_print_program(pc);
+#endif
+
+ /* register allocation */
+ ret = nvc0_pc_exec_pass1(pc);
+ if (ret)
+ goto out;
+#if NOUVEAU_DEBUG > 1
+ nvc0_print_program(pc);
+ nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0);
+#endif
+
+ /* prepare for emission */
+ ret = nvc0_pc_exec_pass2(pc);
+ if (ret)
+ goto out;
+ assert(!(pc->emit_size % 8));
+
+ pc->emit = CALLOC(pc->emit_size / 4 + 2, 4);
+ if (!pc->emit) {
+ ret = 3;
+ goto out;
+ }
+ ret = nvc0_emit_program(pc);
+ if (ret)
+ goto out;
+
+ ti->prog->code = pc->emit;
+ ti->prog->code_base = 0;
+ ti->prog->code_size = pc->emit_size;
+ ti->prog->parm_size = 0;
+
+ ti->prog->max_gpr = MAX2(4, pc->max_reg[NV_FILE_GPR] + 1);
+
+ ti->prog->relocs = pc->reloc_entries;
+ ti->prog->num_relocs = pc->num_relocs;
+
+ NOUVEAU_DBG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
+
+out:
+ nv_pc_free_refs(pc);
+
+ for (i = 0; i < pc->num_blocks; ++i)
+ FREE(pc->bb_list[i]);
+ if (pc->root)
+ FREE(pc->root);
+ if (ret) {
+ /* on success, these will be referenced by struct nvc0_program */
+ if (pc->emit)
+ FREE(pc->emit);
+ if (pc->immd_buf)
+ FREE(pc->immd_buf);
+ if (pc->reloc_entries)
+ FREE(pc->reloc_entries);
+ }
+ FREE(pc);
+ return ret;
+}
+
+static void
+nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i)
+{
+ if (!b->phi) {
+ i->prev = NULL;
+ b->phi = i;
+ i->next = b->entry;
+ if (b->entry) {
+ assert(!b->entry->prev && b->exit);
+ b->entry->prev = i;
+ } else {
+ b->entry = i;
+ b->exit = i;
+ }
+ } else {
+ assert(b->entry);
+ if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */
+ assert(b->entry == b->exit);
+ b->entry->next = i;
+ i->prev = b->entry;
+ b->entry = i;
+ b->exit = i;
+ } else { /* insert before entry */
+ assert(b->entry->prev && b->exit);
+ i->next = b->entry;
+ i->prev = b->entry->prev;
+ b->entry->prev = i;
+ i->prev->next = i;
+ }
+ }
+}
+
+void
+nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i)
+{
+ if (i->opcode == NV_OP_PHI) {
+ nvbb_insert_phi(b, i);
+ } else {
+ i->prev = b->exit;
+ if (b->exit)
+ b->exit->next = i;
+ b->exit = i;
+ if (!b->entry)
+ b->entry = i;
+ else
+ if (i->prev && i->prev->opcode == NV_OP_PHI)
+ b->entry = i;
+ }
+
+ i->bb = b;
+ b->num_instructions++;
+
+ if (i->prev && i->prev->terminator)
+ nvc0_insns_permute(i->prev, i);
+}
+
+void
+nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
+{
+ if (!at->next) {
+ nvc0_insn_append(at->bb, ni);
+ return;
+ }
+ ni->next = at->next;
+ ni->prev = at;
+ ni->next->prev = ni;
+ ni->prev->next = ni;
+ ni->bb = at->bb;
+ ni->bb->num_instructions++;
+}
+
+void
+nvc0_insn_insert_before(struct nv_instruction *at, struct nv_instruction *ni)
+{
+ nvc0_insn_insert_after(at, ni);
+ nvc0_insns_permute(at, ni);
+}
+
+void
+nvc0_insn_delete(struct nv_instruction *nvi)
+{
+ struct nv_basic_block *b = nvi->bb;
+ int s;
+
+ /* debug_printf("REM: "); nv_print_instruction(nvi); */
+
+ for (s = 0; s < 6 && nvi->src[s]; ++s)
+ nv_reference(NULL, nvi, s, NULL);
+
+ if (nvi->next)
+ nvi->next->prev = nvi->prev;
+ else {
+ assert(nvi == b->exit);
+ b->exit = nvi->prev;
+ }
+
+ if (nvi->prev)
+ nvi->prev->next = nvi->next;
+
+ if (nvi == b->entry) {
+ /* PHIs don't get hooked to b->entry */
+ b->entry = nvi->next;
+ assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI);
+ }
+
+ if (nvi == b->phi) {
+ if (nvi->opcode != NV_OP_PHI)
+ NOUVEAU_DBG("NOTE: b->phi points to non-PHI instruction\n");
+
+ assert(!nvi->prev);
+ if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
+ b->phi = NULL;
+ else
+ b->phi = nvi->next;
+ }
+}
+
+void
+nvc0_insns_permute(struct nv_instruction *i1, struct nv_instruction *i2)
+{
+ struct nv_basic_block *b = i1->bb;
+
+ assert(i1->opcode != NV_OP_PHI &&
+ i2->opcode != NV_OP_PHI);
+ assert(i1->next == i2);
+
+ if (b->exit == i2)
+ b->exit = i1;
+
+ if (b->entry == i1)
+ b->entry = i2;
+
+ i2->prev = i1->prev;
+ i1->next = i2->next;
+ i2->next = i1;
+ i1->prev = i2;
+
+ if (i2->prev)
+ i2->prev->next = i2;
+ if (i1->next)
+ i1->next->prev = i1;
+}
+
+void
+nvc0_bblock_attach(struct nv_basic_block *parent,
+ struct nv_basic_block *b, ubyte edge_kind)
+{
+ assert(b->num_in < 8);
+
+ if (parent->out[0]) {
+ assert(!parent->out[1]);
+ parent->out[1] = b;
+ parent->out_kind[1] = edge_kind;
+ } else {
+ parent->out[0] = b;
+ parent->out_kind[0] = edge_kind;
+ }
+
+ b->in[b->num_in] = parent;
+ b->in_kind[b->num_in++] = edge_kind;
+}
+
+/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
+
+boolean
+nvc0_bblock_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
+{
+ int j;
+
+ if (b == d)
+ return TRUE;
+
+ for (j = 0; j < b->num_in; ++j)
+ if ((b->in_kind[j] != CFG_EDGE_BACK) &&
+ !nvc0_bblock_dominated_by(b->in[j], d))
+ return FALSE;
+
+ return j ? TRUE : FALSE;
+}
+
+/* check if @bf (future) can be reached from @bp (past), stop at @bt */
+boolean
+nvc0_bblock_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
+ struct nv_basic_block *bt)
+{
+ struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b;
+ int i, p, n;
+
+ p = 0;
+ n = 1;
+ q[0] = bp;
+
+ while (p < n) {
+ b = q[p++];
+
+ if (b == bf)
+ break;
+ if (b == bt)
+ continue;
+ assert(n <= (1024 - 2));
+
+ for (i = 0; i < 2; ++i) {
+ if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) {
+ q[n] = b->out[i];
+ q[n++]->priv = 1;
+ }
+ }
+ }
+ for (--n; n >= 0; --n)
+ q[n]->priv = 0;
+
+ return (b == bf);
+}
+
+static struct nv_basic_block *
+nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
+{
+ struct nv_basic_block *out;
+ int i;
+
+ if (!nvc0_bblock_dominated_by(df, b)) {
+ for (i = 0; i < df->num_in; ++i) {
+ if (df->in_kind[i] == CFG_EDGE_BACK)
+ continue;
+ if (nvc0_bblock_dominated_by(df->in[i], b))
+ return df;
+ }
+ }
+ for (i = 0; i < 2 && df->out[i]; ++i) {
+ if (df->out_kind[i] == CFG_EDGE_BACK)
+ continue;
+ if ((out = nvbb_find_dom_frontier(b, df->out[i])))
+ return out;
+ }
+ return NULL;
+}
+
+struct nv_basic_block *
+nvc0_bblock_dom_frontier(struct nv_basic_block *b)
+{
+ struct nv_basic_block *df;
+ int i;
+
+ for (i = 0; i < 2 && b->out[i]; ++i)
+ if ((df = nvbb_find_dom_frontier(b, b->out[i])))
+ return df;
+ return NULL;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h
new file mode 100644
index 0000000000..3a5612a5fa
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_pc.h
@@ -0,0 +1,650 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NVC0_COMPILER_H__
+#define __NVC0_COMPILER_H__
+
+#include <stdio.h>
+
+#ifndef NOUVEAU_DBG
+#ifdef NOUVEAU_DEBUG
+# define NOUVEAU_DBG(args...) debug_printf(args);
+#else
+# define NOUVEAU_DBG(args...)
+#endif
+#endif
+
+#ifndef NOUVEAU_ERR
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args);
+#endif
+
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_double_list.h"
+
+/* pseudo opcodes */
+#define NV_OP_UNDEF 0
+#define NV_OP_BIND 1
+#define NV_OP_MERGE 2
+#define NV_OP_PHI 3
+#define NV_OP_SELECT 4
+#define NV_OP_NOP 5
+
+/**
+ * BIND forces source operand i into the same register as destination operand i,
+ * and the operands will be assigned consecutive registers (needed for TEX).
+ * Beware conflicts !
+ * SELECT forces its multiple source operands and its destination operand into
+ * one and the same register.
+ */
+
+/* base opcodes */
+#define NV_OP_LD 6
+#define NV_OP_ST 7
+#define NV_OP_MOV 8
+#define NV_OP_AND 9
+#define NV_OP_OR 10
+#define NV_OP_XOR 11
+#define NV_OP_SHL 12
+#define NV_OP_SHR 13
+#define NV_OP_NOT 14
+#define NV_OP_SET 15
+#define NV_OP_ADD 16
+#define NV_OP_SUB 17
+#define NV_OP_MUL 18
+#define NV_OP_MAD 19
+#define NV_OP_ABS 20
+#define NV_OP_NEG 21
+#define NV_OP_MAX 22
+#define NV_OP_MIN 23
+#define NV_OP_CVT 24
+#define NV_OP_CEIL 25
+#define NV_OP_FLOOR 26
+#define NV_OP_TRUNC 27
+#define NV_OP_SAD 28
+
+/* shader opcodes */
+#define NV_OP_VFETCH 29
+#define NV_OP_PFETCH 30
+#define NV_OP_EXPORT 31
+#define NV_OP_LINTERP 32
+#define NV_OP_PINTERP 33
+#define NV_OP_EMIT 34
+#define NV_OP_RESTART 35
+#define NV_OP_TEX 36
+#define NV_OP_TXB 37
+#define NV_OP_TXL 38
+#define NV_OP_TXF 39
+#define NV_OP_TXQ 40
+#define NV_OP_QUADOP 41
+#define NV_OP_DFDX 42
+#define NV_OP_DFDY 43
+#define NV_OP_KIL 44
+
+/* control flow opcodes */
+#define NV_OP_BRA 45
+#define NV_OP_CALL 46
+#define NV_OP_RET 47
+#define NV_OP_EXIT 48
+#define NV_OP_BREAK 49
+#define NV_OP_BREAKADDR 50
+#define NV_OP_JOINAT 51
+#define NV_OP_JOIN 52
+
+/* typed opcodes */
+#define NV_OP_ADD_F32 NV_OP_ADD
+#define NV_OP_ADD_B32 53
+#define NV_OP_MUL_F32 NV_OP_MUL
+#define NV_OP_MUL_B32 54
+#define NV_OP_ABS_F32 NV_OP_ABS
+#define NV_OP_ABS_S32 55
+#define NV_OP_NEG_F32 NV_OP_NEG
+#define NV_OP_NEG_S32 56
+#define NV_OP_MAX_F32 NV_OP_MAX
+#define NV_OP_MAX_S32 57
+#define NV_OP_MAX_U32 58
+#define NV_OP_MIN_F32 NV_OP_MIN
+#define NV_OP_MIN_S32 59
+#define NV_OP_MIN_U32 60
+#define NV_OP_SET_F32 61
+#define NV_OP_SET_S32 62
+#define NV_OP_SET_U32 63
+#define NV_OP_SAR 64
+#define NV_OP_RCP 65
+#define NV_OP_RSQ 66
+#define NV_OP_LG2 67
+#define NV_OP_SIN 68
+#define NV_OP_COS 69
+#define NV_OP_EX2 70
+#define NV_OP_PRESIN 71
+#define NV_OP_PREEX2 72
+#define NV_OP_SAT 73
+
+/* newly added opcodes */
+#define NV_OP_SET_F32_AND 74
+#define NV_OP_SET_F32_OR 75
+#define NV_OP_SET_F32_XOR 76
+#define NV_OP_SELP 77
+#define NV_OP_SLCT 78
+#define NV_OP_SLCT_F32 NV_OP_SLCT
+#define NV_OP_SLCT_S32 79
+#define NV_OP_SLCT_U32 80
+#define NV_OP_SUB_F32 NV_OP_SUB
+#define NV_OP_SUB_S32 81
+#define NV_OP_MAD_F32 NV_OP_MAD
+#define NV_OP_FSET_F32 82
+#define NV_OP_TXG 83
+
+#define NV_OP_COUNT 84
+
+/* nv50 files omitted */
+#define NV_FILE_GPR 0
+#define NV_FILE_COND 1
+#define NV_FILE_PRED 2
+#define NV_FILE_IMM 16
+#define NV_FILE_MEM_S 32
+#define NV_FILE_MEM_V 34
+#define NV_FILE_MEM_A 35
+#define NV_FILE_MEM_L 48
+#define NV_FILE_MEM_G 64
+#define NV_FILE_MEM_C(i) (80 + i)
+
+#define NV_IS_MEMORY_FILE(f) ((f) >= NV_FILE_MEM_S)
+
+#define NV_MOD_NEG 1
+#define NV_MOD_ABS 2
+#define NV_MOD_NOT 4
+#define NV_MOD_SAT 8
+
+#define NV_TYPE_U8 0x00
+#define NV_TYPE_S8 0x01
+#define NV_TYPE_U16 0x02
+#define NV_TYPE_S16 0x03
+#define NV_TYPE_U32 0x04
+#define NV_TYPE_S32 0x05
+#define NV_TYPE_P32 0x07
+#define NV_TYPE_F32 0x09
+#define NV_TYPE_F64 0x0b
+#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | (n << 4))
+#define NV_TYPE_ANY 0xff
+
+#define NV_TYPE_ISINT(t) ((t) < 7)
+#define NV_TYPE_ISSGD(t) ((t) & 1)
+
+#define NV_CC_FL 0x0
+#define NV_CC_LT 0x1
+#define NV_CC_EQ 0x2
+#define NV_CC_LE 0x3
+#define NV_CC_GT 0x4
+#define NV_CC_NE 0x5
+#define NV_CC_GE 0x6
+#define NV_CC_U 0x8
+#define NV_CC_TR 0xf
+#define NV_CC_O 0x10
+#define NV_CC_C 0x11
+#define NV_CC_A 0x12
+#define NV_CC_S 0x13
+#define NV_CC_INVERSE(cc) ((cc) ^ 0x7)
+/* for 1 bit predicates: */
+#define NV_CC_P 0
+#define NV_CC_NOT_P 1
+
+uint8_t nvc0_ir_reverse_cc(uint8_t cc);
+
+#define NV_PC_MAX_INSTRUCTIONS 2048
+#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4)
+
+#define NV_PC_MAX_BASIC_BLOCKS 1024
+
+struct nv_op_info {
+ uint base; /* e.g. ADD_S32 -> ADD */
+ char name[12];
+ uint8_t type;
+ uint16_t mods;
+ unsigned flow : 1;
+ unsigned commutative : 1;
+ unsigned vector : 1;
+ unsigned predicate : 1;
+ unsigned pseudo : 1;
+ unsigned immediate : 3;
+ unsigned memory : 3;
+};
+
+extern struct nv_op_info nvc0_op_info_table[];
+
+#define NV_BASEOP(op) (nvc0_op_info_table[op].base)
+#define NV_OPTYPE(op) (nvc0_op_info_table[op].type)
+
+static INLINE boolean
+nv_is_texture_op(uint opcode)
+{
+ return (opcode >= NV_OP_TEX && opcode <= NV_OP_TXQ);
+}
+
+static INLINE boolean
+nv_is_vector_op(uint opcode)
+{
+ return nvc0_op_info_table[opcode].vector ? TRUE : FALSE;
+}
+
+static INLINE boolean
+nv_op_commutative(uint opcode)
+{
+ return nvc0_op_info_table[opcode].commutative ? TRUE : FALSE;
+}
+
+static INLINE uint8_t
+nv_op_supported_src_mods(uint opcode, int s)
+{
+ return (nvc0_op_info_table[opcode].mods >> (s * 4)) & 0xf;
+}
+
+static INLINE uint
+nv_type_order(ubyte type)
+{
+ switch (type & 0xf) {
+ case NV_TYPE_U8:
+ case NV_TYPE_S8:
+ return 0;
+ case NV_TYPE_U16:
+ case NV_TYPE_S16:
+ return 1;
+ case NV_TYPE_U32:
+ case NV_TYPE_F32:
+ case NV_TYPE_S32:
+ case NV_TYPE_P32:
+ return 2;
+ case NV_TYPE_F64:
+ return 3;
+ }
+ assert(0);
+ return 0;
+}
+
+static INLINE uint
+nv_type_sizeof(ubyte type)
+{
+ if (type & 0xf0)
+ return (1 << nv_type_order(type)) * (type >> 4);
+ return 1 << nv_type_order(type);
+}
+
+static INLINE uint
+nv_type_sizeof_base(ubyte type)
+{
+ return 1 << nv_type_order(type);
+}
+
+struct nv_reg {
+ uint32_t address; /* for memory locations */
+ int id; /* for registers */
+ ubyte file;
+ ubyte size;
+ union {
+ int32_t s32;
+ int64_t s64;
+ uint64_t u64;
+ uint32_t u32; /* expected to be 0 for $r63 */
+ float f32;
+ double f64;
+ } imm;
+};
+
+struct nv_range {
+ struct nv_range *next;
+ int bgn;
+ int end;
+};
+
+struct nv_ref;
+
+struct nv_value {
+ struct nv_reg reg;
+ struct nv_instruction *insn;
+ struct nv_value *join;
+ struct nv_ref *last_use;
+ int n;
+ struct nv_range *livei;
+ int refc;
+ struct nv_value *next;
+ struct nv_value *prev;
+};
+
+struct nv_ref {
+ struct nv_value *value;
+ struct nv_instruction *insn;
+ struct list_head list; /* connects uses of the same value */
+ uint8_t mod;
+ uint8_t flags;
+};
+
+#define NV_REF_FLAG_REGALLOC_PRIV (1 << 0)
+
+struct nv_basic_block;
+
+struct nv_instruction {
+ struct nv_instruction *next;
+ struct nv_instruction *prev;
+ uint opcode;
+ uint serial;
+
+ struct nv_value *def[5];
+ struct nv_ref *src[6];
+
+ int8_t predicate; /* index of predicate src */
+ int8_t indirect; /* index of pointer src */
+
+ union {
+ struct {
+ uint8_t t; /* TIC binding */
+ uint8_t s; /* TSC binding */
+ } tex;
+ struct {
+ uint8_t d; /* output type */
+ uint8_t s; /* input type */
+ } cvt;
+ } ext;
+
+ struct nv_basic_block *bb;
+ struct nv_basic_block *target; /* target block of control flow insn */
+
+ unsigned cc : 5; /* condition code */
+ unsigned fixed : 1; /* don't optimize away (prematurely) */
+ unsigned terminator : 1;
+ unsigned join : 1;
+ unsigned set_cond : 4; /* 2nd byte */
+ unsigned saturate : 1;
+ unsigned centroid : 1;
+ unsigned flat : 1;
+ unsigned patch : 1;
+ unsigned lanes : 4; /* 3rd byte */
+ unsigned tex_dim : 2;
+ unsigned tex_array : 1;
+ unsigned tex_cube : 1;
+ unsigned tex_shadow : 1; /* 4th byte */
+ unsigned tex_live : 1;
+ unsigned tex_mask : 4;
+
+ uint8_t quadop;
+};
+
+static INLINE int
+nvi_vector_size(struct nv_instruction *nvi)
+{
+ int i;
+ assert(nvi);
+ for (i = 0; i < 5 && nvi->def[i]; ++i);
+ return i;
+}
+
+#define CFG_EDGE_FORWARD 0
+#define CFG_EDGE_BACK 1
+#define CFG_EDGE_LOOP_ENTER 2
+#define CFG_EDGE_LOOP_LEAVE 4
+#define CFG_EDGE_FAKE 8
+
+/* 'WALL' edge means where reachability check doesn't follow */
+/* 'LOOP' edge means just having to do with loops */
+#define IS_LOOP_EDGE(k) ((k) & 7)
+#define IS_WALL_EDGE(k) ((k) & 9)
+
+struct nv_basic_block {
+ struct nv_instruction *entry; /* first non-phi instruction */
+ struct nv_instruction *exit;
+ struct nv_instruction *phi; /* very first instruction */
+ int num_instructions;
+
+ struct nv_basic_block *out[2]; /* no indirect branches -> 2 */
+ struct nv_basic_block *in[8]; /* hope that suffices */
+ uint num_in;
+ ubyte out_kind[2];
+ ubyte in_kind[8];
+
+ int id;
+ int subroutine;
+ uint priv; /* reset to 0 after you're done */
+ uint pass_seq;
+
+ uint32_t emit_pos; /* position, size in emitted code (in bytes) */
+ uint32_t emit_size;
+
+ uint32_t live_set[NV_PC_MAX_VALUES / 32];
+};
+
+struct nvc0_translation_info;
+
+struct nv_pc {
+ struct nv_basic_block **root;
+ struct nv_basic_block *current_block;
+ struct nv_basic_block *parent_block;
+
+ int loop_nesting_bound;
+ uint pass_seq;
+
+ struct nv_value values[NV_PC_MAX_VALUES];
+ struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS];
+ struct nv_ref **refs;
+ struct nv_basic_block *bb_list[NV_PC_MAX_BASIC_BLOCKS];
+ int num_values;
+ int num_instructions;
+ int num_refs;
+ int num_blocks;
+ int num_subroutines;
+
+ int max_reg[4];
+
+ uint32_t *immd_buf; /* populated on emit */
+ unsigned immd_count;
+
+ uint32_t *emit;
+ uint32_t emit_size;
+ uint32_t emit_pos;
+
+ void *reloc_entries;
+ unsigned num_relocs;
+
+ /* optimization enables */
+ boolean opt_reload_elim;
+ boolean is_fragprog;
+};
+
+void nvc0_insn_append(struct nv_basic_block *, struct nv_instruction *);
+void nvc0_insn_insert_before(struct nv_instruction *, struct nv_instruction *);
+void nvc0_insn_insert_after(struct nv_instruction *, struct nv_instruction *);
+
+static INLINE struct nv_instruction *
+nv_alloc_instruction(struct nv_pc *pc, uint opcode)
+{
+ struct nv_instruction *insn;
+
+ insn = &pc->instructions[pc->num_instructions++];
+ assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS);
+
+ insn->opcode = opcode;
+ insn->cc = NV_CC_P;
+ insn->indirect = -1;
+ insn->predicate = -1;
+
+ return insn;
+}
+
+static INLINE struct nv_instruction *
+new_instruction(struct nv_pc *pc, uint opcode)
+{
+ struct nv_instruction *insn = nv_alloc_instruction(pc, opcode);
+
+ nvc0_insn_append(pc->current_block, insn);
+ return insn;
+}
+
+static INLINE struct nv_instruction *
+new_instruction_at(struct nv_pc *pc, struct nv_instruction *at, uint opcode)
+{
+ struct nv_instruction *insn = nv_alloc_instruction(pc, opcode);
+
+ nvc0_insn_insert_after(at, insn);
+ return insn;
+}
+
+static INLINE struct nv_value *
+new_value(struct nv_pc *pc, ubyte file, ubyte size)
+{
+ struct nv_value *value = &pc->values[pc->num_values];
+
+ assert(pc->num_values < NV_PC_MAX_VALUES - 1);
+
+ value->n = pc->num_values++;
+ value->join = value;
+ value->reg.id = -1;
+ value->reg.file = file;
+ value->reg.size = size;
+ return value;
+}
+
+static INLINE struct nv_value *
+new_value_like(struct nv_pc *pc, struct nv_value *like)
+{
+ return new_value(pc, like->reg.file, like->reg.size);
+}
+
+static INLINE struct nv_ref *
+new_ref(struct nv_pc *pc, struct nv_value *val)
+{
+ int i;
+ struct nv_ref *ref;
+
+ if ((pc->num_refs % 64) == 0) {
+ const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *);
+ const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *);
+
+ pc->refs = REALLOC(pc->refs, old_size, new_size);
+
+ ref = CALLOC(64, sizeof(struct nv_ref));
+ for (i = 0; i < 64; ++i)
+ pc->refs[pc->num_refs + i] = &ref[i];
+ }
+
+ ref = pc->refs[pc->num_refs++];
+ ref->value = val;
+
+ LIST_INITHEAD(&ref->list);
+
+ ++val->refc;
+ return ref;
+}
+
+static INLINE struct nv_basic_block *
+new_basic_block(struct nv_pc *pc)
+{
+ struct nv_basic_block *bb;
+
+ if (pc->num_blocks >= NV_PC_MAX_BASIC_BLOCKS)
+ return NULL;
+
+ bb = CALLOC_STRUCT(nv_basic_block);
+
+ bb->id = pc->num_blocks;
+ pc->bb_list[pc->num_blocks++] = bb;
+ return bb;
+}
+
+static INLINE void
+nv_reference(struct nv_pc *pc,
+ struct nv_instruction *nvi, int c, struct nv_value *s)
+{
+ struct nv_ref **d = &nvi->src[c];
+ assert(c < 6);
+
+ if (*d) {
+ --(*d)->value->refc;
+ LIST_DEL(&(*d)->list);
+ }
+
+ if (s) {
+ if (!*d) {
+ *d = new_ref(pc, s);
+ (*d)->insn = nvi;
+ } else {
+ LIST_DEL(&(*d)->list);
+ (*d)->value = s;
+ ++(s->refc);
+ }
+ if (!s->last_use)
+ s->last_use = *d;
+ else
+ LIST_ADDTAIL(&s->last_use->list, &(*d)->list);
+
+ s->last_use = *d;
+ (*d)->insn = nvi;
+ } else {
+ *d = NULL;
+ }
+}
+
+/* nvc0_emit.c */
+void nvc0_emit_instruction(struct nv_pc *, struct nv_instruction *);
+
+/* nvc0_print.c */
+const char *nvc0_opcode_name(uint opcode);
+void nvc0_print_instruction(struct nv_instruction *);
+
+/* nvc0_pc.c */
+void nvc0_print_function(struct nv_basic_block *root);
+void nvc0_print_program(struct nv_pc *);
+
+boolean nvc0_insn_can_load(struct nv_instruction *, int s,
+ struct nv_instruction *);
+boolean nvc0_insn_is_predicateable(struct nv_instruction *);
+
+int nvc0_insn_refcount(struct nv_instruction *);
+void nvc0_insn_delete(struct nv_instruction *);
+void nvc0_insns_permute(struct nv_instruction *prev, struct nv_instruction *);
+
+void nvc0_bblock_attach(struct nv_basic_block *parent,
+ struct nv_basic_block *child, ubyte edge_kind);
+boolean nvc0_bblock_dominated_by(struct nv_basic_block *,
+ struct nv_basic_block *);
+boolean nvc0_bblock_reachable_by(struct nv_basic_block *future,
+ struct nv_basic_block *past,
+ struct nv_basic_block *final);
+struct nv_basic_block *nvc0_bblock_dom_frontier(struct nv_basic_block *);
+
+int nvc0_pc_replace_value(struct nv_pc *pc,
+ struct nv_value *old_val,
+ struct nv_value *new_val);
+
+struct nv_value *nvc0_pc_find_immediate(struct nv_ref *);
+struct nv_value *nvc0_pc_find_constant(struct nv_ref *);
+
+typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b);
+
+void nvc0_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *);
+
+int nvc0_pc_exec_pass0(struct nv_pc *pc);
+int nvc0_pc_exec_pass1(struct nv_pc *pc);
+int nvc0_pc_exec_pass2(struct nv_pc *pc);
+
+int nvc0_tgsi_to_nc(struct nv_pc *, struct nvc0_translation_info *);
+
+#endif // NV50_COMPILER_H
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
new file mode 100644
index 0000000000..76ad40dbcf
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
@@ -0,0 +1,1020 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_pc.h"
+#include "nvc0_program.h"
+
+#define NVC0_FIXUP_CODE_RELOC 0
+#define NVC0_FIXUP_DATA_RELOC 1
+
+struct nvc0_fixup {
+ uint8_t type;
+ int8_t shift;
+ uint32_t mask;
+ uint32_t data;
+ uint32_t ofst;
+};
+
+void
+nvc0_relocate_program(struct nvc0_program *prog,
+ uint32_t code_base,
+ uint32_t data_base)
+{
+ struct nvc0_fixup *f = (struct nvc0_fixup *)prog->relocs;
+ unsigned i;
+
+ for (i = 0; i < prog->num_relocs; ++i) {
+ uint32_t data;
+
+ switch (f[i].type) {
+ case NVC0_FIXUP_CODE_RELOC: data = code_base + f[i].data; break;
+ case NVC0_FIXUP_DATA_RELOC: data = data_base + f[i].data; break;
+ default:
+ data = f[i].data;
+ break;
+ }
+ data = (f[i].shift < 0) ? (data >> -f[i].shift) : (data << f[i].shift);
+
+ prog->code[f[i].ofst / 4] &= ~f[i].mask;
+ prog->code[f[i].ofst / 4] |= data & f[i].mask;
+ }
+}
+
+static void
+create_fixup(struct nv_pc *pc, uint8_t ty,
+ int w, uint32_t data, uint32_t m, int s)
+{
+ struct nvc0_fixup *f;
+
+ const unsigned size = sizeof(struct nvc0_fixup);
+ const unsigned n = pc->num_relocs;
+
+ if (!(n % 8))
+ pc->reloc_entries = REALLOC(pc->reloc_entries, n * size, (n + 8) * size);
+
+ f = (struct nvc0_fixup *)pc->reloc_entries;
+
+ f[n].ofst = pc->emit_pos + w * 4;
+ f[n].type = ty;
+ f[n].data = data;
+ f[n].mask = m;
+ f[n].shift = s;
+
+ ++pc->num_relocs;
+}
+
+static INLINE ubyte
+SSIZE(struct nv_instruction *nvi, int s)
+{
+ return nvi->src[s]->value->reg.size;
+}
+
+static INLINE ubyte
+DSIZE(struct nv_instruction *nvi, int d)
+{
+ return nvi->def[d]->reg.size;
+}
+
+static INLINE struct nv_reg *
+SREG(struct nv_ref *ref)
+{
+ if (!ref)
+ return NULL;
+ return &ref->value->join->reg;
+}
+
+static INLINE struct nv_reg *
+DREG(struct nv_value *val)
+{
+ if (!val)
+ return NULL;
+ return &val->join->reg;
+}
+
+static INLINE ubyte
+SFILE(struct nv_instruction *nvi, int s)
+{
+ return nvi->src[s]->value->reg.file;
+}
+
+static INLINE ubyte
+DFILE(struct nv_instruction *nvi, int d)
+{
+ return nvi->def[0]->reg.file;
+}
+
+static INLINE void
+SID(struct nv_pc *pc, struct nv_ref *ref, int pos)
+{
+ pc->emit[pos / 32] |= (SREG(ref) ? SREG(ref)->id : 63) << (pos % 32);
+}
+
+static INLINE void
+DID(struct nv_pc *pc, struct nv_value *val, int pos)
+{
+ pc->emit[pos / 32] |= (DREG(val) ? DREG(val)->id : 63) << (pos % 32);
+}
+
+static INLINE uint32_t
+get_immd_u32(struct nv_ref *ref) /* XXX: dependent on [0]:2 */
+{
+ assert(ref->value->reg.file == NV_FILE_IMM);
+ return ref->value->reg.imm.u32;
+}
+
+static INLINE void
+set_immd_u32_l(struct nv_pc *pc, uint32_t u32)
+{
+ pc->emit[0] |= (u32 & 0x3f) << 26;
+ pc->emit[1] |= u32 >> 6;
+}
+
+static INLINE void
+set_immd_u32(struct nv_pc *pc, uint32_t u32)
+{
+ if ((pc->emit[0] & 0xf) == 0x2) {
+ set_immd_u32_l(pc, u32);
+ } else
+ if ((pc->emit[0] & 0xf) == 0x3) {
+ assert(!(pc->emit[1] & 0xc000));
+ pc->emit[1] |= 0xc000;
+ assert(!(u32 & 0xfff00000));
+ set_immd_u32_l(pc, u32);
+ } else {
+ assert(!(pc->emit[1] & 0xc000));
+ pc->emit[1] |= 0xc000;
+ assert(!(u32 & 0xfff));
+ set_immd_u32_l(pc, u32 >> 12);
+ }
+}
+
+static INLINE void
+set_immd(struct nv_pc *pc, struct nv_instruction *i, int s)
+{
+ set_immd_u32(pc, get_immd_u32(i->src[s]));
+}
+
+static INLINE void
+DVS(struct nv_pc *pc, struct nv_instruction *i)
+{
+ uint s = i->def[0]->reg.size;
+ int n;
+ for (n = 1; n < 4 && i->def[n]; ++n)
+ s += i->def[n]->reg.size;
+ pc->emit[0] |= ((s / 4) - 1) << 5;
+}
+
+static INLINE void
+SVS(struct nv_pc *pc, struct nv_ref *src)
+{
+ pc->emit[0] |= (SREG(src)->size / 4 - 1) << 5;
+}
+
+static void
+set_pred(struct nv_pc *pc, struct nv_instruction *i)
+{
+ if (i->predicate >= 0) {
+ SID(pc, i->src[i->predicate], 6);
+ if (i->cc)
+ pc->emit[0] |= 0x2000; /* negate */
+ } else {
+ pc->emit[0] |= 0x1c00;
+ }
+}
+
+static INLINE void
+set_address_16(struct nv_pc *pc, struct nv_ref *src)
+{
+ pc->emit[0] |= (src->value->reg.address & 0x003f) << 26;
+ pc->emit[1] |= (src->value->reg.address & 0xffc0) >> 6;
+}
+
+static INLINE unsigned
+const_space_index(struct nv_instruction *i, int s)
+{
+ return SFILE(i, s) - NV_FILE_MEM_C(0);
+}
+
+static void
+emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op)
+{
+ pc->emit[0] = 0x00000007;
+ pc->emit[1] = op << 24;
+
+ if (op == 0x40 || (op >= 0x80 && op <= 0x98)) {
+ /* bra, exit, ret or kil */
+ pc->emit[0] |= 0x1e0;
+ set_pred(pc, i);
+ }
+
+ if (i->target) {
+ int32_t pcrel = i->target->emit_pos - (pc->emit_pos + 8);
+
+ /* we will need relocations only for global functions */
+ /*
+ create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 0, pos, 26, 0xfc000000);
+ create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 1, pos, -6, 0x0001ffff);
+ */
+
+ pc->emit[0] |= (pcrel & 0x3f) << 26;
+ pc->emit[1] |= (pcrel >> 6) & 0x3ffff;
+ }
+}
+
+/* doesn't work for vfetch, export, ld, st, mov ... */
+static void
+emit_form_0(struct nv_pc *pc, struct nv_instruction *i)
+{
+ int s;
+
+ set_pred(pc, i);
+
+ DID(pc, i->def[0], 14);
+
+ for (s = 0; s < 3 && i->src[s]; ++s) {
+ if (SFILE(i, s) >= NV_FILE_MEM_C(0) &&
+ SFILE(i, s) <= NV_FILE_MEM_C(15)) {
+ assert(!(pc->emit[1] & 0xc000));
+ assert(s <= 1);
+ pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10);
+ set_address_16(pc, i->src[s]);
+ } else
+ if (SFILE(i, s) == NV_FILE_GPR) {
+ SID(pc, i->src[s], s ? ((s == 2) ? 49 : 26) : 20);
+ } else
+ if (SFILE(i, s) == NV_FILE_IMM) {
+ assert(!(pc->emit[1] & 0xc000));
+ assert(s == 1 || i->opcode == NV_OP_MOV);
+ set_immd(pc, i, s);
+ }
+ }
+}
+
+static void
+emit_form_1(struct nv_pc *pc, struct nv_instruction *i)
+{
+ int s;
+
+ set_pred(pc, i);
+
+ DID(pc, i->def[0], 14);
+
+ for (s = 0; s < 1 && i->src[s]; ++s) {
+ if (SFILE(i, s) >= NV_FILE_MEM_C(0) &&
+ SFILE(i, s) <= NV_FILE_MEM_C(15)) {
+ assert(!(pc->emit[1] & 0xc000));
+ assert(s <= 1);
+ pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10);
+ set_address_16(pc, i->src[s]);
+ } else
+ if (SFILE(i, s) == NV_FILE_GPR) {
+ SID(pc, i->src[s], 26);
+ } else
+ if (SFILE(i, s) == NV_FILE_IMM) {
+ assert(!(pc->emit[1] & 0xc000));
+ assert(s == 1 || i->opcode == NV_OP_MOV);
+ set_immd(pc, i, s);
+ }
+ }
+}
+
+static void
+emit_neg_abs_1_2(struct nv_pc *pc, struct nv_instruction *i)
+{
+ if (i->src[0]->mod & NV_MOD_ABS)
+ pc->emit[0] |= 1 << 7;
+ if (i->src[0]->mod & NV_MOD_NEG)
+ pc->emit[0] |= 1 << 9;
+ if (i->src[1]->mod & NV_MOD_ABS)
+ pc->emit[0] |= 1 << 6;
+ if (i->src[1]->mod & NV_MOD_NEG)
+ pc->emit[0] |= 1 << 8;
+}
+
+static void
+emit_add_f32(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000000;
+ pc->emit[1] = 0x50000000;
+
+ emit_form_0(pc, i);
+
+ emit_neg_abs_1_2(pc, i);
+
+ if (i->saturate)
+ pc->emit[1] |= 1 << 17;
+}
+
+static void
+emit_mul_f32(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000000;
+ pc->emit[1] = 0x58000000;
+
+ emit_form_0(pc, i);
+
+ if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG)
+ pc->emit[1] |= 1 << 25;
+
+ if (i->saturate)
+ pc->emit[0] |= 1 << 5;
+}
+
+static void
+emit_mad_f32(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000000;
+ pc->emit[1] = 0x30000000;
+
+ emit_form_0(pc, i);
+
+ if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG)
+ pc->emit[0] |= 1 << 9;
+
+ if (i->src[2]->mod & NV_MOD_NEG)
+ pc->emit[0] |= 1 << 8;
+
+ if (i->saturate)
+ pc->emit[0] |= 1 << 5;
+}
+
+static void
+emit_minmax(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000000;
+ pc->emit[1] = 0x08000000;
+
+ if (NV_BASEOP(i->opcode) == NV_OP_MAX)
+ pc->emit[1] |= 0x001e0000;
+ else
+ pc->emit[1] |= 0x000e0000; /* predicate ? */
+
+ emit_form_0(pc, i);
+
+ emit_neg_abs_1_2(pc, i);
+
+ switch (i->opcode) {
+ case NV_OP_MIN_U32:
+ case NV_OP_MAX_U32:
+ pc->emit[0] |= 3;
+ break;
+ case NV_OP_MIN_S32:
+ case NV_OP_MAX_S32:
+ pc->emit[0] |= 3 | (1 << 5);
+ break;
+ case NV_OP_MIN_F32:
+ case NV_OP_MAX_F32:
+ default:
+ break;
+ }
+}
+
+static void
+emit_tex(struct nv_pc *pc, struct nv_instruction *i)
+{
+ int src1 = i->tex_array + i->tex_dim + i->tex_cube;
+
+ assert(src1 < 6);
+
+ pc->emit[0] = 0x00000086;
+ pc->emit[1] = 0x80000000;
+
+ switch (i->opcode) {
+ case NV_OP_TEX: pc->emit[1] = 0x80000000; break;
+ case NV_OP_TXB: pc->emit[1] = 0x84000000; break;
+ case NV_OP_TXL: pc->emit[1] = 0x86000000; break;
+ case NV_OP_TXF: pc->emit[1] = 0x90000000; break;
+ case NV_OP_TXG: pc->emit[1] = 0xe0000000; break;
+ default:
+ assert(0);
+ break;
+ }
+
+ if (i->tex_array)
+ pc->emit[1] |= 0x00080000; /* layer index is u16, first value of SRC0 */
+ if (i->tex_shadow)
+ pc->emit[1] |= 0x01000000; /* shadow is part of SRC1, after bias/lod */
+
+ set_pred(pc, i);
+
+ DID(pc, i->def[0], 14);
+ SID(pc, i->src[0], 20);
+ SID(pc, i->src[src1], 26); /* may be NULL -> $r63 */
+
+ pc->emit[1] |= i->tex_mask << 14;
+ pc->emit[1] |= (i->tex_dim - 1) << 20;
+ if (i->tex_cube)
+ pc->emit[1] |= 3 << 20;
+
+ assert(i->ext.tex.s < 16);
+
+ pc->emit[1] |= i->ext.tex.t;
+ pc->emit[1] |= i->ext.tex.s << 8;
+
+ if (i->tex_live)
+ pc->emit[0] |= 1 << 9;
+}
+
+/* 0: cos, 1: sin, 2: ex2, 3: lg2, 4: rcp, 5: rsqrt */
+static void
+emit_flop(struct nv_pc *pc, struct nv_instruction *i, ubyte op)
+{
+ pc->emit[0] = 0x00000000;
+ pc->emit[1] = 0xc8000000;
+
+ set_pred(pc, i);
+
+ DID(pc, i->def[0], 14);
+ SID(pc, i->src[0], 20);
+
+ pc->emit[0] |= op << 26;
+
+ if (op >= 3) {
+ if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 9;
+ if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 7;
+ } else {
+ assert(!i->src[0]->mod);
+ }
+}
+
+static void
+emit_quadop(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000000;
+ pc->emit[1] = 0x48000000;
+
+ set_pred(pc, i);
+
+ assert(SFILE(i, 0) == NV_FILE_GPR && SFILE(i, 1) == NV_FILE_GPR);
+
+ DID(pc, i->def[0], 14);
+ SID(pc, i->src[0], 20);
+ SID(pc, i->src[0], 26);
+
+ pc->emit[0] |= i->lanes << 6; /* l0, l1, l2, l3, dx, dy */
+ pc->emit[1] |= i->quadop;
+}
+
+static void
+emit_ddx(struct nv_pc *pc, struct nv_instruction *i)
+{
+ i->quadop = 0x99;
+ i->lanes = 4;
+ i->src[1] = i->src[0];
+ emit_quadop(pc, i);
+}
+
+static void
+emit_ddy(struct nv_pc *pc, struct nv_instruction *i)
+{
+ i->quadop = 0xa5;
+ i->lanes = 5;
+ i->src[1] = i->src[0];
+ emit_quadop(pc, i);
+}
+
+/* preparation op (preex2, presin / convert to fixed point) */
+static void
+emit_preop(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000000;
+ pc->emit[1] = 0x60000000;
+
+ if (i->opcode == NV_OP_PREEX2)
+ pc->emit[0] |= 0x20;
+
+ emit_form_1(pc, i);
+
+ if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 8;
+ if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 6;
+}
+
+static void
+emit_shift(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000003;
+
+ switch (i->opcode) {
+ case NV_OP_SAR:
+ pc->emit[0] |= 0x20; /* fall through */
+ case NV_OP_SHR:
+ pc->emit[1] = 0x58000000;
+ break;
+ case NV_OP_SHL:
+ default:
+ pc->emit[1] = 0x60000000;
+ break;
+ }
+
+ emit_form_0(pc, i);
+}
+
+static void
+emit_bitop(struct nv_pc *pc, struct nv_instruction *i)
+{
+ if (SFILE(i, 1) == NV_FILE_IMM) {
+ pc->emit[0] = 0x00000002;
+ pc->emit[1] = 0x38000000;
+ } else {
+ pc->emit[0] = 0x00000003;
+ pc->emit[1] = 0x68000000;
+ }
+
+ switch (i->opcode) {
+ case NV_OP_OR:
+ pc->emit[0] |= 0x40;
+ break;
+ case NV_OP_XOR:
+ pc->emit[0] |= 0x80;
+ break;
+ case NV_OP_AND:
+ default:
+ break;
+ }
+
+ emit_form_0(pc, i);
+}
+
+static void
+emit_set(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000000;
+
+ switch (i->opcode) {
+ case NV_OP_SET_S32:
+ pc->emit[0] |= 0x20; /* fall through */
+ case NV_OP_SET_U32:
+ pc->emit[0] |= 0x3;
+ pc->emit[1] = 0x100e0000;
+ break;
+ case NV_OP_SET_F32_AND:
+ pc->emit[1] = 0x18000000;
+ break;
+ case NV_OP_SET_F32_OR:
+ pc->emit[1] = 0x18200000;
+ break;
+ case NV_OP_SET_F32_XOR:
+ pc->emit[1] = 0x18400000;
+ break;
+ case NV_OP_FSET_F32:
+ pc->emit[0] |= 0x20; /* fall through */
+ case NV_OP_SET_F32:
+ default:
+ pc->emit[1] = 0x180e0000;
+ break;
+ }
+
+ if (DFILE(i, 0) == NV_FILE_PRED) {
+ pc->emit[0] |= 0x1c000;
+ pc->emit[1] += 0x08000000;
+ }
+
+ pc->emit[1] |= i->set_cond << 23;
+
+ emit_form_0(pc, i);
+
+ emit_neg_abs_1_2(pc, i); /* maybe assert that U/S32 don't use mods */
+}
+
+static void
+emit_selp(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000004;
+ pc->emit[1] = 0x20000000;
+
+ emit_form_0(pc, i);
+
+ if (i->cc || (i->src[2]->mod & NV_MOD_NOT))
+ pc->emit[1] |= 1 << 20;
+}
+
+static void
+emit_slct(struct nv_pc *pc, struct nv_instruction *i)
+{
+ uint8_t cc = i->set_cond;
+
+ pc->emit[0] = 0x00000000;
+
+ switch (i->opcode) {
+ case NV_OP_SLCT_S32:
+ pc->emit[0] |= 0x20; /* fall through */
+ case NV_OP_SLCT_U32:
+ pc->emit[0] |= 0x3;
+ pc->emit[1] = 0x30000000;
+ break;
+ case NV_OP_SLCT_F32:
+ default:
+ pc->emit[1] = 0x38000000;
+ break;
+ }
+
+ emit_form_0(pc, i);
+
+ if (i->src[2]->mod & NV_MOD_NEG)
+ cc = nvc0_ir_reverse_cc(cc);
+
+ pc->emit[1] |= cc << 23;
+}
+
+static void
+emit_cvt(struct nv_pc *pc, struct nv_instruction *i)
+{
+ uint32_t rint;
+
+ pc->emit[0] = 0x00000004;
+ pc->emit[1] = 0x10000000;
+
+ /* if no type conversion specified, get type from opcode */
+ if (i->opcode != NV_OP_CVT && i->ext.cvt.d == i->ext.cvt.s)
+ i->ext.cvt.d = i->ext.cvt.s = NV_OPTYPE(i->opcode);
+
+ switch (i->ext.cvt.d) {
+ case NV_TYPE_F32:
+ switch (i->ext.cvt.s) {
+ case NV_TYPE_F32: pc->emit[1] = 0x10000000; break;
+ case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */
+ case NV_TYPE_U32: pc->emit[1] = 0x18000000; break;
+ }
+ break;
+ case NV_TYPE_S32: pc->emit[0] |= 0x80; /* fall through */
+ case NV_TYPE_U32:
+ switch (i->ext.cvt.s) {
+ case NV_TYPE_F32: pc->emit[1] = 0x14000000; break;
+ case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */
+ case NV_TYPE_U32: pc->emit[1] = 0x1c000000; break;
+ }
+ break;
+ default:
+ assert(!"cvt: unknown type");
+ break;
+ }
+
+ rint = (i->ext.cvt.d == NV_TYPE_F32) ? 1 << 7 : 0;
+
+ if (i->opcode == NV_OP_FLOOR) {
+ pc->emit[0] |= rint;
+ pc->emit[1] |= 2 << 16;
+ } else
+ if (i->opcode == NV_OP_CEIL) {
+ pc->emit[0] |= rint;
+ pc->emit[1] |= 4 << 16;
+ } else
+ if (i->opcode == NV_OP_TRUNC) {
+ pc->emit[0] |= rint;
+ pc->emit[1] |= 6 << 16;
+ }
+
+ if (i->saturate || i->opcode == NV_OP_SAT)
+ pc->emit[0] |= 0x20;
+
+ if (NV_BASEOP(i->opcode) == NV_OP_ABS || i->src[0]->mod & NV_MOD_ABS)
+ pc->emit[0] |= 1 << 6;
+ if (NV_BASEOP(i->opcode) == NV_OP_NEG || i->src[0]->mod & NV_MOD_NEG)
+ pc->emit[0] |= 1 << 8;
+
+ pc->emit[0] |= util_logbase2(DREG(i->def[0])->size) << 20;
+ pc->emit[0] |= util_logbase2(SREG(i->src[0])->size) << 23;
+
+ emit_form_1(pc, i);
+}
+
+static void
+emit_interp(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000000;
+ pc->emit[1] = 0xc07e0000;
+
+ DID(pc, i->def[0], 14);
+
+ set_pred(pc, i);
+
+ if (i->indirect)
+ SID(pc, i->src[i->indirect], 20);
+ else
+ SID(pc, NULL, 20);
+
+ if (i->opcode == NV_OP_PINTERP) {
+ pc->emit[0] |= 0x040;
+ SID(pc, i->src[1], 26);
+ } else {
+ SID(pc, NULL, 26);
+ }
+
+ pc->emit[1] |= i->src[0]->value->reg.address & 0xffff;
+
+ if (i->centroid)
+ pc->emit[0] |= 0x100;
+ else
+ if (i->flat)
+ pc->emit[0] |= 0x080;
+}
+
+static void
+emit_vfetch(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x03f00006;
+ pc->emit[1] = 0x06000000 | i->src[0]->value->reg.address;
+ if (i->patch)
+ pc->emit[0] |= 0x100;
+
+ set_pred(pc, i);
+
+ DVS(pc, i);
+ DID(pc, i->def[0], 14);
+
+ SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 26);
+}
+
+static void
+emit_export(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000006;
+ pc->emit[1] = 0x0a000000;
+ if (i->patch)
+ pc->emit[0] |= 0x100;
+
+ set_pred(pc, i);
+
+ assert(SFILE(i, 0) == NV_FILE_MEM_V);
+ assert(SFILE(i, 1) == NV_FILE_GPR);
+
+ SID(pc, i->src[1], 26); /* register source */
+ SVS(pc, i->src[0]);
+
+ pc->emit[1] |= i->src[0]->value->reg.address & 0xfff;
+
+ SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20);
+}
+
+static void
+emit_mov(struct nv_pc *pc, struct nv_instruction *i)
+{
+ if (i->opcode == NV_OP_MOV)
+ i->lanes = 0xf;
+
+ if (SFILE(i, 0) == NV_FILE_IMM) {
+ pc->emit[0] = 0x000001e2;
+ pc->emit[1] = 0x18000000;
+ } else
+ if (SFILE(i, 0) == NV_FILE_PRED) {
+ pc->emit[0] = 0x1c000004;
+ pc->emit[1] = 0x080e0000;
+ } else {
+ pc->emit[0] = 0x00000004 | (i->lanes << 5);
+ pc->emit[1] = 0x28000000;
+ }
+
+ emit_form_1(pc, i);
+}
+
+static void
+emit_ldst_size(struct nv_pc *pc, struct nv_instruction *i)
+{
+ assert(NV_IS_MEMORY_FILE(SFILE(i, 0)));
+
+ switch (SSIZE(i, 0)) {
+ case 1:
+ if (NV_TYPE_ISSGD(i->ext.cvt.s))
+ pc->emit[0] |= 0x20;
+ break;
+ case 2:
+ pc->emit[0] |= 0x40;
+ if (NV_TYPE_ISSGD(i->ext.cvt.s))
+ pc->emit[0] |= 0x20;
+ break;
+ case 4: pc->emit[0] |= 0x80; break;
+ case 8: pc->emit[0] |= 0xa0; break;
+ case 16: pc->emit[0] |= 0xc0; break;
+ default:
+ NOUVEAU_ERR("invalid load/store size %u\n", SSIZE(i, 0));
+ break;
+ }
+}
+
+static void
+emit_ld_common(struct nv_pc *pc, struct nv_instruction *i)
+{
+ emit_ldst_size(pc, i);
+
+ set_pred(pc, i);
+ set_address_16(pc, i->src[0]);
+
+ SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20);
+ DID(pc, i->def[0], 14);
+}
+
+static void
+emit_ld_const(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000006;
+ pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10);
+
+ emit_ld_common(pc, i);
+}
+
+static void
+emit_ld(struct nv_pc *pc, struct nv_instruction *i)
+{
+ if (SFILE(i, 0) >= NV_FILE_MEM_C(0) &&
+ SFILE(i, 0) <= NV_FILE_MEM_C(15)) {
+ if (SSIZE(i, 0) == 4 && i->indirect < 0) {
+ i->lanes = 0xf;
+ emit_mov(pc, i);
+ } else {
+ emit_ld_const(pc, i);
+ }
+ } else
+ if (SFILE(i, 0) == NV_FILE_MEM_L) {
+ pc->emit[0] = 0x00000005;
+ pc->emit[1] = 0xc0000000;
+
+ emit_ld_common(pc, i);
+ } else {
+ NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i, 0));
+ abort();
+ }
+}
+
+static void
+emit_st(struct nv_pc *pc, struct nv_instruction *i)
+{
+ if (SFILE(i, 0) != NV_FILE_MEM_L)
+ NOUVEAU_ERR("emit_st(%u): file not handled yet\n", SFILE(i, 0));
+
+ pc->emit[0] = 0x00000005 | (0 << 8); /* write-back caching */
+ pc->emit[1] = 0xc8000000;
+
+ emit_ldst_size(pc, i);
+
+ set_pred(pc, i);
+ set_address_16(pc, i->src[0]);
+
+ SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20);
+ DID(pc, i->src[1]->value, 14);
+}
+
+void
+nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i)
+{
+ debug_printf("EMIT: "); nvc0_print_instruction(i);
+
+ switch (i->opcode) {
+ case NV_OP_VFETCH:
+ emit_vfetch(pc, i);
+ break;
+ case NV_OP_EXPORT:
+ if (!pc->is_fragprog)
+ emit_export(pc, i);
+ break;
+ case NV_OP_MOV:
+ emit_mov(pc, i);
+ break;
+ case NV_OP_LD:
+ emit_ld(pc, i);
+ break;
+ case NV_OP_ST:
+ emit_st(pc, i);
+ break;
+ case NV_OP_LINTERP:
+ case NV_OP_PINTERP:
+ emit_interp(pc, i);
+ break;
+ case NV_OP_ADD_F32:
+ emit_add_f32(pc, i);
+ break;
+ case NV_OP_AND:
+ case NV_OP_OR:
+ case NV_OP_XOR:
+ emit_bitop(pc, i);
+ break;
+ case NV_OP_CVT:
+ case NV_OP_ABS_F32:
+ case NV_OP_ABS_S32:
+ case NV_OP_NEG_F32:
+ case NV_OP_NEG_S32:
+ case NV_OP_SAT:
+ case NV_OP_CEIL:
+ case NV_OP_FLOOR:
+ case NV_OP_TRUNC:
+ emit_cvt(pc, i);
+ break;
+ case NV_OP_DFDX:
+ emit_ddx(pc, i);
+ break;
+ case NV_OP_DFDY:
+ emit_ddy(pc, i);
+ break;
+ case NV_OP_COS:
+ emit_flop(pc, i, 0);
+ break;
+ case NV_OP_SIN:
+ emit_flop(pc, i, 1);
+ break;
+ case NV_OP_EX2:
+ emit_flop(pc, i, 2);
+ break;
+ case NV_OP_LG2:
+ emit_flop(pc, i, 3);
+ break;
+ case NV_OP_RCP:
+ emit_flop(pc, i, 4);
+ break;
+ case NV_OP_RSQ:
+ emit_flop(pc, i, 5);
+ break;
+ case NV_OP_PRESIN:
+ case NV_OP_PREEX2:
+ emit_preop(pc, i);
+ break;
+ case NV_OP_MAD_F32:
+ emit_mad_f32(pc, i);
+ break;
+ case NV_OP_MAX_F32:
+ case NV_OP_MAX_S32:
+ case NV_OP_MAX_U32:
+ case NV_OP_MIN_F32:
+ case NV_OP_MIN_S32:
+ case NV_OP_MIN_U32:
+ emit_minmax(pc, i);
+ break;
+ case NV_OP_MUL_F32:
+ emit_mul_f32(pc, i);
+ break;
+ case NV_OP_SET_F32:
+ case NV_OP_SET_F32_AND:
+ case NV_OP_SET_F32_OR:
+ case NV_OP_SET_F32_XOR:
+ case NV_OP_SET_S32:
+ case NV_OP_SET_U32:
+ case NV_OP_FSET_F32:
+ emit_set(pc, i);
+ break;
+ case NV_OP_SHL:
+ case NV_OP_SHR:
+ case NV_OP_SAR:
+ emit_shift(pc, i);
+ break;
+ case NV_OP_TEX:
+ case NV_OP_TXB:
+ case NV_OP_TXL:
+ emit_tex(pc, i);
+ break;
+ case NV_OP_BRA:
+ emit_flow(pc, i, 0x40);
+ break;
+ case NV_OP_CALL:
+ emit_flow(pc, i, 0x50);
+ break;
+ case NV_OP_JOINAT:
+ emit_flow(pc, i, 0x60);
+ break;
+ case NV_OP_EXIT:
+ emit_flow(pc, i, 0x80);
+ break;
+ case NV_OP_RET:
+ emit_flow(pc, i, 0x90);
+ break;
+ case NV_OP_KIL:
+ emit_flow(pc, i, 0x98);
+ break;
+ case NV_OP_JOIN:
+ case NV_OP_NOP:
+ pc->emit[0] = 0x00003de4;
+ pc->emit[1] = 0x40000000;
+ break;
+ case NV_OP_SELP:
+ emit_selp(pc, i);
+ break;
+ case NV_OP_SLCT_F32:
+ case NV_OP_SLCT_S32:
+ case NV_OP_SLCT_U32:
+ emit_slct(pc, i);
+ break;
+ default:
+ NOUVEAU_ERR("unhandled NV_OP: %d\n", i->opcode);
+ abort();
+ break;
+ }
+
+ if (i->join)
+ pc->emit[0] |= 0x10;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
new file mode 100644
index 0000000000..f7bf1680d0
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
@@ -0,0 +1,1420 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_pc.h"
+#include "nvc0_program.h"
+
+#define DESCEND_ARBITRARY(j, f) \
+do { \
+ b->pass_seq = ctx->pc->pass_seq; \
+ \
+ for (j = 0; j < 2; ++j) \
+ if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
+ f(ctx, b->out[j]); \
+} while (0)
+
+static INLINE boolean
+registers_interfere(struct nv_value *a, struct nv_value *b)
+{
+ if (a->reg.file != b->reg.file)
+ return FALSE;
+ if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file))
+ return FALSE;
+
+ assert(a->join->reg.id >= 0 && b->join->reg.id >= 0);
+
+ if (a->join->reg.id < b->join->reg.id) {
+ return (a->join->reg.id + a->reg.size >= b->join->reg.id);
+ } else
+ if (a->join->reg.id > b->join->reg.id) {
+ return (b->join->reg.id + b->reg.size >= a->join->reg.id);
+ }
+
+ return FALSE;
+}
+
+static INLINE boolean
+values_equal(struct nv_value *a, struct nv_value *b)
+{
+ if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
+ return FALSE;
+ if (NV_IS_MEMORY_FILE(a->reg.file))
+ return a->reg.address == b->reg.address;
+ else
+ return a->join->reg.id == b->join->reg.id;
+}
+
+#if 0
+static INLINE boolean
+inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b)
+{
+ int si, di;
+
+ for (di = 0; di < 4 && a->def[di]; ++di)
+ for (si = 0; si < 5 && b->src[si]; ++si)
+ if (registers_interfere(a->def[di], b->src[si]->value))
+ return FALSE;
+
+ return TRUE;
+}
+
+/* Check whether we can swap the order of the instructions,
+ * where a & b may be either the earlier or the later one.
+ */
+static boolean
+inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b)
+{
+ return inst_commutation_check(a, b) && inst_commutation_check(b, a);
+}
+#endif
+
+static INLINE boolean
+inst_removable(struct nv_instruction *nvi)
+{
+ if (nvi->opcode == NV_OP_ST)
+ return FALSE;
+ return (!(nvi->terminator ||
+ nvi->join ||
+ nvi->target ||
+ nvi->fixed ||
+ nvc0_insn_refcount(nvi)));
+}
+
+/* Check if we do not actually have to emit this instruction. */
+static INLINE boolean
+inst_is_noop(struct nv_instruction *nvi)
+{
+ if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND)
+ return TRUE;
+ if (nvi->terminator || nvi->join)
+ return FALSE;
+ if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
+ return TRUE;
+ if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
+ return FALSE;
+ if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
+ return FALSE;
+
+ if (nvi->src[0]->value->join->reg.id < 0) {
+ NOUVEAU_DBG("inst_is_noop: orphaned value detected\n");
+ return TRUE;
+ }
+
+ if (nvi->opcode == NV_OP_SELECT)
+ if (!values_equal(nvi->def[0], nvi->src[1]->value))
+ return FALSE;
+ return values_equal(nvi->def[0], nvi->src[0]->value);
+}
+
+struct nv_pass {
+ struct nv_pc *pc;
+ int n;
+ void *priv;
+};
+
+static int
+nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
+
+static void
+nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
+{
+ struct nv_pc *pc = (struct nv_pc *)priv;
+ struct nv_basic_block *in;
+ struct nv_instruction *nvi, *next;
+ int j;
+
+ /* find first non-empty block emitted before b */
+ for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);
+
+ for (; j >= 0; --j) {
+ in = pc->bb_list[j];
+
+ /* check for no-op branches (BRA $PC+8) */
+ if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
+ in->emit_size -= 8;
+ pc->emit_size -= 8;
+
+ for (++j; j < pc->num_blocks; ++j)
+ pc->bb_list[j]->emit_pos -= 8;
+
+ nvc0_insn_delete(in->exit);
+ }
+ b->emit_pos = in->emit_pos + in->emit_size;
+
+ if (in->emit_size) /* no more no-op branches to b */
+ break;
+ }
+
+ pc->bb_list[pc->num_blocks++] = b;
+
+ /* visit node */
+
+ for (nvi = b->entry; nvi; nvi = next) {
+ next = nvi->next;
+ if (inst_is_noop(nvi) ||
+ (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) {
+ nvc0_insn_delete(nvi);
+ } else
+ b->emit_size += 8;
+ }
+ pc->emit_size += b->emit_size;
+
+#ifdef NOUVEAU_DEBUG
+ if (!b->entry)
+ debug_printf("BB:%i is now empty\n", b->id);
+ else
+ debug_printf("BB:%i size = %u\n", b->id, b->emit_size);
+#endif
+}
+
+static int
+nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
+{
+ struct nv_pass pass;
+
+ pass.pc = pc;
+
+ pc->pass_seq++;
+ nv_pass_flatten(&pass, root);
+
+ nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);
+
+ return 0;
+}
+
+int
+nvc0_pc_exec_pass2(struct nv_pc *pc)
+{
+ int i, ret;
+
+ NOUVEAU_DBG("preparing %u blocks for emission\n", pc->num_blocks);
+
+ pc->num_blocks = 0; /* will reorder bb_list */
+
+ for (i = 0; i < pc->num_subroutines + 1; ++i)
+ if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
+ return ret;
+ return 0;
+}
+
+static INLINE boolean
+is_cspace_load(struct nv_instruction *nvi)
+{
+ if (!nvi)
+ return FALSE;
+ assert(nvi->indirect != 0);
+ return (nvi->opcode == NV_OP_LD &&
+ nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
+ nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
+}
+
+static INLINE boolean
+is_immd32_load(struct nv_instruction *nvi)
+{
+ if (!nvi)
+ return FALSE;
+ return (nvi->opcode == NV_OP_MOV &&
+ nvi->src[0]->value->reg.file == NV_FILE_IMM &&
+ nvi->src[0]->value->reg.size == 4);
+}
+
+static INLINE void
+check_swap_src_0_1(struct nv_instruction *nvi)
+{
+ struct nv_ref *src0 = nvi->src[0];
+ struct nv_ref *src1 = nvi->src[1];
+
+ if (!nv_op_commutative(nvi->opcode) &&
+ NV_BASEOP(nvi->opcode) != NV_OP_SET &&
+ NV_BASEOP(nvi->opcode) != NV_OP_SLCT)
+ return;
+ assert(src0 && src1 && src0->value && src1->value);
+
+ if (src1->value->reg.file != NV_FILE_GPR)
+ return;
+
+ if (is_cspace_load(src0->value->insn)) {
+ if (!is_cspace_load(src1->value->insn)) {
+ nvi->src[0] = src1;
+ nvi->src[1] = src0;
+ }
+ } else
+ if (is_immd32_load(src0->value->insn)) {
+ if (!is_cspace_load(src1->value->insn) &&
+ !is_immd32_load(src1->value->insn)) {
+ nvi->src[0] = src1;
+ nvi->src[1] = src0;
+ }
+ }
+
+ if (nvi->src[0] != src0) {
+ if (NV_BASEOP(nvi->opcode) == NV_OP_SET)
+ nvi->set_cond = nvc0_ir_reverse_cc(nvi->set_cond);
+ else
+ if (NV_BASEOP(nvi->opcode) == NV_OP_SLCT)
+ nvi->set_cond = NV_CC_INVERSE(nvi->set_cond);
+ }
+}
+
+static void
+nvi_set_indirect_load(struct nv_pc *pc,
+ struct nv_instruction *nvi, struct nv_value *val)
+{
+ for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect];
+ ++nvi->indirect);
+ assert(nvi->indirect < 6);
+ nv_reference(pc, nvi, nvi->indirect, val);
+}
+
+static int
+nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *nvi, *ld;
+ int s;
+
+ for (nvi = b->entry; nvi; nvi = nvi->next) {
+ check_swap_src_0_1(nvi);
+
+ for (s = 0; s < 3 && nvi->src[s]; ++s) {
+ ld = nvi->src[s]->value->insn;
+ if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV))
+ continue;
+ if (!nvc0_insn_can_load(nvi, s, ld))
+ continue;
+
+ /* fold it ! */
+ nv_reference(ctx->pc, nvi, s, ld->src[0]->value);
+ if (ld->indirect >= 0)
+ nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value);
+
+ if (!nvc0_insn_refcount(ld))
+ nvc0_insn_delete(ld);
+ }
+ }
+ DESCEND_ARBITRARY(s, nvc0_pass_fold_loads);
+
+ return 0;
+}
+
+/* NOTE: Assumes loads have not yet been folded. */
+static int
+nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *nvi, *mi, *next;
+ int j;
+ uint8_t mod;
+
+ for (nvi = b->entry; nvi; nvi = next) {
+ next = nvi->next;
+ if (nvi->opcode == NV_OP_SUB) {
+ nvi->src[1]->mod ^= NV_MOD_NEG;
+ nvi->opcode = NV_OP_ADD;
+ }
+
+ for (j = 0; j < 3 && nvi->src[j]; ++j) {
+ mi = nvi->src[j]->value->insn;
+ if (!mi)
+ continue;
+ if (mi->def[0]->refc > 1 || mi->predicate >= 0)
+ continue;
+
+ if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG;
+ else
+ if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS;
+ else
+ continue;
+ assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));
+
+ mod |= mi->src[0]->mod;
+
+ if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
+ /* abs neg [abs] = abs */
+ mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
+ } else
+ if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
+ /* neg as opcode and modifier on same insn cannot occur */
+ /* neg neg abs = abs, neg neg = identity */
+ assert(j == 0);
+ if (mod & NV_MOD_ABS)
+ nvi->opcode = NV_OP_ABS;
+ else
+ nvi->opcode = NV_OP_MOV;
+ mod = 0;
+ }
+
+ if ((nv_op_supported_src_mods(nvi->opcode, j) & mod) != mod)
+ continue;
+
+ nv_reference(ctx->pc, nvi, j, mi->src[0]->value);
+
+ nvi->src[j]->mod ^= mod;
+ }
+
+ if (nvi->opcode == NV_OP_SAT) {
+ mi = nvi->src[0]->value->insn;
+
+ if (mi->def[0]->refc > 1 ||
+ (mi->opcode != NV_OP_ADD &&
+ mi->opcode != NV_OP_MUL &&
+ mi->opcode != NV_OP_MAD))
+ continue;
+ mi->saturate = 1;
+ mi->def[0] = nvi->def[0];
+ mi->def[0]->insn = mi;
+ nvc0_insn_delete(nvi);
+ }
+ }
+ DESCEND_ARBITRARY(j, nv_pass_lower_mods);
+
+ return 0;
+}
+
+#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
+
+static void
+apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod)
+{
+ if (mod & NV_MOD_ABS) {
+ if (type == NV_TYPE_F32)
+ *val &= 0x7fffffff;
+ else
+ if ((*val) & (1 << 31))
+ *val = ~(*val) + 1;
+ }
+ if (mod & NV_MOD_NEG) {
+ if (type == NV_TYPE_F32)
+ *val ^= 0x80000000;
+ else
+ *val = ~(*val) + 1;
+ }
+ if (mod & NV_MOD_SAT) {
+ union {
+ float f;
+ uint32_t u;
+ int32_t i;
+ } u;
+ u.u = *val;
+ if (type == NV_TYPE_F32) {
+ u.f = CLAMP(u.f, -1.0f, 1.0f);
+ } else
+ if (type == NV_TYPE_U16) {
+ u.u = MIN2(u.u, 0xffff);
+ } else
+ if (type == NV_TYPE_S16) {
+ u.i = CLAMP(u.i, -32768, 32767);
+ }
+ *val = u.u;
+ }
+ if (mod & NV_MOD_NOT)
+ *val = ~*val;
+}
+
+static void
+constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
+ struct nv_value *src0, struct nv_value *src1)
+{
+ struct nv_value *val;
+ union {
+ float f32;
+ uint32_t u32;
+ int32_t s32;
+ } u0, u1, u;
+ ubyte type;
+
+ if (!nvi->def[0])
+ return;
+ type = NV_OPTYPE(nvi->opcode);
+
+ u.u32 = 0;
+ u0.u32 = src0->reg.imm.u32;
+ u1.u32 = src1->reg.imm.u32;
+
+ apply_modifiers(&u0.u32, type, nvi->src[0]->mod);
+ apply_modifiers(&u1.u32, type, nvi->src[1]->mod);
+
+ switch (nvi->opcode) {
+ case NV_OP_MAD_F32:
+ if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
+ return;
+ /* fall through */
+ case NV_OP_MUL_F32:
+ u.f32 = u0.f32 * u1.f32;
+ break;
+ case NV_OP_MUL_B32:
+ u.u32 = u0.u32 * u1.u32;
+ break;
+ case NV_OP_ADD_F32:
+ u.f32 = u0.f32 + u1.f32;
+ break;
+ case NV_OP_ADD_B32:
+ u.u32 = u0.u32 + u1.u32;
+ break;
+ case NV_OP_SUB_F32:
+ u.f32 = u0.f32 - u1.f32;
+ break;
+ /*
+ case NV_OP_SUB_B32:
+ u.u32 = u0.u32 - u1.u32;
+ break;
+ */
+ default:
+ return;
+ }
+
+ val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type));
+ val->reg.imm.u32 = u.u32;
+
+ nv_reference(pc, nvi, 1, NULL);
+ nv_reference(pc, nvi, 0, val);
+
+ if (nvi->opcode == NV_OP_MAD_F32) {
+ nvi->src[1] = nvi->src[0];
+ nvi->src[0] = nvi->src[2];
+ nvi->src[2] = NULL;
+ nvi->opcode = NV_OP_ADD_F32;
+
+ if (val->reg.imm.u32 == 0) {
+ nvi->src[1] = NULL;
+ nvi->opcode = NV_OP_MOV;
+ }
+ } else {
+ nvi->opcode = NV_OP_MOV;
+ }
+}
+
+static void
+constant_operand(struct nv_pc *pc,
+ struct nv_instruction *nvi, struct nv_value *val, int s)
+{
+ union {
+ float f32;
+ uint32_t u32;
+ int32_t s32;
+ } u;
+ int shift;
+ int t = s ? 0 : 1;
+ uint op;
+ ubyte type;
+
+ if (!nvi->def[0])
+ return;
+ type = NV_OPTYPE(nvi->opcode);
+
+ u.u32 = val->reg.imm.u32;
+ apply_modifiers(&u.u32, type, nvi->src[s]->mod);
+
+ if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) {
+ nvi->opcode = NV_OP_MOV;
+ nv_reference(pc, nvi, t, NULL);
+ if (s) {
+ nvi->src[0] = nvi->src[1];
+ nvi->src[1] = NULL;
+ }
+ return;
+ }
+
+ switch (nvi->opcode) {
+ case NV_OP_MUL_F32:
+ if (u.f32 == 1.0f || u.f32 == -1.0f) {
+ if (u.f32 == -1.0f)
+ nvi->src[t]->mod ^= NV_MOD_NEG;
+ switch (nvi->src[t]->mod) {
+ case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
+ case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
+ case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
+ default:
+ return;
+ }
+ nvi->opcode = op;
+ nv_reference(pc, nvi, 0, nvi->src[t]->value);
+ nv_reference(pc, nvi, 1, NULL);
+ nvi->src[0]->mod = 0;
+ } else
+ if (u.f32 == 2.0f || u.f32 == -2.0f) {
+ if (u.f32 == -2.0f)
+ nvi->src[t]->mod ^= NV_MOD_NEG;
+ nvi->opcode = NV_OP_ADD_F32;
+ nv_reference(pc, nvi, s, nvi->src[t]->value);
+ nvi->src[s]->mod = nvi->src[t]->mod;
+ }
+ break;
+ case NV_OP_ADD_F32:
+ if (u.u32 == 0) {
+ switch (nvi->src[t]->mod) {
+ case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
+ case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
+ case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
+ case NV_MOD_NEG | NV_MOD_ABS:
+ op = NV_OP_CVT;
+ nvi->ext.cvt.s = nvi->ext.cvt.d = type;
+ break;
+ default:
+ return;
+ }
+ nvi->opcode = op;
+ nv_reference(pc, nvi, 0, nvi->src[t]->value);
+ nv_reference(pc, nvi, 1, NULL);
+ if (nvi->opcode != NV_OP_CVT)
+ nvi->src[0]->mod = 0;
+ }
+ break;
+ case NV_OP_ADD_B32:
+ if (u.u32 == 0) {
+ assert(nvi->src[t]->mod == 0);
+ nvi->opcode = nvi->saturate ? NV_OP_CVT : NV_OP_MOV;
+ nvi->ext.cvt.s = nvi->ext.cvt.d = type;
+ nv_reference(pc, nvi, 0, nvi->src[t]->value);
+ nv_reference(pc, nvi, 1, NULL);
+ }
+ break;
+ case NV_OP_MUL_B32:
+ /* multiplication by 0 already handled above */
+ assert(nvi->src[s]->mod == 0);
+ shift = ffs(u.s32) - 1;
+ if (shift == 0) {
+ nvi->opcode = NV_OP_MOV;
+ nv_reference(pc, nvi, 0, nvi->src[t]->value);
+ nv_reference(pc, nvi, 1, NULL);
+ } else
+ if (u.s32 > 0 && u.s32 == (1 << shift)) {
+ nvi->opcode = NV_OP_SHL;
+ (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.s32 = shift;
+ nv_reference(pc, nvi, 0, nvi->src[t]->value);
+ nv_reference(pc, nvi, 1, val);
+ break;
+ }
+ break;
+ case NV_OP_RCP:
+ u.f32 = 1.0f / u.f32;
+ (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32;
+ nvi->opcode = NV_OP_MOV;
+ assert(s == 0);
+ nv_reference(pc, nvi, 0, val);
+ break;
+ case NV_OP_RSQ:
+ u.f32 = 1.0f / sqrtf(u.f32);
+ (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32;
+ nvi->opcode = NV_OP_MOV;
+ assert(s == 0);
+ nv_reference(pc, nvi, 0, val);
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi)
+{
+ struct nv_value *src0 = nvi->src[0]->value;
+ struct nv_value *src1 = nvi->src[1]->value;
+
+ if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod))
+ return;
+ if (src0->reg.file != NV_FILE_GPR)
+ return;
+ nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0);
+ nvc0_insn_delete(nvi);
+}
+
+/* check if we can MUL + ADD -> MAD/FMA */
+static void
+handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi)
+{
+ struct nv_value *src0 = nvi->src[0]->value;
+ struct nv_value *src1 = nvi->src[1]->value;
+ struct nv_value *src;
+ int s;
+ uint8_t mod[4];
+
+ if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0;
+ else
+ if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1;
+ else
+ return;
+
+ if ((src0->insn && src0->insn->bb != nvi->bb) ||
+ (src1->insn && src1->insn->bb != nvi->bb))
+ return;
+
+ /* check for immediates from prior constant folding */
+ if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
+ return;
+ src = nvi->src[s]->value;
+
+ mod[0] = nvi->src[0]->mod;
+ mod[1] = nvi->src[1]->mod;
+ mod[2] = src->insn->src[0]->mod;
+ mod[3] = src->insn->src[1]->mod;
+
+ if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
+ return;
+
+ nvi->opcode = NV_OP_MAD_F32;
+
+ nv_reference(ctx->pc, nvi, s, NULL);
+ nvi->src[2] = nvi->src[!s];
+ nvi->src[!s] = NULL;
+
+ nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value);
+ nvi->src[0]->mod = mod[2] ^ mod[s];
+ nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value);
+ nvi->src[1]->mod = mod[3];
+}
+
+static int
+nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *nvi, *next;
+ int j;
+
+ for (nvi = b->entry; nvi; nvi = next) {
+ struct nv_value *src0, *src1;
+ uint baseop = NV_BASEOP(nvi->opcode);
+
+ next = nvi->next;
+
+ src0 = nvc0_pc_find_immediate(nvi->src[0]);
+ src1 = nvc0_pc_find_immediate(nvi->src[1]);
+
+ if (src0 && src1) {
+ constant_expression(ctx->pc, nvi, src0, src1);
+ } else {
+ if (src0)
+ constant_operand(ctx->pc, nvi, src0, 0);
+ else
+ if (src1)
+ constant_operand(ctx->pc, nvi, src1, 1);
+ }
+
+ if (baseop == NV_OP_MIN || baseop == NV_OP_MAX)
+ handle_min_max(ctx, nvi);
+ else
+ if (nvi->opcode == NV_OP_ADD_F32)
+ handle_add_mul(ctx, nvi);
+ }
+ DESCEND_ARBITRARY(j, nv_pass_algebraic_opt);
+
+ return 0;
+}
+
+/* TODO: redundant store elimination */
+
+struct mem_record {
+ struct mem_record *next;
+ struct nv_instruction *insn;
+ uint32_t ofst;
+ uint32_t base;
+ uint32_t size;
+};
+
+#define MEM_RECORD_POOL_SIZE 1024
+
+struct pass_reld_elim {
+ struct nv_pc *pc;
+
+ struct mem_record *imm;
+ struct mem_record *mem_v;
+ struct mem_record *mem_a;
+ struct mem_record *mem_c[16];
+ struct mem_record *mem_l;
+
+ struct mem_record pool[MEM_RECORD_POOL_SIZE];
+ int alloc;
+};
+
+/* Extend the load operation in @rec to also cover the data loaded by @ld.
+ * The two loads may not overlap but reference adjacent memory locations.
+ */
+static void
+combine_load(struct nv_pc *pc, struct mem_record *rec,
+ struct nv_instruction *ld)
+{
+ struct nv_instruction *fv = rec->insn;
+ struct nv_value *mem = ld->src[0]->value;
+ uint32_t size = rec->size + mem->reg.size;
+ int j;
+ int d = rec->size / 4;
+
+ assert(rec->size < 16);
+ if (rec->ofst > mem->reg.address) {
+ if ((size == 8 && mem->reg.address & 3) ||
+ (size > 8 && mem->reg.address & 7))
+ return;
+ rec->ofst = mem->reg.address;
+ for (j = 0; j < d; ++j)
+ fv->def[mem->reg.size / 4 + j] = fv->def[j];
+ d = 0;
+ } else
+ if ((size == 8 && rec->ofst & 3) ||
+ (size > 8 && rec->ofst & 7)) {
+ return;
+ }
+
+ for (j = 0; j < mem->reg.size / 4; ++j) {
+ fv->def[d] = ld->def[j];
+ fv->def[d++]->insn = fv;
+ }
+
+ if (fv->src[0]->value->refc > 1)
+ nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value));
+ fv->src[0]->value->reg.address = rec->ofst;
+ fv->src[0]->value->reg.size = rec->size = size;
+
+ nvc0_insn_delete(ld);
+}
+
+static void
+combine_export(struct mem_record *rec, struct nv_instruction *ex)
+{
+
+}
+
+static INLINE void
+add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec,
+ uint32_t base, uint32_t ofst, struct nv_instruction *nvi)
+{
+ struct mem_record *it = &ctx->pool[ctx->alloc++];
+
+ it->next = *rec;
+ *rec = it;
+ it->base = base;
+ it->ofst = ofst;
+ it->insn = nvi;
+ it->size = nvi->src[0]->value->reg.size;
+}
+
+/* vectorize and reuse loads from memory or of immediates */
+static int
+nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
+{
+ struct mem_record **rec, *it;
+ struct nv_instruction *ld, *next;
+ struct nv_value *mem;
+ uint32_t base, ofst;
+ int s;
+
+ for (ld = b->entry; ld; ld = next) {
+ next = ld->next;
+
+ if (is_cspace_load(ld)) {
+ mem = ld->src[0]->value;
+ rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)];
+ } else
+ if (ld->opcode == NV_OP_VFETCH) {
+ mem = ld->src[0]->value;
+ rec = &ctx->mem_a;
+ } else
+ if (ld->opcode == NV_OP_EXPORT) {
+ mem = ld->src[0]->value;
+ if (mem->reg.file != NV_FILE_MEM_V)
+ continue;
+ rec = &ctx->mem_v;
+ } else {
+ continue;
+ }
+ if (ld->def[0] && ld->def[0]->refc == 0)
+ continue;
+ ofst = mem->reg.address;
+ base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0;
+
+ for (it = *rec; it; it = it->next) {
+ if (it->base == base &&
+ ((it->ofst >> 4) == (ofst >> 4)) &&
+ ((it->ofst + it->size == ofst) ||
+ (it->ofst - mem->reg.size == ofst))) {
+ /* only NV_OP_VFETCH can load exactly 12 bytes */
+ if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
+ continue;
+ if (it->ofst < ofst) {
+ if ((it->ofst & 0xf) == 4)
+ continue;
+ } else
+ if ((ofst & 0xf) == 4)
+ continue;
+ break;
+ }
+ }
+ if (it) {
+ switch (ld->opcode) {
+ case NV_OP_EXPORT: combine_export(it, ld); break;
+ default:
+ combine_load(ctx->pc, it, ld);
+ break;
+ }
+ } else
+ if (ctx->alloc < MEM_RECORD_POOL_SIZE) {
+ add_mem_record(ctx, rec, base, ofst, ld);
+ }
+ }
+
+ ctx->alloc = 0;
+ ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL;
+ for (s = 0; s < 16; ++s)
+ ctx->mem_c[s] = NULL;
+
+ DESCEND_ARBITRARY(s, nv_pass_mem_opt);
+ return 0;
+}
+
+static void
+eliminate_store(struct mem_record *rec, struct nv_instruction *st)
+{
+}
+
+/* elimination of redundant stores */
+static int
+pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
+{
+ struct mem_record **rec, *it;
+ struct nv_instruction *st, *next;
+ struct nv_value *mem;
+ uint32_t base, ofst, size;
+ int s;
+
+ for (st = b->entry; st; st = next) {
+ next = st->next;
+
+ if (st->opcode == NV_OP_ST) {
+ mem = st->src[0]->value;
+ rec = &ctx->mem_l;
+ } else
+ if (st->opcode == NV_OP_EXPORT) {
+ mem = st->src[0]->value;
+ if (mem->reg.file != NV_FILE_MEM_V)
+ continue;
+ rec = &ctx->mem_v;
+ } else
+ if (st->opcode == NV_OP_ST) {
+ /* TODO: purge */
+ }
+ ofst = mem->reg.address;
+ base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0;
+ size = mem->reg.size;
+
+ for (it = *rec; it; it = it->next) {
+ if (it->base == base &&
+ (it->ofst <= ofst && (it->ofst + size) > ofst))
+ break;
+ }
+ if (it)
+ eliminate_store(it, st);
+ else
+ add_mem_record(ctx, rec, base, ofst, st);
+ }
+
+ DESCEND_ARBITRARY(s, nv_pass_mem_opt);
+ return 0;
+}
+
+/* TODO: properly handle loads from l[] memory in the presence of stores */
+static int
+nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
+{
+#if 0
+ struct load_record **rec, *it;
+ struct nv_instruction *ld, *next;
+ uint64_t data[2];
+ struct nv_value *val;
+ int j;
+
+ for (ld = b->entry; ld; ld = next) {
+ next = ld->next;
+ if (!ld->src[0])
+ continue;
+ val = ld->src[0]->value;
+ rec = NULL;
+
+ if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
+ data[0] = val->reg.id;
+ data[1] = 0;
+ rec = &ctx->mem_v;
+ } else
+ if (ld->opcode == NV_OP_LDA) {
+ data[0] = val->reg.id;
+ data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
+ if (val->reg.file >= NV_FILE_MEM_C(0) &&
+ val->reg.file <= NV_FILE_MEM_C(15))
+ rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
+ else
+ if (val->reg.file == NV_FILE_MEM_S)
+ rec = &ctx->mem_s;
+ else
+ if (val->reg.file == NV_FILE_MEM_L)
+ rec = &ctx->mem_l;
+ } else
+ if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
+ data[0] = val->reg.imm.u32;
+ data[1] = 0;
+ rec = &ctx->imm;
+ }
+
+ if (!rec || !ld->def[0]->refc)
+ continue;
+
+ for (it = *rec; it; it = it->next)
+ if (it->data[0] == data[0] && it->data[1] == data[1])
+ break;
+
+ if (it) {
+ if (ld->def[0]->reg.id >= 0)
+ it->value = ld->def[0];
+ else
+ if (!ld->fixed)
+ nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value);
+ } else {
+ if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
+ continue;
+ it = &ctx->pool[ctx->alloc++];
+ it->next = *rec;
+ it->data[0] = data[0];
+ it->data[1] = data[1];
+ it->value = ld->def[0];
+ *rec = it;
+ }
+ }
+
+ ctx->imm = NULL;
+ ctx->mem_s = NULL;
+ ctx->mem_v = NULL;
+ for (j = 0; j < 16; ++j)
+ ctx->mem_c[j] = NULL;
+ ctx->mem_l = NULL;
+ ctx->alloc = 0;
+
+ DESCEND_ARBITRARY(j, nv_pass_reload_elim);
+#endif
+ return 0;
+}
+
+static int
+nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ int i, c, j;
+
+ for (i = 0; i < ctx->pc->num_instructions; ++i) {
+ struct nv_instruction *nvi = &ctx->pc->instructions[i];
+ struct nv_value *def[4];
+
+ if (!nv_is_texture_op(nvi->opcode))
+ continue;
+ nvi->tex_mask = 0;
+
+ for (c = 0; c < 4; ++c) {
+ if (nvi->def[c]->refc)
+ nvi->tex_mask |= 1 << c;
+ def[c] = nvi->def[c];
+ }
+
+ j = 0;
+ for (c = 0; c < 4; ++c)
+ if (nvi->tex_mask & (1 << c))
+ nvi->def[j++] = def[c];
+ for (c = 0; c < 4; ++c)
+ if (!(nvi->tex_mask & (1 << c)))
+ nvi->def[j++] = def[c];
+ assert(j == 4);
+ }
+ return 0;
+}
+
+struct nv_pass_dce {
+ struct nv_pc *pc;
+ uint removed;
+};
+
+static int
+nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
+{
+ int j;
+ struct nv_instruction *nvi, *next;
+
+ for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
+ next = nvi->next;
+
+ if (inst_removable(nvi)) {
+ nvc0_insn_delete(nvi);
+ ++ctx->removed;
+ }
+ }
+ DESCEND_ARBITRARY(j, nv_pass_dce);
+
+ return 0;
+}
+
+/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
+ * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
+ * BREAK and dummy ELSE block.
+ */
+static INLINE boolean
+bb_is_if_else_endif(struct nv_basic_block *bb)
+{
+ if (!bb->out[0] || !bb->out[1])
+ return FALSE;
+
+ if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
+ return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
+ !bb->out[1]->out[1]);
+ } else {
+ return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
+ !bb->out[0]->out[1] &&
+ !bb->out[1]->out[1]);
+ }
+}
+
+/* Predicate instructions and delete any branch at the end if it is
+ * not a break from a loop.
+ */
+static void
+predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
+ struct nv_value *pred, uint8_t cc)
+{
+ struct nv_instruction *nvi, *prev;
+ int s;
+
+ if (!b->entry)
+ return;
+ for (nvi = b->entry; nvi; nvi = nvi->next) {
+ prev = nvi;
+ if (inst_is_noop(nvi))
+ continue;
+ for (s = 0; nvi->src[s]; ++s);
+ assert(s < 6);
+ nvi->predicate = s;
+ nvi->cc = cc;
+ nv_reference(pc, nvi, nvi->predicate, pred);
+ }
+ if (prev->opcode == NV_OP_BRA &&
+ b->out_kind[0] != CFG_EDGE_LOOP_LEAVE &&
+ b->out_kind[1] != CFG_EDGE_LOOP_LEAVE)
+ nvc0_insn_delete(prev);
+}
+
+static INLINE boolean
+may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred)
+{
+ if (nvi->def[0] && values_equal(nvi->def[0], pred))
+ return FALSE;
+ return nvc0_insn_is_predicateable(nvi);
+}
+
+/* Transform IF/ELSE/ENDIF constructs into predicated instructions
+ * where feasible.
+ */
+static int
+nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *nvi;
+ struct nv_value *pred;
+ int k;
+ int n0, n1; /* instruction counts of outgoing blocks */
+
+ if (bb_is_if_else_endif(b)) {
+ assert(b->exit && b->exit->opcode == NV_OP_BRA);
+
+ assert(b->exit->predicate >= 0);
+ pred = b->exit->src[b->exit->predicate]->value;
+
+ n1 = n0 = 0;
+ for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
+ if (!may_predicate_insn(nvi, pred))
+ break;
+ if (!nvi) {
+ /* we're after register allocation, so there always is an ELSE block */
+ for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
+ if (!may_predicate_insn(nvi, pred))
+ break;
+ }
+
+ /* 12 is an arbitrary limit */
+ if (!nvi && n0 < 12 && n1 < 12) {
+ predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc);
+ predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc);
+
+ nvc0_insn_delete(b->exit); /* delete the branch */
+
+ /* and a potential joinat before it */
+ if (b->exit && b->exit->opcode == NV_OP_JOINAT)
+ nvc0_insn_delete(b->exit);
+
+ /* remove join operations at the end of the conditional */
+ k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;
+ if ((nvi = b->out[0]->out[k]->entry)) {
+ nvi->join = 0;
+ if (nvi->opcode == NV_OP_JOIN)
+ nvc0_insn_delete(nvi);
+ }
+ }
+ }
+ DESCEND_ARBITRARY(k, nv_pass_flatten);
+
+ return 0;
+}
+
+/* Tests instructions for equality, but independently of sources. */
+static boolean
+is_operation_equal(struct nv_instruction *a, struct nv_instruction *b)
+{
+ if (a->opcode != b->opcode)
+ return FALSE;
+ if (nv_is_texture_op(a->opcode)) {
+ if (a->ext.tex.t != b->ext.tex.t ||
+ a->ext.tex.s != b->ext.tex.s)
+ return FALSE;
+ if (a->tex_dim != b->tex_dim ||
+ a->tex_array != b->tex_array ||
+ a->tex_cube != b->tex_cube ||
+ a->tex_shadow != b->tex_shadow ||
+ a->tex_live != b->tex_live)
+ return FALSE;
+ } else
+ if (a->opcode == NV_OP_CVT) {
+ if (a->ext.cvt.s != b->ext.cvt.s ||
+ a->ext.cvt.d != b->ext.cvt.d)
+ return FALSE;
+ } else
+ if (NV_BASEOP(a->opcode) == NV_OP_SET ||
+ NV_BASEOP(a->opcode) == NV_OP_SLCT) {
+ if (a->set_cond != b->set_cond)
+ return FALSE;
+ } else
+ if (a->opcode == NV_OP_LINTERP ||
+ a->opcode == NV_OP_PINTERP) {
+ if (a->centroid != b->centroid ||
+ a->flat != b->flat)
+ return FALSE;
+ }
+ if (a->cc != b->cc)
+ return FALSE;
+ if (a->lanes != b->lanes ||
+ a->patch != b->patch ||
+ a->saturate != b->saturate)
+ return FALSE;
+ if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */
+ return FALSE;
+ return TRUE;
+}
+
+/* local common subexpression elimination, stupid O(n^2) implementation */
+static int
+nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *ir, *ik, *next;
+ struct nv_instruction *entry = b->phi ? b->phi : b->entry;
+ int s, d;
+ unsigned int reps;
+
+ do {
+ reps = 0;
+ for (ir = entry; ir; ir = next) {
+ next = ir->next;
+ if (ir->fixed)
+ continue;
+ for (ik = entry; ik != ir; ik = ik->next) {
+ if (!is_operation_equal(ir, ik))
+ continue;
+ if (!ir->def[0] || !ik->def[0])
+ continue;
+
+ if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
+ continue;
+
+ for (d = 0; d < 4; ++d) {
+ if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 1 : 0))
+ break;
+ if (ir->def[d]) {
+ if (!values_equal(ik->def[0], ir->def[0]))
+ break;
+ } else {
+ d = 4;
+ break;
+ }
+ }
+ if (d != 4)
+ continue;
+
+ for (s = 0; s < 5; ++s) {
+ struct nv_value *a, *b;
+
+ if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0))
+ break;
+ if (!ir->src[s]) {
+ s = 5;
+ break;
+ }
+
+ if (ik->src[s]->mod != ir->src[s]->mod)
+ break;
+ a = ik->src[s]->value;
+ b = ir->src[s]->value;
+ if (a == b)
+ continue;
+ if (a->reg.file != b->reg.file ||
+ a->reg.id < 0 || /* this excludes memory loads/stores */
+ a->reg.id != b->reg.id)
+ break;
+ }
+ if (s == 5) {
+ nvc0_insn_delete(ir);
+ for (d = 0; d < 4 && ir->def[d]; ++d)
+ nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]);
+ ++reps;
+ break;
+ }
+ }
+ }
+ } while(reps);
+
+ DESCEND_ARBITRARY(s, nv_pass_cse);
+
+ return 0;
+}
+
+/* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy
+ * neighbouring registers. CSE might have messed this up.
+ * Just generate a MOV for each source to avoid conflicts if they're used in
+ * multiple NV_OP_BIND at different positions.
+ */
+static int
+nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_value *val;
+ struct nv_instruction *bnd, *nvi, *next;
+ int s;
+
+ for (bnd = b->entry; bnd; bnd = next) {
+ next = bnd->next;
+ if (bnd->opcode != NV_OP_BIND)
+ continue;
+ for (s = 0; s < 4 && bnd->src[s]; ++s) {
+ val = bnd->src[s]->value;
+
+ nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
+ nvi->def[0] = new_value_like(ctx->pc, val);
+ nvi->def[0]->insn = nvi;
+ nv_reference(ctx->pc, nvi, 0, val);
+ nv_reference(ctx->pc, bnd, s, nvi->def[0]);
+
+ nvc0_insn_insert_before(bnd, nvi);
+ }
+ }
+ DESCEND_ARBITRARY(s, nv_pass_fix_bind);
+
+ return 0;
+}
+
+static int
+nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
+{
+ struct pass_reld_elim *reldelim;
+ struct nv_pass pass;
+ struct nv_pass_dce dce;
+ int ret;
+
+ pass.n = 0;
+ pass.pc = pc;
+
+ /* Do CSE so we can just compare values by pointer in subsequent passes. */
+ pc->pass_seq++;
+ ret = nv_pass_cse(&pass, root);
+ if (ret)
+ return ret;
+
+ /* Do this first, so we don't have to pay attention
+ * to whether sources are supported memory loads.
+ */
+ pc->pass_seq++;
+ ret = nv_pass_algebraic_opt(&pass, root);
+ if (ret)
+ return ret;
+
+ pc->pass_seq++;
+ ret = nv_pass_lower_mods(&pass, root);
+ if (ret)
+ return ret;
+
+ pc->pass_seq++;
+ ret = nvc0_pass_fold_loads(&pass, root);
+ if (ret)
+ return ret;
+
+ if (pc->opt_reload_elim) {
+ reldelim = CALLOC_STRUCT(pass_reld_elim);
+ reldelim->pc = pc;
+
+ pc->pass_seq++;
+ ret = nv_pass_reload_elim(reldelim, root);
+ if (ret) {
+ FREE(reldelim);
+ return ret;
+ }
+ memset(reldelim, 0, sizeof(struct pass_reld_elim));
+ reldelim->pc = pc;
+ }
+
+ /* May run DCE before load-combining since that pass will clean up
+ * after itself.
+ */
+ dce.pc = pc;
+ do {
+ dce.removed = 0;
+ pc->pass_seq++;
+ ret = nv_pass_dce(&dce, root);
+ if (ret)
+ return ret;
+ } while (dce.removed);
+
+ if (pc->opt_reload_elim) {
+ pc->pass_seq++;
+ ret = nv_pass_mem_opt(reldelim, root);
+ if (!ret) {
+ memset(reldelim, 0, sizeof(struct pass_reld_elim));
+ reldelim->pc = pc;
+
+ pc->pass_seq++;
+ ret = nv_pass_mem_opt(reldelim, root);
+ }
+ FREE(reldelim);
+ if (ret)
+ return ret;
+ }
+
+ ret = nv_pass_tex_mask(&pass, root);
+ if (ret)
+ return ret;
+
+ pc->pass_seq++;
+ ret = nv_pass_fix_bind(&pass, root);
+
+ return ret;
+}
+
+int
+nvc0_pc_exec_pass0(struct nv_pc *pc)
+{
+ int i, ret;
+
+ for (i = 0; i < pc->num_subroutines + 1; ++i)
+ if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
+ return ret;
+ return 0;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c
new file mode 100644
index 0000000000..1f37cb802d
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_pc.h"
+
+#define PRINT(args...) debug_printf(args)
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+#endif
+
+static const char *norm = "\x1b[00m";
+static const char *gree = "\x1b[32m";
+static const char *blue = "\x1b[34m";
+static const char *cyan = "\x1b[36m";
+static const char *yllw = "\x1b[33m";
+static const char *mgta = "\x1b[35m";
+
+static const char *nv_cond_names[] =
+{
+ "never", "lt" , "eq" , "le" , "gt" , "ne" , "ge" , "",
+ "never", "ltu", "equ", "leu", "gtu", "neu", "geu", "",
+ "o", "c", "a", "s"
+};
+
+static const char *nv_modifier_strings[] =
+{
+ "",
+ "neg",
+ "abs",
+ "neg abs",
+ "not",
+ "not neg"
+ "not abs",
+ "not neg abs",
+ "sat",
+ "BAD_MOD"
+};
+
+const char *
+nvc0_opcode_name(uint opcode)
+{
+ return nvc0_op_info_table[MIN2(opcode, NV_OP_COUNT)].name;
+}
+
+static INLINE const char *
+nv_type_name(ubyte type, ubyte size)
+{
+ switch (type) {
+ case NV_TYPE_U16: return "u16";
+ case NV_TYPE_S16: return "s16";
+ case NV_TYPE_F32: return "f32";
+ case NV_TYPE_U32: return "u32";
+ case NV_TYPE_S32: return "s32";
+ case NV_TYPE_P32: return "p32";
+ case NV_TYPE_F64: return "f64";
+ case NV_TYPE_ANY:
+ {
+ switch (size) {
+ case 1: return "b8";
+ case 2: return "b16";
+ case 4: return "b32";
+ case 8: return "b64";
+ case 12: return "b96";
+ case 16: return "b128";
+ default:
+ return "BAD_SIZE";
+ }
+ }
+ default:
+ return "BAD_TYPE";
+ }
+}
+
+static INLINE const char *
+nv_cond_name(ubyte cc)
+{
+ return nv_cond_names[MIN2(cc, 19)];
+}
+
+static INLINE const char *
+nv_modifier_string(ubyte mod)
+{
+ return nv_modifier_strings[MIN2(mod, 9)];
+}
+
+static INLINE int
+nv_value_id(struct nv_value *value)
+{
+ if (value->join->reg.id >= 0)
+ return value->join->reg.id;
+ return value->n;
+}
+
+static INLINE boolean
+nv_value_allocated(struct nv_value *value)
+{
+ return (value->reg.id >= 0) ? TRUE : FALSE;
+}
+
+static INLINE void
+nv_print_address(const char c, int buf, struct nv_value *a, int offset)
+{
+ const char ac = (a && nv_value_allocated(a)) ? '$' : '%';
+ char sg;
+
+ if (offset < 0) {
+ sg = '-';
+ offset = -offset;
+ } else {
+ sg = '+';
+ }
+
+ if (buf >= 0)
+ PRINT(" %s%c%i[", cyan, c, buf);
+ else
+ PRINT(" %s%c[", cyan, c);
+ if (a)
+ PRINT("%s%ca%i%s%c", mgta, ac, nv_value_id(a), cyan, sg);
+ PRINT("%s0x%x%s]", yllw, offset, cyan);
+}
+
+static INLINE void
+nv_print_value(struct nv_value *value, struct nv_value *indir, ubyte type)
+{
+ char reg_pfx = nv_value_allocated(value->join) ? '$' : '%';
+
+ if (value->reg.file != NV_FILE_PRED)
+ PRINT(" %s%s", gree, nv_type_name(type, value->reg.size));
+
+ switch (value->reg.file) {
+ case NV_FILE_GPR:
+ PRINT(" %s%cr%i", blue, reg_pfx, nv_value_id(value));
+ if (value->reg.size == 8)
+ PRINT("d");
+ if (value->reg.size == 16)
+ PRINT("q");
+ break;
+ case NV_FILE_PRED:
+ PRINT(" %s%cp%i", mgta, reg_pfx, nv_value_id(value));
+ break;
+ case NV_FILE_COND:
+ PRINT(" %s%cc%i", mgta, reg_pfx, nv_value_id(value));
+ break;
+ case NV_FILE_MEM_L:
+ nv_print_address('l', -1, indir, value->reg.address);
+ break;
+ case NV_FILE_MEM_G:
+ nv_print_address('g', -1, indir, value->reg.address);
+ break;
+ case NV_FILE_MEM_A:
+ nv_print_address('a', -1, indir, value->reg.address);
+ break;
+ case NV_FILE_MEM_V:
+ nv_print_address('v', -1, indir, value->reg.address);
+ break;
+ case NV_FILE_IMM:
+ switch (type) {
+ case NV_TYPE_U16:
+ case NV_TYPE_S16:
+ PRINT(" %s0x%04x", yllw, value->reg.imm.u32);
+ break;
+ case NV_TYPE_F32:
+ PRINT(" %s%f", yllw, value->reg.imm.f32);
+ break;
+ case NV_TYPE_F64:
+ PRINT(" %s%f", yllw, value->reg.imm.f64);
+ break;
+ case NV_TYPE_U32:
+ case NV_TYPE_S32:
+ case NV_TYPE_P32:
+ case NV_TYPE_ANY:
+ PRINT(" %s0x%08x", yllw, value->reg.imm.u32);
+ break;
+ }
+ break;
+ default:
+ if (value->reg.file >= NV_FILE_MEM_C(0) &&
+ value->reg.file <= NV_FILE_MEM_C(15))
+ nv_print_address('c', value->reg.file - NV_FILE_MEM_C(0), indir,
+ value->reg.address);
+ else
+ NOUVEAU_ERR(" BAD_FILE[%i]", nv_value_id(value));
+ break;
+ }
+}
+
+static INLINE void
+nv_print_ref(struct nv_ref *ref, struct nv_value *indir, ubyte type)
+{
+ nv_print_value(ref->value, indir, type);
+}
+
+void
+nvc0_print_instruction(struct nv_instruction *i)
+{
+ int s;
+
+ PRINT("%i: ", i->serial);
+
+ if (i->predicate >= 0) {
+ PRINT("%s%s", gree, i->cc ? "fl" : "tr");
+ nv_print_ref(i->src[i->predicate], NULL, NV_TYPE_U8);
+ PRINT(" ");
+ }
+
+ PRINT("%s", gree);
+ if (NV_BASEOP(i->opcode) == NV_OP_SET)
+ PRINT("%s %s", nvc0_opcode_name(i->opcode), nv_cond_name(i->set_cond));
+ else
+ if (i->saturate)
+ PRINT("sat %s", nvc0_opcode_name(i->opcode));
+ else
+ PRINT("%s", nvc0_opcode_name(i->opcode));
+
+ if (i->opcode == NV_OP_CVT)
+ nv_print_value(i->def[0], NULL, i->ext.cvt.d);
+ else
+ if (i->def[0])
+ nv_print_value(i->def[0], NULL, NV_OPTYPE(i->opcode));
+ else
+ if (i->target)
+ PRINT(" %s(BB:%i)", yllw, i->target->id);
+ else
+ PRINT(" #");
+
+ for (s = 1; s < 4 && i->def[s]; ++s)
+ nv_print_value(i->def[s], NULL, NV_OPTYPE(i->opcode));
+ if (s > 1)
+ PRINT("%s ,", norm);
+
+ for (s = 0; s < 6 && i->src[s]; ++s) {
+ ubyte type;
+ if (s == i->indirect || s == i->predicate)
+ continue;
+ if (i->opcode == NV_OP_CVT)
+ type = i->ext.cvt.s;
+ else
+ type = NV_OPTYPE(i->opcode);
+
+ if (i->src[s]->mod)
+ PRINT(" %s%s", gree, nv_modifier_string(i->src[s]->mod));
+
+ if (i->indirect >= 0 &&
+ NV_IS_MEMORY_FILE(i->src[s]->value->reg.file))
+ nv_print_ref(i->src[s], i->src[i->indirect]->value, type);
+ else
+ nv_print_ref(i->src[s], NULL, type);
+ }
+ PRINT(" %s\n", norm);
+}
+
+#define NV_MOD_SGN_12 ((NV_MOD_ABS | NV_MOD_NEG) | ((NV_MOD_ABS | NV_MOD_NEG) << 4))
+#define NV_MOD_NEG_123 (NV_MOD_NEG | (NV_MOD_NEG << 4) | (NV_MOD_NEG << 8))
+#define NV_MOD_NEG_3 (NV_MOD_NEG << 8)
+
+#define NV_MOD_SGN NV_MOD_SGN_12
+
+struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
+{
+ { NV_OP_UNDEF, "undef", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
+ { NV_OP_BIND, "bind", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 },
+ { NV_OP_MERGE, "merge", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 },
+ { NV_OP_PHI, "phi", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
+ { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
+ { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 },
+ { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
+ { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
+ { NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
+ { NV_OP_SHL, "shl", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 },
+ { NV_OP_SHR, "shr", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 },
+ { NV_OP_NOT, "not", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SET, "set", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
+ { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_NEG_123, 0, 1, 0, 1, 0, 2, 2 },
+ { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_NEG_123, 0, 1, 0, 1, 0, 2, 2 },
+ { NV_OP_ABS, "abs", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_NEG, "neg", NV_TYPE_F32, NV_MOD_ABS, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
+ { NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
+ { NV_OP_CVT, "cvt", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_FLOOR, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_TRUNC, "trunc", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_SAD, "sad", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
+
+ { NV_OP_VFETCH, "vfetch", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 },
+ { NV_OP_PFETCH, "pfetch", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_EXPORT, "export", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 },
+ { NV_OP_LINTERP, "linterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_PINTERP, "pinterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_EMIT, "emit", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_RESTART, "restart", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_TEX, "tex", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
+ { NV_OP_TXB, "texbias", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
+ { NV_OP_TXL, "texlod", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
+ { NV_OP_TXF, "texfetch", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 },
+ { NV_OP_TXQ, "texquery", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 },
+
+ { NV_OP_QUADOP, "quadop", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_DFDX, "dfdx", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_DFDY, "dfdy", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_KIL, "kil", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_BRA, "bra", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_CALL, "call", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_RET, "ret", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_RET, "exit", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_JOINAT, "joinat", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_JOIN, "join", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_ADD, "add", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 },
+ { NV_OP_MUL, "mul", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 },
+ { NV_OP_ABS, "abs", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_NEG, "neg", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_MAX, "max", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
+ { NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
+ { NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
+ { NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
+ { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 },
+ { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 },
+ { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 },
+ { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SET_F32_OR, "or set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SET_F32_XOR, "xor set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_SLCT, "slct", NV_TYPE_F32, NV_MOD_NEG_3, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_SLCT, "slct", NV_TYPE_S32, NV_MOD_NEG_3, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_SLCT, "slct", NV_TYPE_U32, NV_MOD_NEG_3, 0, 0, 0, 1, 0, 2, 2 },
+
+ { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 },
+
+ { NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 },
+
+ { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
+
+ { NV_OP_UNDEF, "BAD_OP", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }
+};
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
new file mode 100644
index 0000000000..f4afe083e2
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
@@ -0,0 +1,1051 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define NOUVEAU_DEBUG 1
+
+/* #define NVC0_RA_DEBUG_LIVEI */
+/* #define NVC0_RA_DEBUG_LIVE_SETS */
+/* #define NVC0_RA_DEBUG_JOIN */
+
+#include "nvc0_pc.h"
+#include "util/u_simple_list.h"
+
+#define NVC0_NUM_REGISTER_FILES 3
+
+/* @unit_shift: log2 of min allocation unit for register */
+struct register_set {
+ uint32_t bits[NVC0_NUM_REGISTER_FILES][2];
+ uint32_t last[NVC0_NUM_REGISTER_FILES];
+ int log2_unit[NVC0_NUM_REGISTER_FILES];
+ struct nv_pc *pc;
+};
+
+/* aliasing is allowed */
+static void
+intersect_register_sets(struct register_set *dst,
+ struct register_set *src1, struct register_set *src2)
+{
+ int i;
+
+ for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) {
+ dst->bits[i][0] = src1->bits[i][0] | src2->bits[i][0];
+ dst->bits[i][1] = src1->bits[i][1] | src2->bits[i][1];
+ }
+}
+
+static void
+mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask)
+{
+ int i;
+
+ for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) {
+ set->bits[i][0] = (set->bits[i][0] | mask) & umask;
+ set->bits[i][1] = (set->bits[i][1] | mask) & umask;
+ }
+}
+
+struct nv_pc_pass {
+ struct nv_pc *pc;
+ struct nv_instruction **insns;
+ uint num_insns;
+ uint pass_seq;
+};
+
+static void
+ranges_coalesce(struct nv_range *range)
+{
+ while (range->next && range->end >= range->next->bgn) {
+ struct nv_range *rnn = range->next->next;
+ assert(range->bgn <= range->next->bgn);
+ range->end = MAX2(range->end, range->next->end);
+ FREE(range->next);
+ range->next = rnn;
+ }
+}
+
+static boolean
+add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
+{
+ struct nv_range *range, **nextp = &val->livei;
+
+ if (bgn == end) /* [a, a) is invalid / empty */
+ return TRUE;
+
+ for (range = val->livei; range; range = range->next) {
+ if (end < range->bgn)
+ break; /* insert before */
+
+ if (bgn > range->end) {
+ nextp = &range->next;
+ continue; /* insert after */
+ }
+
+ /* overlap */
+ if (bgn < range->bgn) {
+ range->bgn = bgn;
+ if (end > range->end)
+ range->end = end;
+ ranges_coalesce(range);
+ return TRUE;
+ }
+ if (end > range->end) {
+ range->end = end;
+ ranges_coalesce(range);
+ return TRUE;
+ }
+ assert(bgn >= range->bgn);
+ assert(end <= range->end);
+ return TRUE;
+ }
+
+ if (!new_range)
+ new_range = CALLOC_STRUCT(nv_range);
+
+ new_range->bgn = bgn;
+ new_range->end = end;
+ new_range->next = range;
+ *(nextp) = new_range;
+ return FALSE;
+}
+
+static void
+add_range(struct nv_value *val, struct nv_basic_block *b, int end)
+{
+ int bgn;
+
+ if (!val->insn) /* ignore non-def values */
+ return;
+ assert(b->entry->serial <= b->exit->serial);
+ assert(b->phi->serial <= end);
+ assert(b->exit->serial + 1 >= end);
+
+ bgn = val->insn->serial;
+ if (bgn < b->entry->serial || bgn > b->exit->serial)
+ bgn = b->entry->serial;
+
+ assert(bgn <= end);
+
+ add_range_ex(val, bgn, end, NULL);
+}
+
+#if defined(NVC0_RA_DEBUG_JOIN) || defined(NVC0_RA_DEBUG_LIVEI)
+static void
+livei_print(struct nv_value *a)
+{
+ struct nv_range *r = a->livei;
+
+ debug_printf("livei %i: ", a->n);
+ while (r) {
+ debug_printf("[%i, %i) ", r->bgn, r->end);
+ r = r->next;
+ }
+ debug_printf("\n");
+}
+#endif
+
+static void
+livei_unify(struct nv_value *dst, struct nv_value *src)
+{
+ struct nv_range *range, *next;
+
+ for (range = src->livei; range; range = next) {
+ next = range->next;
+ if (add_range_ex(dst, range->bgn, range->end, range))
+ FREE(range);
+ }
+ src->livei = NULL;
+}
+
+static void
+livei_release(struct nv_value *val)
+{
+ struct nv_range *range, *next;
+
+ for (range = val->livei; range; range = next) {
+ next = range->next;
+ FREE(range);
+ }
+}
+
+static boolean
+livei_have_overlap(struct nv_value *a, struct nv_value *b)
+{
+ struct nv_range *r_a, *r_b;
+
+ for (r_a = a->livei; r_a; r_a = r_a->next) {
+ for (r_b = b->livei; r_b; r_b = r_b->next) {
+ if (r_b->bgn < r_a->end &&
+ r_b->end > r_a->bgn)
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+static int
+livei_end(struct nv_value *a)
+{
+ struct nv_range *r = a->livei;
+
+ assert(r);
+ while (r->next)
+ r = r->next;
+ return r->end;
+}
+
+static boolean
+livei_contains(struct nv_value *a, int pos)
+{
+ struct nv_range *r;
+
+ for (r = a->livei; r && r->bgn <= pos; r = r->next)
+ if (r->end > pos)
+ return TRUE;
+ return FALSE;
+}
+
+static boolean
+reg_assign(struct register_set *set, struct nv_value **def, int n)
+{
+ int i, id, s, k;
+ uint32_t m;
+ int f = def[0]->reg.file;
+
+ k = n;
+ if (k == 3)
+ k = 4;
+ s = (k * def[0]->reg.size) >> set->log2_unit[f];
+ m = (1 << s) - 1;
+
+ id = set->last[f];
+
+ for (i = 0; i * 32 < set->last[f]; ++i) {
+ if (set->bits[f][i] == 0xffffffff)
+ continue;
+
+ for (id = 0; id < 32; id += s)
+ if (!(set->bits[f][i] & (m << id)))
+ break;
+ if (id < 32)
+ break;
+ }
+ if (i * 32 + id > set->last[f])
+ return FALSE;
+
+ set->bits[f][i] |= m << id;
+
+ id += i * 32;
+
+ set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);
+
+ for (i = 0; i < n; ++i)
+ if (def[i]->livei)
+ def[i]->reg.id = id++;
+
+ return TRUE;
+}
+
+static INLINE void
+reg_occupy(struct register_set *set, struct nv_value *val)
+{
+ int id = val->reg.id, f = val->reg.file;
+ uint32_t m;
+
+ if (id < 0)
+ return;
+ m = (1 << (val->reg.size >> set->log2_unit[f])) - 1;
+
+ set->bits[f][id / 32] |= m << (id % 32);
+
+ if (set->pc->max_reg[f] < id)
+ set->pc->max_reg[f] = id;
+}
+
+static INLINE void
+reg_release(struct register_set *set, struct nv_value *val)
+{
+ int id = val->reg.id, f = val->reg.file;
+ uint32_t m;
+
+ if (id < 0)
+ return;
+ m = (1 << (val->reg.size >> set->log2_unit[f])) - 1;
+
+ set->bits[f][id / 32] &= ~(m << (id % 32));
+}
+
+static INLINE boolean
+join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+ int i;
+ struct nv_value *val;
+
+ if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
+ return FALSE;
+
+ if (a->join->reg.id == b->join->reg.id)
+ return TRUE;
+
+ /* either a or b or both have been assigned */
+
+ if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
+ return FALSE;
+ else
+ if (b->join->reg.id >= 0) {
+ if (b->join->reg.id == 63)
+ return FALSE;
+ val = a;
+ a = b;
+ b = val;
+ } else
+ if (a->join->reg.id == 63)
+ return FALSE;
+
+ for (i = 0; i < ctx->pc->num_values; ++i) {
+ val = &ctx->pc->values[i];
+
+ if (val->join->reg.id != a->join->reg.id)
+ continue;
+ if (val->join != a->join && livei_have_overlap(val->join, b->join))
+ return FALSE;
+ }
+ return TRUE;
+}
+
+static INLINE void
+do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+ int j;
+ struct nv_value *bjoin = b->join;
+
+ if (b->join->reg.id >= 0)
+ a->join->reg.id = b->join->reg.id;
+
+ livei_unify(a->join, b->join);
+
+#ifdef NVC0_RA_DEBUG_JOIN
+ debug_printf("joining %i to %i\n", b->n, a->n);
+#endif
+
+ /* make a->join the new representative */
+ for (j = 0; j < ctx->pc->num_values; ++j)
+ if (ctx->pc->values[j].join == bjoin)
+ ctx->pc->values[j].join = a->join;
+
+ assert(b->join == a->join);
+}
+
+static INLINE boolean
+try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+ if (!join_allowed(ctx, a, b)) {
+#ifdef NVC0_RA_DEBUG_JOIN
+ debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
+#endif
+ return FALSE;
+ }
+ if (livei_have_overlap(a->join, b->join)) {
+#ifdef NVC0_RA_DEBUG_JOIN
+ debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
+ livei_print(a);
+ livei_print(b);
+#endif
+ return FALSE;
+ }
+
+ do_join_values(ctx, a, b);
+
+ return TRUE;
+}
+
+static void
+join_values_nofail(struct nv_pc_pass *ctx,
+ struct nv_value *a, struct nv_value *b, boolean type_only)
+{
+ if (type_only) {
+ assert(join_allowed(ctx, a, b));
+ do_join_values(ctx, a, b);
+ } else {
+ boolean ok = try_join_values(ctx, a, b);
+ if (!ok) {
+ NOUVEAU_ERR("failed to coalesce values\n");
+ }
+ }
+}
+
+static INLINE boolean
+need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
+{
+ int i = 0, n = 0;
+
+ for (; i < 2; ++i)
+ if (p->out[i] && !IS_LOOP_EDGE(p->out_kind[i]))
+ ++n;
+
+ return (b->num_in > 1) && (n == 2);
+}
+
+/* Look for the @phi's operand whose definition reaches @b. */
+static int
+phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
+ struct nv_basic_block *tb)
+{
+ struct nv_ref *srci, *srcj;
+ int i, j;
+
+ for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) {
+ srci = phi->src[i];
+ /* if already replaced, check with original source first */
+ if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV)
+ srci = srci->value->insn->src[0];
+ if (!nvc0_bblock_reachable_by(b, srci->value->insn->bb, NULL))
+ continue;
+ /* NOTE: back-edges are ignored by the reachable-by check */
+ if (j < 0 || !nvc0_bblock_reachable_by(srcj->value->insn->bb,
+ srci->value->insn->bb, NULL)) {
+ j = i;
+ srcj = srci;
+ }
+ }
+ if (j >= 0 && nvc0_bblock_reachable_by(b, phi->def[0]->insn->bb, NULL))
+ if (!nvc0_bblock_reachable_by(srcj->value->insn->bb,
+ phi->def[0]->insn->bb, NULL))
+ j = -1;
+ return j;
+}
+
+/* For each operand of each PHI in b, generate a new value by inserting a MOV
+ * at the end of the block it is coming from and replace the operand with its
+ * result. This eliminates liveness conflicts and enables us to let values be
+ * copied to the right register if such a conflict exists nonetheless.
+ *
+ * These MOVs are also crucial in making sure the live intervals of phi srces
+ * are extended until the end of the loop, since they are not included in the
+ * live-in sets.
+ */
+static int
+pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *i, *ni;
+ struct nv_value *val;
+ struct nv_basic_block *p, *pn;
+ int n, j;
+
+ b->pass_seq = ctx->pc->pass_seq;
+
+ for (n = 0; n < b->num_in; ++n) {
+ p = pn = b->in[n];
+ assert(p);
+
+ if (need_new_else_block(b, p)) {
+ pn = new_basic_block(ctx->pc);
+
+ if (p->out[0] == b)
+ p->out[0] = pn;
+ else
+ p->out[1] = pn;
+
+ if (p->exit->target == b) /* target to new else-block */
+ p->exit->target = pn;
+
+ b->in[n] = pn;
+
+ pn->out[0] = b;
+ pn->in[0] = p;
+ pn->num_in = 1;
+ }
+ ctx->pc->current_block = pn;
+
+ for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
+ j = phi_opnd_for_bb(i, p, b);
+
+ if (j < 0) {
+ val = i->def[0];
+ } else {
+ val = i->src[j]->value;
+ if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) {
+ j = -1;
+ /* use original value, we already encountered & replaced it */
+ val = val->insn->src[0]->value;
+ }
+ }
+ if (j < 0) /* need an additional source ? */
+ for (j = 0; j < 6 && i->src[j] && i->src[j]->value != val; ++j);
+ assert(j < 6); /* XXX: really ugly shaders */
+
+ ni = new_instruction(ctx->pc, NV_OP_MOV);
+ if (ni->prev && ni->prev->target)
+ nvc0_insns_permute(ni->prev, ni);
+
+ ni->def[0] = new_value_like(ctx->pc, val);
+ ni->def[0]->insn = ni;
+ nv_reference(ctx->pc, ni, 0, val);
+ nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */
+ i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV;
+ }
+
+ if (pn != p && pn->exit) {
+ ctx->pc->current_block = b->in[n ? 0 : 1];
+ ni = new_instruction(ctx->pc, NV_OP_BRA);
+ ni->target = b;
+ ni->terminator = 1;
+ }
+ }
+
+ for (j = 0; j < 2; ++j)
+ if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
+ pass_generate_phi_movs(ctx, b->out[j]);
+
+ return 0;
+}
+
+#define JOIN_MASK_PHI (1 << 0)
+#define JOIN_MASK_SELECT (1 << 1)
+#define JOIN_MASK_MOV (1 << 2)
+#define JOIN_MASK_BIND (1 << 3)
+
+static int
+pass_join_values(struct nv_pc_pass *ctx, unsigned mask)
+{
+ int c, n;
+
+ for (n = 0; n < ctx->num_insns; ++n) {
+ struct nv_instruction *i = ctx->insns[n];
+
+ switch (i->opcode) {
+ case NV_OP_PHI:
+ if (!(mask & JOIN_MASK_PHI))
+ break;
+ for (c = 0; c < 6 && i->src[c]; ++c)
+ join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE);
+ break;
+ case NV_OP_MOV:
+ if (!(mask & JOIN_MASK_MOV))
+ break;
+ if (i->src[0]->value->insn && !i->src[0]->value->insn->def[1])
+ try_join_values(ctx, i->def[0], i->src[0]->value);
+ break;
+ case NV_OP_SELECT:
+ if (!(mask & JOIN_MASK_SELECT))
+ break;
+ for (c = 0; c < 6 && i->src[c]; ++c)
+ join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE);
+ break;
+ case NV_OP_BIND:
+ if (!(mask & JOIN_MASK_BIND))
+ break;
+ for (c = 0; c < 4 && i->src[c]; ++c)
+ join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE);
+ break;
+ case NV_OP_TEX:
+ case NV_OP_TXB:
+ case NV_OP_TXL:
+ case NV_OP_TXQ: /* on nvc0, TEX src and dst can differ */
+ default:
+ break;
+ }
+ }
+ return 0;
+}
+
+/* Order the instructions so that live intervals can be expressed in numbers. */
+static void
+pass_order_instructions(void *priv, struct nv_basic_block *b)
+{
+ struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv;
+ struct nv_instruction *i;
+
+ b->pass_seq = ctx->pc->pass_seq;
+
+ assert(!b->exit || !b->exit->next);
+ for (i = b->phi; i; i = i->next) {
+ i->serial = ctx->num_insns;
+ ctx->insns[ctx->num_insns++] = i;
+ }
+}
+
+static void
+bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
+{
+#ifdef NVC0_RA_DEBUG_LIVE_SETS
+ struct nv_value *val;
+ int j;
+
+ debug_printf("LIVE-INs of BB:%i: ", b->id);
+
+ for (j = 0; j < pc->num_values; ++j) {
+ if (!(b->live_set[j / 32] & (1 << (j % 32))))
+ continue;
+ val = &pc->values[j];
+ if (!val->insn)
+ continue;
+ debug_printf("%i ", val->n);
+ }
+ debug_printf("\n");
+#endif
+}
+
+static INLINE void
+live_set_add(struct nv_basic_block *b, struct nv_value *val)
+{
+ if (!val->insn) /* don't add non-def values */
+ return;
+ b->live_set[val->n / 32] |= 1 << (val->n % 32);
+}
+
+static INLINE void
+live_set_rem(struct nv_basic_block *b, struct nv_value *val)
+{
+ b->live_set[val->n / 32] &= ~(1 << (val->n % 32));
+}
+
+static INLINE boolean
+live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
+{
+ int n = ref->value->n;
+ return b->live_set[n / 32] & (1 << (n % 32));
+}
+
+/* The live set of a block contains those values that are live immediately
+ * before the beginning of the block, so do a backwards scan.
+ */
+static int
+pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *i;
+ int j, n, ret = 0;
+
+ if (b->pass_seq >= ctx->pc->pass_seq)
+ return 0;
+ b->pass_seq = ctx->pc->pass_seq;
+
+ /* slight hack for undecidedness: set phi = entry if it's undefined */
+ if (!b->phi)
+ b->phi = b->entry;
+
+ for (n = 0; n < 2; ++n) {
+ if (!b->out[n] || b->out[n] == b)
+ continue;
+ ret = pass_build_live_sets(ctx, b->out[n]);
+ if (ret)
+ return ret;
+
+ if (n == 0) {
+ for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
+ b->live_set[j] = b->out[n]->live_set[j];
+ } else {
+ for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
+ b->live_set[j] |= b->out[n]->live_set[j];
+ }
+ }
+
+ if (!b->entry)
+ return 0;
+
+ bb_live_set_print(ctx->pc, b);
+
+ for (i = b->exit; i != b->entry->prev; i = i->prev) {
+ for (j = 0; j < 5 && i->def[j]; j++)
+ live_set_rem(b, i->def[j]);
+ for (j = 0; j < 6 && i->src[j]; j++)
+ live_set_add(b, i->src[j]->value);
+ }
+ for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next)
+ live_set_rem(b, i->def[0]);
+
+ bb_live_set_print(ctx->pc, b);
+
+ return 0;
+}
+
+static void collect_live_values(struct nv_basic_block *b, const int n)
+{
+ int i;
+
+ /* XXX: what to do about back/fake-edges (used to include both here) ? */
+ if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) {
+ if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
+ for (i = 0; i < n; ++i)
+ b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
+ } else {
+ memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
+ }
+ } else
+ if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
+ memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
+ } else {
+ memset(b->live_set, 0, n * sizeof(uint32_t));
+ }
+}
+
+/* NOTE: the live intervals of phi functions start at the first non-phi insn. */
+static int
+pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *i, *i_stop;
+ int j, s;
+ const int n = (ctx->pc->num_values + 31) / 32;
+
+ /* verify that first block does not have live-in values */
+ if (b->num_in == 0)
+ for (j = 0; j < n; ++j)
+ assert(b->live_set[j] == 0);
+
+ collect_live_values(b, n);
+
+ /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
+ for (j = 0; j < 2; ++j) {
+ if (!b->out[j] || !b->out[j]->phi)
+ continue;
+ for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) {
+ live_set_rem(b, i->def[0]);
+
+ for (s = 0; s < 6 && i->src[s]; ++s) {
+ assert(i->src[s]->value->insn);
+ if (nvc0_bblock_reachable_by(b, i->src[s]->value->insn->bb,
+ b->out[j]))
+ live_set_add(b, i->src[s]->value);
+ else
+ live_set_rem(b, i->src[s]->value);
+ }
+ }
+ }
+
+ /* remaining live-outs are live until the end */
+ if (b->exit) {
+ for (j = 0; j < ctx->pc->num_values; ++j) {
+ if (!(b->live_set[j / 32] & (1 << (j % 32))))
+ continue;
+ add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
+#ifdef NVC0_RA_DEBUG_LIVEI
+ debug_printf("adding range for live value %i: ", j);
+ livei_print(&ctx->pc->values[j]);
+#endif
+ }
+ }
+
+ i_stop = b->entry ? b->entry->prev : NULL;
+
+ /* don't have to include phi functions here (will have 0 live range) */
+ for (i = b->exit; i != i_stop; i = i->prev) {
+ assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
+ for (j = 0; j < 4 && i->def[j]; ++j)
+ live_set_rem(b, i->def[j]);
+
+ for (j = 0; j < 6 && i->src[j]; ++j) {
+ if (!live_set_test(b, i->src[j])) {
+ live_set_add(b, i->src[j]->value);
+ add_range(i->src[j]->value, b, i->serial);
+#ifdef NVC0_RA_DEBUG_LIVEI
+ debug_printf("adding range for source %i (ends living): ",
+ i->src[j]->value->n);
+ livei_print(i->src[j]->value);
+#endif
+ }
+ }
+ }
+
+ b->pass_seq = ctx->pc->pass_seq;
+
+ if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
+ pass_build_intervals(ctx, b->out[0]);
+
+ if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
+ pass_build_intervals(ctx, b->out[1]);
+
+ return 0;
+}
+
+static INLINE void
+nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set)
+{
+ memset(set, 0, sizeof(*set));
+
+ set->last[NV_FILE_GPR] = 62;
+ set->last[NV_FILE_PRED] = 6;
+ set->last[NV_FILE_COND] = 1;
+
+ set->log2_unit[NV_FILE_GPR] = 2;
+ set->log2_unit[NV_FILE_COND] = 0;
+ set->log2_unit[NV_FILE_PRED] = 0;
+
+ set->pc = pc;
+}
+
+static void
+insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
+{
+ struct nv_value *elem;
+
+ for (elem = list->prev;
+ elem != list && elem->livei->bgn > nval->livei->bgn;
+ elem = elem->prev);
+ /* now elem begins before or at the same time as val */
+
+ nval->prev = elem;
+ nval->next = elem->next;
+ elem->next->prev = nval;
+ elem->next = nval;
+}
+
+static void
+collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head,
+ boolean assigned_only)
+{
+ struct nv_value *val;
+ int k, n;
+
+ make_empty_list(head);
+
+ for (n = 0; n < ctx->num_insns; ++n) {
+ struct nv_instruction *i = ctx->insns[n];
+
+ /* for joined values, only the representative will have livei != NULL */
+ for (k = 0; k < 5; ++k) {
+ if (i->def[k] && i->def[k]->livei)
+ if (!assigned_only || i->def[k]->reg.id >= 0)
+ insert_ordered_tail(head, i->def[k]);
+ }
+ }
+
+ for (val = head->next; val != head->prev; val = val->next) {
+ assert(val->join == val);
+ assert(val->livei->bgn <= val->next->livei->bgn);
+ }
+}
+
+static int
+pass_linear_scan(struct nv_pc_pass *ctx)
+{
+ struct register_set f, free;
+ struct nv_value *cur, *val, *tmp[2];
+ struct nv_value active, inactive, handled, unhandled;
+
+ make_empty_list(&active);
+ make_empty_list(&inactive);
+ make_empty_list(&handled);
+
+ nvc0_ctor_register_set(ctx->pc, &free);
+
+ collect_register_values(ctx, &unhandled, FALSE);
+
+ foreach_s(cur, tmp[0], &unhandled) {
+ remove_from_list(cur);
+
+ foreach_s(val, tmp[1], &active) {
+ if (livei_end(val) <= cur->livei->bgn) {
+ reg_release(&free, val);
+ move_to_head(&handled, val);
+ } else
+ if (!livei_contains(val, cur->livei->bgn)) {
+ reg_release(&free, val);
+ move_to_head(&inactive, val);
+ }
+ }
+
+ foreach_s(val, tmp[1], &inactive) {
+ if (livei_end(val) <= cur->livei->bgn)
+ move_to_head(&handled, val);
+ else
+ if (livei_contains(val, cur->livei->bgn)) {
+ reg_occupy(&free, val);
+ move_to_head(&active, val);
+ }
+ }
+
+ f = free;
+
+ foreach(val, &inactive)
+ if (livei_have_overlap(val, cur))
+ reg_occupy(&f, val);
+
+ foreach(val, &unhandled)
+ if (val->reg.id >= 0 && livei_have_overlap(val, cur))
+ reg_occupy(&f, val);
+
+ if (cur->reg.id < 0) {
+ boolean mem = !reg_assign(&f, &cur, 1);
+
+ if (mem) {
+ NOUVEAU_ERR("out of registers\n");
+ abort();
+ }
+ }
+ insert_at_head(&active, cur);
+ reg_occupy(&free, cur);
+ }
+
+ return 0;
+}
+
+/* Allocate values defined by instructions such as TEX, which have to be
+ * assigned to consecutive registers.
+ * Linear scan doesn't really work here since the values can have different
+ * live intervals.
+ */
+static int
+pass_allocate_constrained_values(struct nv_pc_pass *ctx)
+{
+ struct nv_value regvals, *val;
+ struct nv_instruction *i;
+ struct nv_value *defs[4];
+ struct register_set regs[4];
+ int n, vsize, c;
+ uint32_t mask;
+ boolean mem;
+
+ collect_register_values(ctx, &regvals, TRUE);
+
+ for (n = 0; n < ctx->num_insns; ++n) {
+ i = ctx->insns[n];
+ vsize = nvi_vector_size(i);
+ if (!(vsize > 1))
+ continue;
+ assert(vsize <= 4);
+
+ for (c = 0; c < vsize; ++c)
+ defs[c] = i->def[c]->join;
+
+ if (defs[0]->reg.id >= 0) {
+ for (c = 1; c < vsize; ++c)
+ assert(defs[c]->reg.id >= 0);
+ continue;
+ }
+
+ for (c = 0; c < vsize; ++c) {
+ nvc0_ctor_register_set(ctx->pc, &regs[c]);
+
+ foreach(val, &regvals) {
+ if (val->reg.id >= 0 && livei_have_overlap(val, defs[c]))
+ reg_occupy(&regs[c], val);
+ }
+ mask = 0x11111111;
+ if (vsize == 2) /* granularity is 2 and not 4 */
+ mask |= 0x11111111 << 2;
+ mask_register_set(&regs[c], 0, mask << c);
+
+ if (defs[c]->livei)
+ insert_ordered_tail(&regvals, defs[c]);
+ }
+ for (c = 1; c < vsize; ++c)
+ intersect_register_sets(&regs[0], &regs[0], &regs[c]);
+
+ mem = !reg_assign(&regs[0], &defs[0], vsize);
+
+ if (mem) {
+ NOUVEAU_ERR("out of registers\n");
+ abort();
+ }
+ }
+ return 0;
+}
+
+static int
+nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
+{
+ struct nv_pc_pass *ctx;
+ int i, ret;
+
+ NOUVEAU_DBG("REGISTER ALLOCATION - entering\n");
+
+ ctx = CALLOC_STRUCT(nv_pc_pass);
+ if (!ctx)
+ return -1;
+ ctx->pc = pc;
+
+ ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
+ if (!ctx->insns) {
+ FREE(ctx);
+ return -1;
+ }
+
+ pc->pass_seq++;
+ ret = pass_generate_phi_movs(ctx, root);
+ assert(!ret);
+
+#ifdef NVC0_RA_DEBUG_LIVEI
+ nvc0_print_function(root);
+#endif
+
+ for (i = 0; i < pc->loop_nesting_bound; ++i) {
+ pc->pass_seq++;
+ ret = pass_build_live_sets(ctx, root);
+ assert(!ret && "live sets");
+ if (ret) {
+ NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
+ goto out;
+ }
+ }
+
+ pc->pass_seq++;
+ nvc0_pc_pass_in_order(root, pass_order_instructions, ctx);
+
+ pc->pass_seq++;
+ ret = pass_build_intervals(ctx, root);
+ assert(!ret && "build intervals");
+ if (ret) {
+ NOUVEAU_ERR("failed to build live intervals\n");
+ goto out;
+ }
+
+#ifdef NVC0_RA_DEBUG_LIVEI
+ for (i = 0; i < pc->num_values; ++i)
+ livei_print(&pc->values[i]);
+#endif
+
+ ret = pass_join_values(ctx, JOIN_MASK_PHI);
+ if (ret)
+ goto out;
+ ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_BIND);
+ if (ret)
+ goto out;
+ ret = pass_join_values(ctx, JOIN_MASK_MOV);
+ if (ret)
+ goto out;
+ ret = pass_allocate_constrained_values(ctx);
+ if (ret)
+ goto out;
+ ret = pass_linear_scan(ctx);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < pc->num_values; ++i)
+ livei_release(&pc->values[i]);
+
+ NOUVEAU_DBG("REGISTER ALLOCATION - leaving\n");
+
+out:
+ FREE(ctx->insns);
+ FREE(ctx);
+ return ret;
+}
+
+int
+nvc0_pc_exec_pass1(struct nv_pc *pc)
+{
+ int i, ret;
+
+ for (i = 0; i < pc->num_subroutines + 1; ++i)
+ if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i])))
+ return ret;
+ return 0;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c
new file mode 100644
index 0000000000..3c59213176
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_program.c
@@ -0,0 +1,728 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_defines.h"
+
+#define NOUVEAU_DEBUG
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "nvc0_context.h"
+#include "nvc0_pc.h"
+
+static unsigned
+nvc0_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c)
+{
+ unsigned mask = inst->Dst[0].Register.WriteMask;
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_SIN:
+ return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
+ case TGSI_OPCODE_DP3:
+ return 0x7;
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_DPH:
+ case TGSI_OPCODE_KIL: /* WriteMask ignored */
+ return 0xf;
+ case TGSI_OPCODE_DST:
+ return mask & (c ? 0xa : 0x6);
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_EXP:
+ case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_LOG:
+ case TGSI_OPCODE_POW:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_SCS:
+ return 0x1;
+ case TGSI_OPCODE_IF:
+ return 0x1;
+ case TGSI_OPCODE_LIT:
+ return 0xb;
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXL:
+ case TGSI_OPCODE_TXP:
+ {
+ const struct tgsi_instruction_texture *tex;
+
+ assert(inst->Instruction.Texture);
+ tex = &inst->Texture;
+
+ mask = 0x7;
+ if (inst->Instruction.Opcode != TGSI_OPCODE_TEX &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXD)
+ mask |= 0x8; /* bias, lod or proj */
+
+ switch (tex->Texture) {
+ case TGSI_TEXTURE_1D:
+ mask &= 0x9;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ mask &= 0x5;
+ break;
+ case TGSI_TEXTURE_2D:
+ mask &= 0xb;
+ break;
+ default:
+ break;
+ }
+ }
+ return mask;
+ case TGSI_OPCODE_XPD:
+ {
+ unsigned x = 0;
+ if (mask & 1) x |= 0x6;
+ if (mask & 2) x |= 0x5;
+ if (mask & 4) x |= 0x3;
+ return x;
+ }
+ default:
+ break;
+ }
+
+ return mask;
+}
+
+static void
+nvc0_indirect_inputs(struct nvc0_translation_info *ti, int id)
+{
+ int i, c;
+
+ for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
+ for (c = 0; c < 4; ++c)
+ ti->input_access[i][c] = id;
+
+ ti->indirect_inputs = TRUE;
+}
+
+static void
+nvc0_indirect_outputs(struct nvc0_translation_info *ti, int id)
+{
+ int i, c;
+
+ for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
+ for (c = 0; c < 4; ++c)
+ ti->output_access[i][c] = id;
+
+ ti->indirect_outputs = TRUE;
+}
+
+static INLINE unsigned
+nvc0_system_value_location(unsigned sn, unsigned si, boolean *is_input)
+{
+ /* NOTE: locations 0xfxx indicate special regs */
+ switch (sn) {
+ /*
+ case TGSI_SEMANTIC_VERTEXID:
+ *is_input = TRUE;
+ return 0x2fc;
+ */
+ case TGSI_SEMANTIC_PRIMID:
+ *is_input = TRUE;
+ return 0x60;
+ /*
+ case TGSI_SEMANTIC_LAYER_INDEX:
+ return 0x64;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ return 0x68;
+ */
+ case TGSI_SEMANTIC_INSTANCEID:
+ *is_input = TRUE;
+ return 0x2f8;
+ case TGSI_SEMANTIC_FACE:
+ *is_input = TRUE;
+ return 0x3fc;
+ /*
+ case TGSI_SEMANTIC_INVOCATIONID:
+ return 0xf11;
+ */
+ default:
+ assert(0);
+ return 0x000;
+ }
+}
+
+static INLINE unsigned
+nvc0_varying_location(unsigned sn, unsigned si)
+{
+ switch (sn) {
+ case TGSI_SEMANTIC_POSITION:
+ return 0x70;
+ case TGSI_SEMANTIC_COLOR:
+ return 0x280 + (si * 16); /* are these hard-wired ? */
+ case TGSI_SEMANTIC_BCOLOR:
+ return 0x2a0 + (si * 16);
+ case TGSI_SEMANTIC_FOG:
+ return 0x270;
+ case TGSI_SEMANTIC_PSIZE:
+ return 0x6c;
+ /*
+ case TGSI_SEMANTIC_PNTC:
+ return 0x2e0;
+ */
+ case TGSI_SEMANTIC_GENERIC:
+ /* We'd really like to distinguish between TEXCOORD and GENERIC here,
+ * since only 0x300 to 0x37c can be replaced by sprite coordinates.
+ * Also, gl_PointCoord should be a system value and must be assigned to
+ * address 0x2e0. For now, let's cheat:
+ */
+ assert(si < 31);
+ if (si <= 7)
+ return 0x300 + si * 16;
+ if (si == 9)
+ return 0x2e0;
+ return 0x80 + ((si - 8) * 16);
+ case TGSI_SEMANTIC_NORMAL:
+ return 0x360;
+ case TGSI_SEMANTIC_PRIMID:
+ return 0x40;
+ case TGSI_SEMANTIC_FACE:
+ return 0x3fc;
+ case TGSI_SEMANTIC_EDGEFLAG: /* doesn't exist, set value like for an sreg */
+ return 0xf00;
+ /*
+ case TGSI_SEMANTIC_CLIP_DISTANCE:
+ return 0x2c0 + (si * 4);
+ */
+ default:
+ assert(0);
+ return 0x000;
+ }
+}
+
+static INLINE unsigned
+nvc0_interp_mode(const struct tgsi_full_declaration *decl)
+{
+ unsigned mode;
+
+ if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT)
+ mode = NVC0_INTERP_FLAT;
+ else
+ if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
+ mode = NVC0_INTERP_PERSPECTIVE;
+ else
+ mode = NVC0_INTERP_LINEAR;
+
+ if (decl->Declaration.Centroid)
+ mode |= NVC0_INTERP_CENTROID;
+
+ return mode;
+}
+
+static void
+prog_immediate(struct nvc0_translation_info *ti,
+ const struct tgsi_full_immediate *imm)
+{
+ int c;
+ unsigned n = ti->immd32_nr++;
+
+ assert(ti->immd32_nr <= ti->scan.immediate_count);
+
+ for (c = 0; c < 4; ++c)
+ ti->immd32[n * 4 + c] = imm->u[c].Uint;
+
+ ti->immd32_ty[n] = imm->Immediate.DataType;
+}
+
+static boolean
+prog_decl(struct nvc0_translation_info *ti,
+ const struct tgsi_full_declaration *decl)
+{
+ unsigned i, c;
+ unsigned sn = TGSI_SEMANTIC_GENERIC;
+ unsigned si = 0;
+ const unsigned first = decl->Range.First;
+ const unsigned last = decl->Range.Last;
+
+ if (decl->Declaration.Semantic) {
+ sn = decl->Semantic.Name;
+ si = decl->Semantic.Index;
+ }
+
+ switch (decl->Declaration.File) {
+ case TGSI_FILE_INPUT:
+ for (i = first; i <= last; ++i) {
+ if (ti->prog->type == PIPE_SHADER_VERTEX) {
+ for (c = 0; c < 4; ++c)
+ ti->input_loc[i][c] = 0x80 + i * 16 + c * 4;
+ } else {
+ for (c = 0; c < 4; ++c)
+ ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
+ /* for sprite coordinates: */
+ ti->prog->fp.in_pos[i] = ti->input_loc[i][0] / 4;
+ }
+ if (ti->prog->type == PIPE_SHADER_FRAGMENT)
+ ti->interp_mode[i] = nvc0_interp_mode(decl);
+ }
+ break;
+ case TGSI_FILE_OUTPUT:
+ for (i = first; i <= last; ++i, ++si) {
+ if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
+ si = i;
+ if (i == ti->fp_depth_output) {
+ ti->output_loc[i][2] = (ti->scan.num_outputs - 1) * 4;
+ } else {
+ if (i > ti->fp_depth_output)
+ si -= 1;
+ for (c = 0; c < 4; ++c)
+ ti->output_loc[i][c] = si * 4 + c;
+ }
+ } else {
+ if (sn == TGSI_SEMANTIC_EDGEFLAG)
+ ti->edgeflag_out = i;
+ for (c = 0; c < 4; ++c)
+ ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
+ /* for TFB_VARYING_LOCS: */
+ ti->prog->vp.out_pos[i] = ti->output_loc[i][0] / 4;
+ }
+ }
+ break;
+ case TGSI_FILE_SYSTEM_VALUE:
+ i = first;
+ ti->sysval_loc[i] = nvc0_system_value_location(sn, si, &ti->sysval_in[i]);
+ assert(first == last);
+ break;
+ case TGSI_FILE_TEMPORARY:
+ ti->temp128_nr = MAX2(ti->temp128_nr, last + 1);
+ break;
+ case TGSI_FILE_NULL:
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_SAMPLER:
+ case TGSI_FILE_ADDRESS:
+ case TGSI_FILE_IMMEDIATE:
+ case TGSI_FILE_PREDICATE:
+ break;
+ default:
+ NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
+ return FALSE;
+ }
+ return TRUE;
+}
+
+static void
+prog_inst(struct nvc0_translation_info *ti,
+ const struct tgsi_full_instruction *inst, int id)
+{
+ const struct tgsi_dst_register *dst;
+ const struct tgsi_src_register *src;
+ int s, c, k;
+ unsigned mask;
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
+ ti->subr[ti->num_subrs].first_insn = id - 1;
+ ti->subr[ti->num_subrs].id = ti->num_subrs + 1; /* id 0 is main program */
+ ++ti->num_subrs;
+ }
+
+ if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
+ dst = &inst->Dst[0].Register;
+
+ for (c = 0; c < 4; ++c) {
+ if (dst->Indirect)
+ nvc0_indirect_outputs(ti, id);
+ if (!(dst->WriteMask & (1 << c)))
+ continue;
+ ti->output_access[dst->Index][c] = id;
+ }
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
+ inst->Src[0].Register.File == TGSI_FILE_INPUT &&
+ dst->Index == ti->edgeflag_out)
+ ti->prog->vp.edgeflag = inst->Src[0].Register.Index;
+ } else
+ if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
+ if (inst->Dst[0].Register.Indirect)
+ ti->require_stores = TRUE;
+ }
+
+ for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
+ src = &inst->Src[s].Register;
+ if (src->File == TGSI_FILE_TEMPORARY)
+ if (inst->Src[s].Register.Indirect)
+ ti->require_stores = TRUE;
+ if (src->File != TGSI_FILE_INPUT)
+ continue;
+ mask = nvc0_tgsi_src_mask(inst, s);
+
+ if (inst->Src[s].Register.Indirect)
+ nvc0_indirect_inputs(ti, id);
+
+ for (c = 0; c < 4; ++c) {
+ if (!(mask & (1 << c)))
+ continue;
+ k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
+ if (k <= TGSI_SWIZZLE_W)
+ ti->input_access[src->Index][k] = id;
+ }
+ }
+}
+
+/* Probably should introduce something like struct tgsi_function_declaration
+ * instead of trying to guess inputs/outputs.
+ */
+static void
+prog_subroutine_inst(struct nvc0_subroutine *subr,
+ const struct tgsi_full_instruction *inst)
+{
+ const struct tgsi_dst_register *dst;
+ const struct tgsi_src_register *src;
+ int s, c, k;
+ unsigned mask;
+
+ for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
+ src = &inst->Src[s].Register;
+ if (src->File != TGSI_FILE_TEMPORARY)
+ continue;
+ mask = nvc0_tgsi_src_mask(inst, s);
+
+ for (c = 0; c < 4; ++c) {
+ k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
+
+ if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W)
+ if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32))))
+ subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32);
+ }
+ }
+
+ if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
+ dst = &inst->Dst[0].Register;
+
+ for (c = 0; c < 4; ++c)
+ if (dst->WriteMask & (1 << c))
+ subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32);
+ }
+}
+
+static int
+nvc0_vp_gp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
+{
+ int i, c;
+ unsigned a;
+
+ for (a = 0x80/4, i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
+ for (c = 0; c < 4; ++c, ++a)
+ if (ti->input_access[i][c])
+ vp->hdr[5 + a / 32] |= 1 << (a % 32); /* VP_ATTR_EN */
+ }
+
+ for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
+ a = (ti->output_loc[i][0] - 0x40) / 4;
+ if (ti->output_loc[i][0] >= 0xf00)
+ continue;
+ for (c = 0; c < 4; ++c, ++a) {
+ if (!ti->output_access[i][c])
+ continue;
+ vp->hdr[13 + a / 32] |= 1 << (a % 32); /* VP_EXPORT_EN */
+ }
+ }
+
+ for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) {
+ a = ti->sysval_loc[i] / 4;
+ if (a > 0 && a < (0xf00 / 4))
+ vp->hdr[(ti->sysval_in[i] ? 5 : 13) + a / 32] |= 1 << (a % 32);
+ }
+
+ return 0;
+}
+
+static int
+nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
+{
+ vp->hdr[0] = 0x20461;
+ vp->hdr[4] = 0xff000;
+
+ vp->hdr[18] = (1 << vp->vp.num_ucps) - 1;
+
+ return nvc0_vp_gp_gen_header(vp, ti);
+}
+
+static int
+nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti)
+{
+ unsigned invocations = 1;
+ unsigned max_output_verts, output_prim;
+ unsigned i;
+
+ gp->hdr[0] = 0x21061;
+
+ for (i = 0; i < ti->scan.num_properties; ++i) {
+ switch (ti->scan.properties[i].name) {
+ case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+ output_prim = ti->scan.properties[i].data[0];
+ break;
+ case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
+ max_output_verts = ti->scan.properties[i].data[0];
+ assert(max_output_verts < 512);
+ break;
+ /*
+ case TGSI_PROPERTY_GS_INVOCATIONS:
+ invocations = ti->scan.properties[i].data[0];
+ assert(invocations <= 32);
+ break;
+ */
+ default:
+ break;
+ }
+ }
+
+ gp->hdr[2] = MIN2(invocations, 32) << 24;
+
+ switch (output_prim) {
+ case PIPE_PRIM_POINTS:
+ gp->hdr[3] = 0x01000000;
+ gp->hdr[0] |= 0xf0000000;
+ break;
+ case PIPE_PRIM_LINE_STRIP:
+ gp->hdr[3] = 0x06000000;
+ gp->hdr[0] |= 0x10000000;
+ break;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ gp->hdr[3] = 0x07000000;
+ gp->hdr[0] |= 0x10000000;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ gp->hdr[4] = max_output_verts & 0x1ff;
+
+ return nvc0_vp_gp_gen_header(gp, ti);
+}
+
+static int
+nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti)
+{
+ int i, c;
+ unsigned a, m;
+
+ fp->hdr[0] = 0x21462;
+ fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
+
+ if (ti->scan.uses_kill)
+ fp->hdr[0] |= 0x8000;
+ if (ti->scan.writes_z) {
+ fp->hdr[19] |= 0x2;
+ if (ti->scan.num_outputs > 2)
+ fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
+ } else {
+ if (ti->scan.num_outputs > 1)
+ fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
+ }
+
+ for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
+ m = ti->interp_mode[i] & 3;
+ for (c = 0; c < 4; ++c) {
+ if (!ti->input_access[i][c])
+ continue;
+ a = ti->input_loc[i][c] / 2;
+ if (ti->input_loc[i][c] >= 0x2c0)
+ a -= 32;
+ if (ti->input_loc[i][0] == 0x70)
+ fp->hdr[5] |= 1 << (28 + c); /* FRAG_COORD_UMASK */
+ else
+ if (ti->input_loc[i][0] == 0x2e0)
+ fp->hdr[14] |= 1 << (24 + c); /* POINT_COORD */
+ else
+ fp->hdr[4 + a / 32] |= m << (a % 32);
+ }
+ }
+
+ for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
+ if (i != ti->fp_depth_output)
+ fp->hdr[18] |= 0xf << ti->output_loc[i][0];
+ }
+
+ for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) {
+ a = ti->sysval_loc[i] / 2;
+ if ((a > 0) && (a < 0xf00 / 2))
+ fp->hdr[4 + a / 32] |= NVC0_INTERP_FLAT << (a % 32);
+ }
+
+ return 0;
+}
+
+static boolean
+nvc0_prog_scan(struct nvc0_translation_info *ti)
+{
+ struct nvc0_program *prog = ti->prog;
+ struct tgsi_parse_context parse;
+ int ret;
+ unsigned i;
+
+#ifdef NOUVEAU_DEBUG
+ tgsi_dump(prog->pipe.tokens, 0);
+#endif
+
+ tgsi_scan_shader(prog->pipe.tokens, &ti->scan);
+
+ if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
+ ti->fp_depth_output = 255;
+ for (i = 0; i < ti->scan.num_outputs; ++i)
+ if (ti->scan.output_semantic_name[i] == TGSI_SEMANTIC_POSITION)
+ ti->fp_depth_output = i;
+ }
+
+ ti->subr =
+ CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0]));
+
+ ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
+ ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
+
+ ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0]));
+
+ tgsi_parse_init(&parse, prog->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ prog_immediate(ti, &parse.FullToken.FullImmediate);
+ break;
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ prog_decl(ti, &parse.FullToken.FullDeclaration);
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ ti->insns[ti->num_insns] = parse.FullToken.FullInstruction;
+ prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->num_insns);
+ break;
+ default:
+ break;
+ }
+ }
+
+ for (i = 0; i < ti->num_subrs; ++i) {
+ unsigned pc = ti->subr[i].id;
+ while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB)
+ prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]);
+ }
+
+ switch (prog->type) {
+ case PIPE_SHADER_VERTEX:
+ ti->input_file = NV_FILE_MEM_A;
+ ti->output_file = NV_FILE_MEM_V;
+ ret = nvc0_vp_gen_header(prog, ti);
+ break;
+ /*
+ case PIPE_SHADER_TESSELLATION_CONTROL:
+ ret = nvc0_tcp_gen_header(ti);
+ break;
+ case PIPE_SHADER_TESSELLATION_EVALUATION:
+ ret = nvc0_tep_gen_header(ti);
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ ret = nvc0_gp_gen_header(ti);
+ break;
+ */
+ case PIPE_SHADER_FRAGMENT:
+ ti->input_file = NV_FILE_MEM_V;
+ ti->output_file = NV_FILE_GPR;
+
+ if (ti->scan.writes_z)
+ prog->flags[0] = 0x11; /* ? */
+ else
+ if (!ti->scan.uses_kill && !ti->global_stores)
+ prog->fp.early_z = 1;
+
+ ret = nvc0_fp_gen_header(prog, ti);
+ break;
+ default:
+ assert(!"unsupported program type");
+ ret = -1;
+ break;
+ }
+
+ if (ti->require_stores) {
+ prog->hdr[0] |= 1 << 26;
+ prog->hdr[1] |= ti->temp128_nr * 16; /* l[] size */
+ }
+
+ assert(!ret);
+ return ret;
+}
+
+boolean
+nvc0_program_translate(struct nvc0_program *prog)
+{
+ struct nvc0_translation_info *ti;
+ int ret;
+
+ ti = CALLOC_STRUCT(nvc0_translation_info);
+ ti->prog = prog;
+
+ ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS;
+
+ prog->vp.edgeflag = PIPE_MAX_ATTRIBS;
+
+ if (prog->type == PIPE_SHADER_VERTEX && prog->vp.num_ucps)
+ ti->append_ucp = TRUE;
+
+ ret = nvc0_prog_scan(ti);
+ if (ret) {
+ NOUVEAU_ERR("unsupported shader program\n");
+ goto out;
+ }
+
+ ret = nvc0_generate_code(ti);
+ if (ret)
+ NOUVEAU_ERR("shader translation failed\n");
+
+ {
+ unsigned i;
+ for (i = 0; i < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++i)
+ debug_printf("HDR[%02lx] = 0x%08x\n",
+ i * sizeof(prog->hdr[0]), prog->hdr[i]);
+ }
+
+out:
+ if (ti->immd32)
+ FREE(ti->immd32);
+ if (ti->immd32_ty)
+ FREE(ti->immd32_ty);
+ if (ti->insns)
+ FREE(ti->insns);
+ if (ti->subr)
+ FREE(ti->subr);
+ FREE(ti);
+ return ret ? FALSE : TRUE;
+}
+
+void
+nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
+{
+ if (prog->res)
+ nouveau_resource_free(&prog->res);
+
+ if (prog->code)
+ FREE(prog->code);
+ if (prog->relocs)
+ FREE(prog->relocs);
+
+ memset(prog->hdr, 0, sizeof(prog->hdr));
+
+ prog->translated = FALSE;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h
new file mode 100644
index 0000000000..f6fea29780
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_program.h
@@ -0,0 +1,92 @@
+
+#ifndef __NVC0_PROGRAM_H__
+#define __NVC0_PROGRAM_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+#define NVC0_CAP_MAX_PROGRAM_TEMPS 64
+
+#define NVC0_SHADER_HEADER_SIZE (20 * 4)
+
+struct nvc0_program {
+ struct pipe_shader_state pipe;
+
+ ubyte type;
+ boolean translated;
+ ubyte max_gpr;
+
+ uint32_t *code;
+ unsigned code_base;
+ unsigned code_size;
+ unsigned parm_size;
+
+ uint32_t hdr[20]; /* TODO: move this into code to save space */
+
+ uint32_t flags[2];
+
+ struct {
+ uint8_t edgeflag;
+ uint8_t num_ucps;
+ uint8_t out_pos[PIPE_MAX_SHADER_OUTPUTS];
+ } vp;
+ struct {
+ uint8_t early_z;
+ uint8_t in_pos[PIPE_MAX_SHADER_INPUTS];
+ } fp;
+
+ void *relocs;
+ unsigned num_relocs;
+
+ struct nouveau_resource *res;
+};
+
+/* first 2 bits are written into the program header, for each input */
+#define NVC0_INTERP_FLAT (1 << 0)
+#define NVC0_INTERP_PERSPECTIVE (2 << 0)
+#define NVC0_INTERP_LINEAR (3 << 0)
+#define NVC0_INTERP_CENTROID (1 << 2)
+
+/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */
+struct nvc0_subroutine {
+ unsigned id;
+ unsigned first_insn;
+ uint32_t argv[NVC0_CAP_MAX_PROGRAM_TEMPS][4];
+ uint32_t retv[NVC0_CAP_MAX_PROGRAM_TEMPS][4];
+};
+
+struct nvc0_translation_info {
+ struct nvc0_program *prog;
+ struct tgsi_full_instruction *insns;
+ unsigned num_insns;
+ ubyte input_file;
+ ubyte output_file;
+ ubyte fp_depth_output;
+ uint16_t input_loc[PIPE_MAX_SHADER_INPUTS][4];
+ uint16_t output_loc[PIPE_MAX_SHADER_OUTPUTS][4];
+ uint16_t sysval_loc[TGSI_SEMANTIC_COUNT];
+ boolean sysval_in[TGSI_SEMANTIC_COUNT];
+ int input_access[PIPE_MAX_SHADER_INPUTS][4];
+ int output_access[PIPE_MAX_SHADER_OUTPUTS][4];
+ ubyte interp_mode[PIPE_MAX_SHADER_INPUTS];
+ boolean indirect_inputs;
+ boolean indirect_outputs;
+ boolean require_stores;
+ boolean global_stores;
+ uint32_t *immd32;
+ ubyte *immd32_ty;
+ unsigned immd32_nr;
+ unsigned temp128_nr;
+ ubyte edgeflag_out;
+ struct nvc0_subroutine *subr;
+ unsigned num_subrs;
+ boolean append_ucp;
+ struct tgsi_shader_info scan;
+};
+
+int nvc0_generate_code(struct nvc0_translation_info *);
+
+void nvc0_relocate_program(struct nvc0_program *,
+ uint32_t code_base, uint32_t data_base);
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c
new file mode 100644
index 0000000000..2e9f4c1092
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_push.c
@@ -0,0 +1,354 @@
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+
+#include "nvc0_3d.xml.h"
+
+struct push_context {
+ struct nouveau_channel *chan;
+
+ void *idxbuf;
+
+ uint32_t vertex_words;
+ uint32_t packet_vertex_limit;
+
+ struct translate *translate;
+
+ boolean primitive_restart;
+ uint32_t prim;
+ uint32_t restart_index;
+ uint32_t instance_id;
+
+ struct {
+ int buffer;
+ float value;
+ uint8_t *data;
+ unsigned offset;
+ unsigned stride;
+ } edgeflag;
+};
+
+static void
+init_push_context(struct nvc0_context *nvc0, struct push_context *ctx)
+{
+ struct pipe_vertex_element *ve;
+
+ ctx->chan = nvc0->screen->base.channel;
+ ctx->translate = nvc0->vertex->translate;
+
+ ctx->edgeflag.value = 0.5f;
+
+ if (NVC0_USING_EDGEFLAG(nvc0)) {
+ ve = &nvc0->vertex->element[nvc0->vertprog->vp.edgeflag].pipe;
+
+ ctx->edgeflag.buffer = ve->vertex_buffer_index;
+ ctx->edgeflag.offset = ve->src_offset;
+
+ ctx->packet_vertex_limit = 1;
+ } else {
+ ctx->edgeflag.buffer = -1;
+ ctx->edgeflag.offset = 0;
+ ctx->edgeflag.stride = 0;
+ ctx->edgeflag.data = NULL;
+
+ ctx->packet_vertex_limit = nvc0->vertex->vtx_per_packet_max;
+ }
+
+ ctx->vertex_words = nvc0->vertex->vtx_size;
+}
+
+static INLINE void
+set_edgeflag(struct push_context *ctx, unsigned vtx_id)
+{
+ float f = *(float *)(ctx->edgeflag.data + vtx_id * ctx->edgeflag.stride);
+
+ if (ctx->edgeflag.value != f) {
+ ctx->edgeflag.value = f;
+ IMMED_RING(ctx->chan, RING_3D(EDGEFLAG_ENABLE), f ? 1 : 0);
+ }
+}
+
+static INLINE unsigned
+prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
+{
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
+}
+
+static INLINE unsigned
+prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
+{
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
+}
+
+static INLINE unsigned
+prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
+{
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
+}
+
+static void
+emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint8_t *restrict elts = (uint8_t *)ctx->idxbuf + start;
+
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
+
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i08(elts, push, ctx->restart_index);
+
+ if (unlikely(ctx->edgeflag.buffer >= 0) && nr)
+ set_edgeflag(ctx, elts[0]);
+
+ size = ctx->vertex_words * nr;
+
+ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
+
+ ctx->translate->run_elts8(ctx->translate, elts, nr, ctx->instance_id,
+ ctx->chan->cur);
+
+ ctx->chan->cur += size;
+ count -= nr;
+ elts += nr;
+
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2);
+ OUT_RING (ctx->chan, 0);
+ OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT |
+ (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT));
+ }
+ }
+}
+
+static void
+emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint16_t *restrict elts = (uint16_t *)ctx->idxbuf + start;
+
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
+
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i16(elts, push, ctx->restart_index);
+
+ if (unlikely(ctx->edgeflag.buffer >= 0) && nr)
+ set_edgeflag(ctx, elts[0]);
+
+ size = ctx->vertex_words * nr;
+
+ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
+
+ ctx->translate->run_elts16(ctx->translate, elts, nr, ctx->instance_id,
+ ctx->chan->cur);
+
+ ctx->chan->cur += size;
+ count -= nr;
+ elts += nr;
+
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2);
+ OUT_RING (ctx->chan, 0);
+ OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT |
+ (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT));
+ }
+ }
+}
+
+static void
+emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint32_t *restrict elts = (uint32_t *)ctx->idxbuf + start;
+
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
+
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i32(elts, push, ctx->restart_index);
+
+ if (unlikely(ctx->edgeflag.buffer >= 0) && nr)
+ set_edgeflag(ctx, elts[0]);
+
+ size = ctx->vertex_words * nr;
+
+ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
+
+ ctx->translate->run_elts(ctx->translate, elts, nr, ctx->instance_id,
+ ctx->chan->cur);
+
+ ctx->chan->cur += size;
+ count -= nr;
+ elts += nr;
+
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2);
+ OUT_RING (ctx->chan, 0);
+ OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT |
+ (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT));
+ }
+ }
+}
+
+static void
+emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
+{
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size = ctx->vertex_words * push;
+
+ if (unlikely(ctx->edgeflag.buffer >= 0))
+ set_edgeflag(ctx, start);
+
+ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
+
+ ctx->translate->run(ctx->translate, start, push, ctx->instance_id,
+ ctx->chan->cur);
+ ctx->chan->cur += size;
+ count -= push;
+ start += push;
+ }
+}
+
+
+#define NVC0_PRIM_GL_CASE(n) \
+ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+static INLINE unsigned
+nvc0_prim_gl(unsigned prim)
+{
+ switch (prim) {
+ NVC0_PRIM_GL_CASE(POINTS);
+ NVC0_PRIM_GL_CASE(LINES);
+ NVC0_PRIM_GL_CASE(LINE_LOOP);
+ NVC0_PRIM_GL_CASE(LINE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLES);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
+ NVC0_PRIM_GL_CASE(QUADS);
+ NVC0_PRIM_GL_CASE(QUAD_STRIP);
+ NVC0_PRIM_GL_CASE(POLYGON);
+ NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+ /*
+ NVC0_PRIM_GL_CASE(PATCHES); */
+ default:
+ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+ break;
+ }
+}
+
+void
+nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
+{
+ struct push_context ctx;
+ unsigned i, index_size;
+ unsigned inst = info->instance_count;
+ boolean apply_bias = info->indexed && info->index_bias;
+
+ init_push_context(nvc0, &ctx);
+
+ for (i = 0; i < nvc0->num_vtxbufs; ++i) {
+ uint8_t *data;
+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i];
+ struct nv04_resource *res = nv04_resource(vb->buffer);
+
+ data = nouveau_resource_map_offset(&nvc0->base, res,
+ vb->buffer_offset, NOUVEAU_BO_RD);
+
+ if (apply_bias && likely(!(nvc0->vertex->instance_bufs & (1 << i))))
+ data += info->index_bias * vb->stride;
+
+ ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
+
+ if (unlikely(i == ctx.edgeflag.buffer)) {
+ ctx.edgeflag.data = data + ctx.edgeflag.offset;
+ ctx.edgeflag.stride = vb->stride;
+ }
+ }
+
+ if (info->indexed) {
+ ctx.idxbuf =
+ nouveau_resource_map_offset(&nvc0->base,
+ nv04_resource(nvc0->idxbuf.buffer),
+ nvc0->idxbuf.offset, NOUVEAU_BO_RD);
+ if (!ctx.idxbuf)
+ return;
+ index_size = nvc0->idxbuf.index_size;
+ ctx.primitive_restart = info->primitive_restart;
+ ctx.restart_index = info->restart_index;
+ } else {
+ ctx.idxbuf = NULL;
+ index_size = 0;
+ ctx.primitive_restart = FALSE;
+ ctx.restart_index = 0;
+ }
+
+ ctx.instance_id = info->start_instance;
+ ctx.prim = nvc0_prim_gl(info->mode);
+
+ while (inst--) {
+ BEGIN_RING(ctx.chan, RING_3D(VERTEX_BEGIN_GL), 1);
+ OUT_RING (ctx.chan, ctx.prim);
+ switch (index_size) {
+ case 0:
+ emit_vertices_seq(&ctx, info->start, info->count);
+ break;
+ case 1:
+ emit_vertices_i08(&ctx, info->start, info->count);
+ break;
+ case 2:
+ emit_vertices_i16(&ctx, info->start, info->count);
+ break;
+ case 4:
+ emit_vertices_i32(&ctx, info->start, info->count);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ IMMED_RING(ctx.chan, RING_3D(VERTEX_END_GL), 0);
+
+ ctx.instance_id++;
+ ctx.prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+
+ if (unlikely(ctx.edgeflag.value == 0.0f))
+ IMMED_RING(ctx.chan, RING_3D(EDGEFLAG_ENABLE), 1);
+
+ if (info->indexed)
+ nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer));
+
+ for (i = 0; i < nvc0->num_vtxbufs; ++i)
+ nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer));
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_push2.c b/src/gallium/drivers/nvc0/nvc0_push2.c
new file mode 100644
index 0000000000..6f51600558
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_push2.c
@@ -0,0 +1,333 @@
+
+#if 0 /* not used, kept for now to compare with util/translate */
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+
+#include "nvc0_3d.xml.h"
+
+struct push_context {
+ struct nvc0_context *nvc0;
+
+ uint vertex_size;
+
+ void *idxbuf;
+ uint idxsize;
+
+ float edgeflag;
+ int edgeflag_input;
+
+ struct {
+ void *map;
+ void (*push)(struct nouveau_channel *, void *);
+ uint32_t stride;
+ uint32_t divisor;
+ uint32_t step;
+ } attr[32];
+ int num_attrs;
+};
+
+static void
+emit_b32_1(struct nouveau_channel *chan, void *data)
+{
+ uint32_t *v = data;
+
+ OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b32_2(struct nouveau_channel *chan, void *data)
+{
+ uint32_t *v = data;
+
+ OUT_RING(chan, v[0]);
+ OUT_RING(chan, v[1]);
+}
+
+static void
+emit_b32_3(struct nouveau_channel *chan, void *data)
+{
+ uint32_t *v = data;
+
+ OUT_RING(chan, v[0]);
+ OUT_RING(chan, v[1]);
+ OUT_RING(chan, v[2]);
+}
+
+static void
+emit_b32_4(struct nouveau_channel *chan, void *data)
+{
+ uint32_t *v = data;
+
+ OUT_RING(chan, v[0]);
+ OUT_RING(chan, v[1]);
+ OUT_RING(chan, v[2]);
+ OUT_RING(chan, v[3]);
+}
+
+static void
+emit_b16_1(struct nouveau_channel *chan, void *data)
+{
+ uint16_t *v = data;
+
+ OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b16_3(struct nouveau_channel *chan, void *data)
+{
+ uint16_t *v = data;
+
+ OUT_RING(chan, (v[1] << 16) | v[0]);
+ OUT_RING(chan, v[2]);
+}
+
+static void
+emit_b08_1(struct nouveau_channel *chan, void *data)
+{
+ uint8_t *v = data;
+
+ OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b08_3(struct nouveau_channel *chan, void *data)
+{
+ uint8_t *v = data;
+
+ OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
+}
+
+static void
+emit_b64_1(struct nouveau_channel *chan, void *data)
+{
+ double *v = data;
+
+ OUT_RINGf(chan, v[0]);
+}
+
+static void
+emit_b64_2(struct nouveau_channel *chan, void *data)
+{
+ double *v = data;
+
+ OUT_RINGf(chan, v[0]);
+ OUT_RINGf(chan, v[1]);
+}
+
+static void
+emit_b64_3(struct nouveau_channel *chan, void *data)
+{
+ double *v = data;
+
+ OUT_RINGf(chan, v[0]);
+ OUT_RINGf(chan, v[1]);
+ OUT_RINGf(chan, v[2]);
+}
+
+static void
+emit_b64_4(struct nouveau_channel *chan, void *data)
+{
+ double *v = data;
+
+ OUT_RINGf(chan, v[0]);
+ OUT_RINGf(chan, v[1]);
+ OUT_RINGf(chan, v[2]);
+ OUT_RINGf(chan, v[3]);
+}
+
+static INLINE void
+emit_vertex(struct push_context *ctx, unsigned n)
+{
+ struct nouveau_channel *chan = ctx->nvc0->screen->base.channel;
+ int i;
+
+ if (ctx->edgeflag_input < 32) {
+ /* TODO */
+ }
+
+ BEGIN_RING_NI(chan, RING_3D(VERTEX_DATA), ctx->vertex_size);
+ for (i = 0; i < ctx->num_attrs; ++i)
+ ctx->attr[i].push(chan,
+ (uint8_t *)ctx->attr[i].map + n * ctx->attr[i].stride);
+}
+
+static void
+emit_edgeflag(struct push_context *ctx, boolean enabled)
+{
+ struct nouveau_channel *chan = ctx->nvc0->screen->base.channel;
+
+ IMMED_RING(chan, RING_3D(EDGEFLAG_ENABLE), enabled);
+}
+
+static void
+emit_elt08(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint8_t *idxbuf = ctx->idxbuf;
+
+ while (count--)
+ emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_elt16(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint16_t *idxbuf = ctx->idxbuf;
+
+ while (count--)
+ emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_elt32(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint32_t *idxbuf = ctx->idxbuf;
+
+ while (count--)
+ emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_seq(struct push_context *ctx, unsigned start, unsigned count)
+{
+ while (count--)
+ emit_vertex(ctx, start++);
+}
+
+#define NVC0_PRIM_GL_CASE(n) \
+ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+static INLINE unsigned
+nvc0_prim_gl(unsigned prim)
+{
+ switch (prim) {
+ NVC0_PRIM_GL_CASE(POINTS);
+ NVC0_PRIM_GL_CASE(LINES);
+ NVC0_PRIM_GL_CASE(LINE_LOOP);
+ NVC0_PRIM_GL_CASE(LINE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLES);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
+ NVC0_PRIM_GL_CASE(QUADS);
+ NVC0_PRIM_GL_CASE(QUAD_STRIP);
+ NVC0_PRIM_GL_CASE(POLYGON);
+ NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+ /*
+ NVC0_PRIM_GL_CASE(PATCHES); */
+ default:
+ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+ break;
+ }
+}
+
+void
+nvc0_push_vbo2(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
+{
+ struct push_context ctx;
+ unsigned i, n;
+ unsigned inst = info->instance_count;
+ unsigned prim = nvc0_prim_gl(info->mode);
+
+ ctx.nvc0 = nvc0;
+ ctx.vertex_size = nvc0->vertex->vtx_size;
+ ctx.idxbuf = NULL;
+ ctx.num_attrs = 0;
+ ctx.edgeflag = 0.5f;
+ ctx.edgeflag_input = 32;
+
+ for (i = 0; i < nvc0->vertex->num_elements; ++i) {
+ struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe;
+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index];
+ struct nouveau_bo *bo = nvc0_resource(vb->buffer)->bo;
+ unsigned nr_components;
+
+ if (!(nvc0->vbo_fifo & (1 << i)))
+ continue;
+ n = ctx.num_attrs++;
+
+ if (nouveau_bo_map(bo, NOUVEAU_BO_RD))
+ return;
+ ctx.attr[n].map = (uint8_t *)bo->map + vb->buffer_offset + ve->src_offset;
+
+ nouveau_bo_unmap(bo);
+
+ ctx.attr[n].stride = vb->stride;
+ ctx.attr[n].divisor = ve->instance_divisor;
+
+ nr_components = util_format_get_nr_components(ve->src_format);
+ switch (util_format_get_component_bits(ve->src_format,
+ UTIL_FORMAT_COLORSPACE_RGB, 0)) {
+ case 8:
+ switch (nr_components) {
+ case 1: ctx.attr[n].push = emit_b08_1; break;
+ case 2: ctx.attr[n].push = emit_b16_1; break;
+ case 3: ctx.attr[n].push = emit_b08_3; break;
+ case 4: ctx.attr[n].push = emit_b32_1; break;
+ }
+ break;
+ case 16:
+ switch (nr_components) {
+ case 1: ctx.attr[n].push = emit_b16_1; break;
+ case 2: ctx.attr[n].push = emit_b32_1; break;
+ case 3: ctx.attr[n].push = emit_b16_3; break;
+ case 4: ctx.attr[n].push = emit_b32_2; break;
+ }
+ break;
+ case 32:
+ switch (nr_components) {
+ case 1: ctx.attr[n].push = emit_b32_1; break;
+ case 2: ctx.attr[n].push = emit_b32_2; break;
+ case 3: ctx.attr[n].push = emit_b32_3; break;
+ case 4: ctx.attr[n].push = emit_b32_4; break;
+ }
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ if (info->indexed) {
+ struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer);
+ if (!res || nouveau_bo_map(res->bo, NOUVEAU_BO_RD))
+ return;
+ ctx.idxbuf = (uint8_t *)res->bo->map + nvc0->idxbuf.offset + res->offset;
+ nouveau_bo_unmap(res->bo);
+ ctx.idxsize = nvc0->idxbuf.index_size;
+ } else {
+ ctx.idxsize = 0;
+ }
+
+ while (inst--) {
+ BEGIN_RING(nvc0->screen->base.channel, RING_3D(VERTEX_BEGIN_GL), 1);
+ OUT_RING (nvc0->screen->base.channel, prim);
+ switch (ctx.idxsize) {
+ case 0:
+ emit_seq(&ctx, info->start, info->count);
+ break;
+ case 1:
+ emit_elt08(&ctx, info->start, info->count);
+ break;
+ case 2:
+ emit_elt16(&ctx, info->start, info->count);
+ break;
+ case 4:
+ emit_elt32(&ctx, info->start, info->count);
+ break;
+ }
+ IMMED_RING(nvc0->screen->base.channel, RING_3D(VERTEX_END_GL), 0);
+
+ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+}
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c
new file mode 100644
index 0000000000..ead015b6b8
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_query.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright 2011 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Christoph Bumiller
+ */
+
+#include "nvc0_context.h"
+#include "nouveau/nv_object.xml.h"
+
+/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
+ * (since we use only a single GPU channel per screen) will not work properly.
+ *
+ * The first is not that big of an issue because OpenGL does not allow nested
+ * queries anyway.
+ */
+
+struct nvc0_query {
+ uint32_t *data;
+ uint32_t type;
+ uint32_t sequence;
+ struct nouveau_bo *bo;
+ uint32_t base;
+ uint32_t offset; /* base + i * 16 */
+ boolean ready;
+ boolean is64bit;
+ struct nouveau_mm_allocation *mm;
+};
+
+#define NVC0_QUERY_ALLOC_SPACE 128
+
+static INLINE struct nvc0_query *
+nvc0_query(struct pipe_query *pipe)
+{
+ return (struct nvc0_query *)pipe;
+}
+
+static boolean
+nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ int ret;
+
+ if (q->bo) {
+ nouveau_bo_ref(NULL, &q->bo);
+ if (q->mm) {
+ if (q->ready)
+ nouveau_mm_free(q->mm);
+ else
+ nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, q->mm);
+ }
+ }
+ if (size) {
+ q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
+ if (!q->bo)
+ return FALSE;
+ q->offset = q->base;
+
+ ret = nouveau_bo_map_range(q->bo, q->base, size, NOUVEAU_BO_RD |
+ NOUVEAU_BO_NOSYNC);
+ if (ret) {
+ nvc0_query_allocate(nvc0, q, 0);
+ return FALSE;
+ }
+ q->data = q->bo->map;
+ nouveau_bo_unmap(q->bo);
+ }
+ return TRUE;
+}
+
+static void
+nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0);
+ FREE(nvc0_query(pq));
+}
+
+static struct pipe_query *
+nvc0_query_create(struct pipe_context *pipe, unsigned type)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nvc0_query *q;
+
+ q = CALLOC_STRUCT(nvc0_query);
+ if (!q)
+ return NULL;
+
+ if (!nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE)) {
+ FREE(q);
+ return NULL;
+ }
+
+ q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
+ type == PIPE_QUERY_PRIMITIVES_EMITTED ||
+ type == PIPE_QUERY_SO_STATISTICS);
+ q->type = type;
+
+ if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
+ q->offset -= 16;
+ q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */
+ }
+
+ return (struct pipe_query *)q;
+}
+
+static void
+nvc0_query_get(struct nouveau_channel *chan, struct nvc0_query *q,
+ unsigned offset, uint32_t get)
+{
+ offset += q->offset;
+
+ MARK_RING (chan, 5, 2);
+ BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
+ OUT_RELOCh(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ OUT_RING (chan, q->sequence);
+ OUT_RING (chan, get);
+}
+
+static void
+nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_query *q = nvc0_query(pq);
+
+ /* For occlusion queries we have to change the storage, because a previous
+ * query might set the initial render conition to FALSE even *after* we re-
+ * initialized it to TRUE.
+ */
+ if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
+ q->offset += 16;
+ q->data += 16 / sizeof(*q->data);
+ if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
+ nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
+
+ /* XXX: can we do this with the GPU, and sync with respect to a previous
+ * query ?
+ */
+ q->data[1] = 1; /* initial render condition = TRUE */
+ }
+ if (!q->is64bit)
+ q->data[0] = q->sequence++; /* the previously used one */
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ IMMED_RING(chan, RING_3D(COUNTER_RESET), NVC0_3D_COUNTER_RESET_SAMPLECNT);
+ IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */
+ IMMED_RING(chan, RING_3D(COUNTER_RESET),
+ NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ IMMED_RING(chan, RING_3D(COUNTER_RESET),
+ NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ BEGIN_RING_NI(chan, RING_3D(COUNTER_RESET), 2);
+ OUT_RING (chan, NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
+ OUT_RING (chan, NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ case PIPE_QUERY_TIME_ELAPSED:
+ nvc0_query_get(chan, q, 0x10, 0x00005002);
+ break;
+ default:
+ break;
+ }
+ q->ready = FALSE;
+}
+
+static void
+nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_query *q = nvc0_query(pq);
+
+ const int index = 0; /* for multiple vertex streams */
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ nvc0_query_get(chan, q, 0, 0x0100f002);
+ BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
+ OUT_RING (chan, 0);
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nvc0_query_get(chan, q, 0, 0x09005002 | (index << 5));
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5));
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5));
+ nvc0_query_get(chan, q, 0x10, 0x09005002 | (index << 5));
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ case PIPE_QUERY_TIME_ELAPSED:
+ nvc0_query_get(chan, q, 0, 0x00005002);
+ break;
+ case PIPE_QUERY_GPU_FINISHED:
+ nvc0_query_get(chan, q, 0, 0x1000f010);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static INLINE boolean
+nvc0_query_ready(struct nvc0_query *q)
+{
+ return q->ready || (!q->is64bit && (q->data[0] == q->sequence));
+}
+
+static INLINE boolean
+nvc0_query_wait(struct nvc0_query *q)
+{
+ int ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD);
+ if (ret)
+ return FALSE;
+ nouveau_bo_unmap(q->bo);
+ return TRUE;
+}
+
+static boolean
+nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+ boolean wait, void *result)
+{
+ struct nvc0_query *q = nvc0_query(pq);
+ uint64_t *res64 = result;
+ uint32_t *res32 = result;
+ boolean *res8 = result;
+ uint64_t *data64 = (uint64_t *)q->data;
+
+ if (q->type == PIPE_QUERY_GPU_FINISHED) {
+ res8[0] = nvc0_query_ready(q);
+ return TRUE;
+ }
+
+ if (!q->ready) /* update ? */
+ q->ready = nvc0_query_ready(q);
+ if (!q->ready) {
+ struct nouveau_channel *chan = nvc0_context(pipe)->screen->base.channel;
+ if (!wait) {
+ if (nouveau_bo_pending(q->bo) & NOUVEAU_BO_WR) /* for daft apps */
+ FIRE_RING(chan);
+ return FALSE;
+ }
+ if (!nvc0_query_wait(q))
+ return FALSE;
+ }
+ q->ready = TRUE;
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
+ res32[0] = q->data[1];
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
+ case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
+ res64[0] = data64[0];
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ res64[0] = data64[0];
+ res64[1] = data64[1];
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
+ res64[0] = 1000000000;
+ res8[8] = (data64[0] == data64[2]) ? FALSE : TRUE;
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ res64[0] = data64[1] - data64[3];
+ break;
+ default:
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static void
+nvc0_render_condition(struct pipe_context *pipe,
+ struct pipe_query *pq, uint mode)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_query *q;
+
+ if (!pq) {
+ IMMED_RING(chan, RING_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+ return;
+ }
+ q = nvc0_query(pq);
+
+ if (mode == PIPE_RENDER_COND_WAIT ||
+ mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
+ MARK_RING (chan, 5, 2);
+ BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4);
+ OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RING (chan, q->sequence);
+ OUT_RING (chan, 0x00001001);
+ }
+
+ MARK_RING (chan, 4, 2);
+ BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RING (chan, NVC0_3D_COND_MODE_RES_NON_ZERO);
+}
+
+void
+nvc0_init_query_functions(struct nvc0_context *nvc0)
+{
+ struct pipe_context *pipe = &nvc0->base.pipe;
+
+ pipe->create_query = nvc0_query_create;
+ pipe->destroy_query = nvc0_query_destroy;
+ pipe->begin_query = nvc0_query_begin;
+ pipe->end_query = nvc0_query_end;
+ pipe->get_query_result = nvc0_query_result;
+ pipe->render_condition = nvc0_render_condition;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_resource.c b/src/gallium/drivers/nvc0/nvc0_resource.c
new file mode 100644
index 0000000000..44e66314e7
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_resource.c
@@ -0,0 +1,51 @@
+
+#include "pipe/p_context.h"
+#include "nvc0_resource.h"
+#include "nouveau/nouveau_screen.h"
+
+
+static struct pipe_resource *
+nvc0_resource_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ)
+{
+ switch (templ->target) {
+ case PIPE_BUFFER:
+ return nouveau_buffer_create(screen, templ);
+ default:
+ return nvc0_miptree_create(screen, templ);
+ }
+}
+
+static struct pipe_resource *
+nvc0_resource_from_handle(struct pipe_screen * screen,
+ const struct pipe_resource *templ,
+ struct winsys_handle *whandle)
+{
+ if (templ->target == PIPE_BUFFER)
+ return NULL;
+ else
+ return nvc0_miptree_from_handle(screen, templ, whandle);
+}
+
+void
+nvc0_init_resource_functions(struct pipe_context *pcontext)
+{
+ pcontext->get_transfer = u_get_transfer_vtbl;
+ pcontext->transfer_map = u_transfer_map_vtbl;
+ pcontext->transfer_flush_region = u_transfer_flush_region_vtbl;
+ pcontext->transfer_unmap = u_transfer_unmap_vtbl;
+ pcontext->transfer_destroy = u_transfer_destroy_vtbl;
+ pcontext->transfer_inline_write = u_transfer_inline_write_vtbl;
+ pcontext->create_surface = nvc0_miptree_surface_new;
+ pcontext->surface_destroy = nvc0_miptree_surface_del;
+}
+
+void
+nvc0_screen_init_resource_functions(struct pipe_screen *pscreen)
+{
+ pscreen->resource_create = nvc0_resource_create;
+ pscreen->resource_from_handle = nvc0_resource_from_handle;
+ pscreen->resource_get_handle = u_resource_get_handle_vtbl;
+ pscreen->resource_destroy = u_resource_destroy_vtbl;
+ pscreen->user_buffer_create = nouveau_user_buffer_create;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h
new file mode 100644
index 0000000000..f1c445b515
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_resource.h
@@ -0,0 +1,75 @@
+
+#ifndef __NVC0_RESOURCE_H__
+#define __NVC0_RESOURCE_H__
+
+#include "util/u_transfer.h"
+#include "util/u_double_list.h"
+#define NOUVEAU_NVC0
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_fence.h"
+#include "nouveau/nouveau_buffer.h"
+#undef NOUVEAU_NVC0
+
+void
+nvc0_init_resource_functions(struct pipe_context *pcontext);
+
+void
+nvc0_screen_init_resource_functions(struct pipe_screen *pscreen);
+
+#define NVC0_TILE_DIM_SHIFT(m, d) (((m) >> (d * 4)) & 0xf)
+
+#define NVC0_TILE_PITCH(m) (64 << NVC0_TILE_DIM_SHIFT(m, 0))
+#define NVC0_TILE_HEIGHT(m) ( 8 << NVC0_TILE_DIM_SHIFT(m, 1))
+#define NVC0_TILE_DEPTH(m) ( 1 << NVC0_TILE_DIM_SHIFT(m, 2))
+
+#define NVC0_TILE_SIZE_2D(m) (((64 * 8) << \
+ NVC0_TILE_DIM_SHIFT(m, 0)) << \
+ NVC0_TILE_DIM_SHIFT(m, 1))
+
+#define NVC0_TILE_SIZE(m) (NVC0_TILE_SIZE_2D(m) << NVC0_TILE_DIM_SHIFT(m, 2))
+
+struct nvc0_miptree_level {
+ uint32_t offset;
+ uint32_t pitch;
+ uint32_t tile_mode;
+};
+
+#define NVC0_MAX_TEXTURE_LEVELS 16
+
+struct nvc0_miptree {
+ struct nv04_resource base;
+ struct nvc0_miptree_level level[NVC0_MAX_TEXTURE_LEVELS];
+ uint32_t total_size;
+ uint32_t layer_stride;
+ boolean layout_3d; /* TRUE if layer count varies with mip level */
+};
+
+static INLINE struct nvc0_miptree *
+nvc0_miptree(struct pipe_resource *pt)
+{
+ return (struct nvc0_miptree *)pt;
+}
+
+/* Internal functions:
+ */
+struct pipe_resource *
+nvc0_miptree_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *tmp);
+
+struct pipe_resource *
+nvc0_miptree_from_handle(struct pipe_screen *pscreen,
+ const struct pipe_resource *template,
+ struct winsys_handle *whandle);
+
+struct pipe_surface *
+nvc0_miptree_surface_new(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_surface *templ);
+
+void
+nvc0_miptree_surface_del(struct pipe_context *, struct pipe_surface *);
+
+uint32_t
+nvc0_miptree_zslice_offset(struct nvc0_miptree *, unsigned l, unsigned z);
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
new file mode 100644
index 0000000000..1047ba3c33
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -0,0 +1,676 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "util/u_format_s3tc.h"
+#include "pipe/p_screen.h"
+
+#include "nvc0_context.h"
+#include "nvc0_screen.h"
+
+#include "nouveau/nv_object.xml.h"
+#include "nvc0_graph_macros.h"
+
+static boolean
+nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned bindings)
+{
+ if (sample_count > 1)
+ return FALSE;
+
+ if (!util_format_s3tc_enabled) {
+ switch (format) {
+ case PIPE_FORMAT_DXT1_RGB:
+ case PIPE_FORMAT_DXT1_RGBA:
+ case PIPE_FORMAT_DXT3_RGBA:
+ case PIPE_FORMAT_DXT5_RGBA:
+ return FALSE;
+ default:
+ break;
+ }
+ }
+
+ /* transfers & shared are always supported */
+ bindings &= ~(PIPE_BIND_TRANSFER_READ |
+ PIPE_BIND_TRANSFER_WRITE |
+ PIPE_BIND_SHARED);
+
+ return (nvc0_format_table[format].usage & bindings) == bindings;
+}
+
+static int
+nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return 32;
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return 64;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 13;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 10;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 13;
+ case PIPE_CAP_ARRAY_TEXTURES:
+ return 1;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ case PIPE_CAP_NPOT_TEXTURES:
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ case PIPE_CAP_DEPTH_CLAMP:
+ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_GLSL:
+ case PIPE_CAP_SM3:
+ return 1;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 8;
+ case PIPE_CAP_TIMER_QUERY:
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 1;
+ case PIPE_CAP_STREAM_OUTPUT:
+ return 0;
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ return 0;
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ return 0;
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ return 1;
+ default:
+ NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static int
+nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
+ enum pipe_shader_cap param)
+{
+ switch (shader) {
+ case PIPE_SHADER_VERTEX:
+ /*
+ case PIPE_SHADER_TESSELLATION_CONTROL:
+ case PIPE_SHADER_TESSELLATION_EVALUATION:
+ */
+ case PIPE_SHADER_GEOMETRY:
+ case PIPE_SHADER_FRAGMENT:
+ break;
+ default:
+ return 0;
+ }
+
+ switch (param) {
+ case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+ return 16384;
+ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+ return 4;
+ case PIPE_SHADER_CAP_MAX_INPUTS:
+ if (shader == PIPE_SHADER_VERTEX)
+ return 32;
+ return 0x300 / 16;
+ case PIPE_SHADER_CAP_MAX_CONSTS:
+ return 65536 / 16;
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+ return 14;
+ case PIPE_SHADER_CAP_MAX_ADDRS:
+ return 1;
+ case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+ return shader != PIPE_SHADER_FRAGMENT;
+ case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+ return 1;
+ case PIPE_SHADER_CAP_MAX_PREDS:
+ return 0;
+ case PIPE_SHADER_CAP_MAX_TEMPS:
+ return NVC0_CAP_MAX_PROGRAM_TEMPS;
+ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+ return 1;
+ case PIPE_SHADER_CAP_SUBROUTINES:
+ return 0; /* please inline, or provide function declarations */
+ default:
+ NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 10.0f;
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 64.0f;
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 16.0f;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 4.0f;
+ default:
+ NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
+ return 0.0f;
+ }
+}
+
+static void
+nvc0_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+
+ nouveau_fence_wait(screen->base.fence.current);
+ nouveau_fence_ref(NULL, &screen->base.fence.current);
+
+ nouveau_bo_ref(NULL, &screen->text);
+ nouveau_bo_ref(NULL, &screen->tls);
+ nouveau_bo_ref(NULL, &screen->txc);
+ nouveau_bo_ref(NULL, &screen->fence.bo);
+ nouveau_bo_ref(NULL, &screen->vfetch_cache);
+
+ nouveau_resource_destroy(&screen->text_heap);
+
+ if (screen->tic.entries)
+ FREE(screen->tic.entries);
+
+ nouveau_mm_destroy(screen->mm_VRAM_fe0);
+
+ nouveau_grobj_free(&screen->fermi);
+ nouveau_grobj_free(&screen->eng2d);
+ nouveau_grobj_free(&screen->m2mf);
+
+ nouveau_screen_fini(&screen->base);
+
+ FREE(screen);
+}
+
+static int
+nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
+ unsigned size, const uint32_t *data)
+{
+ struct nouveau_channel *chan = screen->base.channel;
+
+ size /= 4;
+
+ BEGIN_RING(chan, RING_3D_(NVC0_GRAPH_MACRO_ID), 2);
+ OUT_RING (chan, (m - 0x3800) / 8);
+ OUT_RING (chan, pos);
+ BEGIN_RING_1I(chan, RING_3D_(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1);
+ OUT_RING (chan, pos);
+ OUT_RINGp (chan, data, size);
+
+ return pos + size;
+}
+
+static void
+nvc0_magic_3d_init(struct nouveau_channel *chan)
+{
+ BEGIN_RING(chan, RING_3D_(0x10cc), 1);
+ OUT_RING (chan, 0xff);
+ BEGIN_RING(chan, RING_3D_(0x10e0), 2);
+ OUT_RING(chan, 0xff);
+ OUT_RING(chan, 0xff);
+ BEGIN_RING(chan, RING_3D_(0x10ec), 2);
+ OUT_RING(chan, 0xff);
+ OUT_RING(chan, 0xff);
+ BEGIN_RING(chan, RING_3D_(0x074c), 1);
+ OUT_RING (chan, 0x3f);
+
+ BEGIN_RING(chan, RING_3D_(0x16a8), 1);
+ OUT_RING (chan, (3 << 16) | 3);
+ BEGIN_RING(chan, RING_3D_(0x1794), 1);
+ OUT_RING (chan, (2 << 16) | 2);
+ BEGIN_RING(chan, RING_3D_(0x0de8), 1);
+ OUT_RING (chan, 1);
+
+#if 0 /* software method */
+ BEGIN_RING(chan, RING_3D_(0x1528), 1); /* MP poke */
+ OUT_RING (chan, 0);
+#endif
+
+ BEGIN_RING(chan, RING_3D_(0x12ac), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D_(0x0218), 1);
+ OUT_RING (chan, 0x10);
+ BEGIN_RING(chan, RING_3D_(0x10fc), 1);
+ OUT_RING (chan, 0x10);
+ BEGIN_RING(chan, RING_3D_(0x1290), 1);
+ OUT_RING (chan, 0x10);
+ BEGIN_RING(chan, RING_3D_(0x12d8), 2);
+ OUT_RING (chan, 0x10);
+ OUT_RING (chan, 0x10);
+ BEGIN_RING(chan, RING_3D_(0x06d4), 1);
+ OUT_RING (chan, 8);
+ BEGIN_RING(chan, RING_3D_(0x1140), 1);
+ OUT_RING (chan, 0x10);
+ BEGIN_RING(chan, RING_3D_(0x1610), 1);
+ OUT_RING (chan, 0xe);
+
+ BEGIN_RING(chan, RING_3D_(0x164c), 1);
+ OUT_RING (chan, 1 << 12);
+ BEGIN_RING(chan, RING_3D_(0x151c), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D_(0x020c), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D_(0x030c), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D_(0x0300), 1);
+ OUT_RING (chan, 3);
+#if 0 /* software method */
+ BEGIN_RING(chan, RING_3D_(0x1280), 1); /* PGRAPH poke */
+ OUT_RING (chan, 0);
+#endif
+ BEGIN_RING(chan, RING_3D_(0x02d0), 1);
+ OUT_RING (chan, 0x1f40);
+ BEGIN_RING(chan, RING_3D_(0x00fdc), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D_(0x19c0), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D_(0x075c), 1);
+ OUT_RING (chan, 3);
+
+ BEGIN_RING(chan, RING_3D_(0x0fac), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D_(0x0f90), 1);
+ OUT_RING (chan, 0);
+}
+
+static void
+nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 sequence)
+{
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+ struct nouveau_channel *chan = screen->base.channel;
+
+ MARK_RING (chan, 5, 2);
+ BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
+ OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
+ OUT_RING (chan, sequence);
+ OUT_RING (chan, NVC0_3D_QUERY_GET_FENCE | NVC0_3D_QUERY_GET_SHORT |
+ (0xf << NVC0_3D_QUERY_GET_UNIT__SHIFT));
+}
+
+static u32
+nvc0_screen_fence_update(struct pipe_screen *pscreen)
+{
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+ return screen->fence.map[0];
+}
+
+#define FAIL_SCREEN_INIT(str, err) \
+ do { \
+ NOUVEAU_ERR(str, err); \
+ nvc0_screen_destroy(pscreen); \
+ return NULL; \
+ } while(0)
+
+struct pipe_screen *
+nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
+{
+ struct nvc0_screen *screen;
+ struct nouveau_channel *chan;
+ struct pipe_screen *pscreen;
+ int ret;
+ unsigned i;
+
+ screen = CALLOC_STRUCT(nvc0_screen);
+ if (!screen)
+ return NULL;
+ pscreen = &screen->base.base;
+
+ screen->base.sysmem_bindings = PIPE_BIND_CONSTANT_BUFFER;
+
+ ret = nouveau_screen_init(&screen->base, dev);
+ if (ret) {
+ nvc0_screen_destroy(pscreen);
+ return NULL;
+ }
+ chan = screen->base.channel;
+
+ pscreen->winsys = ws;
+ pscreen->destroy = nvc0_screen_destroy;
+ pscreen->context_create = nvc0_create;
+ pscreen->is_format_supported = nvc0_screen_is_format_supported;
+ pscreen->get_param = nvc0_screen_get_param;
+ pscreen->get_shader_param = nvc0_screen_get_shader_param;
+ pscreen->get_paramf = nvc0_screen_get_paramf;
+
+ nvc0_screen_init_resource_functions(pscreen);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,
+ &screen->fence.bo);
+ if (ret)
+ goto fail;
+ nouveau_bo_map(screen->fence.bo, NOUVEAU_BO_RDWR);
+ screen->fence.map = screen->fence.bo->map;
+ nouveau_bo_unmap(screen->fence.bo);
+ screen->base.fence.emit = nvc0_screen_fence_emit;
+ screen->base.fence.update = nvc0_screen_fence_update;
+
+ for (i = 0; i < NVC0_SCRATCH_NR_BUFFERS; ++i) {
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART, 0, NVC0_SCRATCH_SIZE,
+ &screen->scratch.bo[i]);
+ if (ret)
+ goto fail;
+ }
+
+ ret = nouveau_grobj_alloc(chan, 0xbeef9039, NVC0_M2MF, &screen->m2mf);
+ if (ret)
+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret);
+
+ BIND_RING (chan, screen->m2mf, NVC0_SUBCH_MF);
+ BEGIN_RING(chan, RING_MF(NOTIFY_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, 0);
+
+ ret = nouveau_grobj_alloc(chan, 0xbeef902d, NVC0_2D, &screen->eng2d);
+ if (ret)
+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for 2D: %d\n", ret);
+
+ BIND_RING (chan, screen->eng2d, NVC0_SUBCH_2D);
+ BEGIN_RING(chan, RING_2D(OPERATION), 1);
+ OUT_RING (chan, NVC0_2D_OPERATION_SRCCOPY);
+ BEGIN_RING(chan, RING_2D(CLIP_ENABLE), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_2D(COLOR_KEY_ENABLE), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_2D_(0x0884), 1);
+ OUT_RING (chan, 0x3f);
+ BEGIN_RING(chan, RING_2D_(0x0888), 1);
+ OUT_RING (chan, 1);
+
+ ret = nouveau_grobj_alloc(chan, 0xbeef9097, NVC0_3D, &screen->fermi);
+ if (ret)
+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret);
+
+ BIND_RING (chan, screen->fermi, NVC0_SUBCH_3D);
+ BEGIN_RING(chan, RING_3D(NOTIFY_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, 0);
+
+ BEGIN_RING(chan, RING_3D(COND_MODE), 1);
+ OUT_RING (chan, NVC0_3D_COND_MODE_ALWAYS);
+
+ BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
+ OUT_RING (chan, 1);
+
+ BEGIN_RING(chan, RING_3D(CSAA_ENABLE), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(MULTISAMPLE_ENABLE), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(MULTISAMPLE_MODE), 1);
+ OUT_RING (chan, NVC0_3D_MULTISAMPLE_MODE_1X);
+ BEGIN_RING(chan, RING_3D(MULTISAMPLE_CTRL), 1);
+ OUT_RING (chan, 0);
+
+ nvc0_magic_3d_init(chan);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, &screen->text);
+ if (ret)
+ goto fail;
+
+ nouveau_resource_init(&screen->text_heap, 0, 1 << 20);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16,
+ &screen->uniforms);
+ if (ret)
+ goto fail;
+
+ /* auxiliary constants (6 user clip planes, base instance id) */
+ BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
+ OUT_RING (chan, 256);
+ OUT_RELOCh(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ for (i = 0; i < 5; ++i) {
+ BEGIN_RING(chan, RING_3D(CB_BIND(i)), 1);
+ OUT_RING (chan, (15 << 4) | 1);
+ }
+
+ screen->tls_size = (16 * 32) * (NVC0_CAP_MAX_PROGRAM_TEMPS * 16);
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17,
+ screen->tls_size, &screen->tls);
+ if (ret)
+ goto fail;
+
+ BEGIN_RING(chan, RING_3D(CODE_ADDRESS_HIGH), 2);
+ OUT_RELOCh(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ BEGIN_RING(chan, RING_3D(LOCAL_ADDRESS_HIGH), 4);
+ OUT_RELOCh(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, screen->tls_size >> 32);
+ OUT_RING (chan, screen->tls_size);
+ BEGIN_RING(chan, RING_3D_(0x07a0), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(LOCAL_BASE), 1);
+ OUT_RING (chan, 0);
+
+ for (i = 0; i < 5; ++i) {
+ BEGIN_RING(chan, RING_3D(TEX_LIMITS(i)), 1);
+ OUT_RING (chan, 0x54);
+ }
+ BEGIN_RING(chan, RING_3D(LINKED_TSC), 1);
+ OUT_RING (chan, 0);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20,
+ &screen->vfetch_cache);
+ if (ret)
+ goto fail;
+
+ BEGIN_RING(chan, RING_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, screen->vfetch_cache, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, screen->vfetch_cache, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, 3);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, &screen->txc);
+ if (ret)
+ goto fail;
+
+ BEGIN_RING(chan, RING_3D(TIC_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RING (chan, NVC0_TIC_MAX_ENTRIES - 1);
+
+ BEGIN_RING(chan, RING_3D(TSC_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RING (chan, NVC0_TSC_MAX_ENTRIES - 1);
+
+ BEGIN_RING(chan, RING_3D(SCREEN_Y_CONTROL), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(WINDOW_OFFSET_X), 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D_(0x1590), 1); /* deactivate ZCULL */
+ OUT_RING (chan, 0x3f);
+
+ BEGIN_RING(chan, RING_3D(CLIP_RECTS_MODE), 1);
+ OUT_RING (chan, NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY);
+ BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 8 * 2);
+ for (i = 0; i < 8 * 2; ++i)
+ OUT_RING(chan, 0);
+ BEGIN_RING(chan, RING_3D(CLIP_RECTS_EN), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(CLIPID_ENABLE), 1);
+ OUT_RING (chan, 0);
+
+ /* neither scissors, viewport nor stencil mask should affect clears */
+ BEGIN_RING(chan, RING_3D(CLEAR_FLAGS), 1);
+ OUT_RING (chan, 0);
+
+ BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2);
+ OUT_RINGf (chan, 0.0f);
+ OUT_RINGf (chan, 1.0f);
+ BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1);
+ OUT_RING (chan, NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1);
+
+ /* We use scissors instead of exact view volume clipping,
+ * so they're always enabled.
+ */
+ BEGIN_RING(chan, RING_3D(SCISSOR_ENABLE(0)), 3);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 8192 << 16);
+ OUT_RING (chan, 8192 << 16);
+
+ BEGIN_RING(chan, RING_3D_(0x0fac), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D_(0x3484), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D_(0x0dbc), 1);
+ OUT_RING (chan, 0x00010000);
+ BEGIN_RING(chan, RING_3D_(0x0dd8), 1);
+ OUT_RING (chan, 0xff800006);
+ BEGIN_RING(chan, RING_3D_(0x3488), 1);
+ OUT_RING (chan, 0);
+
+#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n);
+
+ i = 0;
+ MK_MACRO(NVC0_3D_BLEND_ENABLES, nvc0_9097_blend_enables);
+ MK_MACRO(NVC0_3D_VERTEX_ARRAY_SELECT, nvc0_9097_vertex_array_select);
+ MK_MACRO(NVC0_3D_TEP_SELECT, nvc0_9097_tep_select);
+ MK_MACRO(NVC0_3D_GP_SELECT, nvc0_9097_gp_select);
+ MK_MACRO(NVC0_3D_POLYGON_MODE_FRONT, nvc0_9097_poly_mode_front);
+ MK_MACRO(NVC0_3D_POLYGON_MODE_BACK, nvc0_9097_poly_mode_back);
+ MK_MACRO(NVC0_3D_COLOR_MASK_BROADCAST, nvc0_9097_color_mask_brdc);
+
+ BEGIN_RING(chan, RING_3D(RASTERIZE_ENABLE), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
+ OUT_RING (chan, 0x40);
+ BEGIN_RING(chan, RING_3D(LAYER), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
+ OUT_RING (chan, 0x30);
+ BEGIN_RING(chan, RING_3D(PATCH_VERTICES), 1);
+ OUT_RING (chan, 3);
+ BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1);
+ OUT_RING (chan, 0x20);
+ BEGIN_RING(chan, RING_3D(SP_SELECT(0)), 1);
+ OUT_RING (chan, 0x00);
+
+ BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(POINT_RASTER_RULES), 1);
+ OUT_RING (chan, NVC0_3D_POINT_RASTER_RULES_OGL);
+
+ BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1);
+ OUT_RING (chan, 0x11111111);
+ BEGIN_RING(chan, RING_3D(EDGEFLAG_ENABLE), 1);
+ OUT_RING (chan, 1);
+
+ BEGIN_RING(chan, RING_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
+ OUT_RING (chan, 0xab);
+ OUT_RING (chan, 0x00000000);
+
+ FIRE_RING (chan);
+
+ screen->tic.entries = CALLOC(4096, sizeof(void *));
+ screen->tsc.entries = screen->tic.entries + 2048;
+
+ screen->mm_VRAM_fe0 = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0);
+
+ nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
+
+ return pscreen;
+
+fail:
+ nvc0_screen_destroy(pscreen);
+ return NULL;
+}
+
+void
+nvc0_screen_make_buffers_resident(struct nvc0_screen *screen)
+{
+ struct nouveau_channel *chan = screen->base.channel;
+
+ const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+
+ MARK_RING(chan, 5, 5);
+ nouveau_bo_validate(chan, screen->text, flags);
+ nouveau_bo_validate(chan, screen->uniforms, flags);
+ nouveau_bo_validate(chan, screen->txc, flags);
+ nouveau_bo_validate(chan, screen->vfetch_cache, flags);
+
+ if (screen->cur_ctx && screen->cur_ctx->state.tls_required)
+ nouveau_bo_validate(chan, screen->tls, flags);
+}
+
+int
+nvc0_screen_tic_alloc(struct nvc0_screen *screen, void *entry)
+{
+ int i = screen->tic.next;
+
+ while (screen->tic.lock[i / 32] & (1 << (i % 32)))
+ i = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1);
+
+ screen->tic.next = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1);
+
+ if (screen->tic.entries[i])
+ nv50_tic_entry(screen->tic.entries[i])->id = -1;
+
+ screen->tic.entries[i] = entry;
+ return i;
+}
+
+int
+nvc0_screen_tsc_alloc(struct nvc0_screen *screen, void *entry)
+{
+ int i = screen->tsc.next;
+
+ while (screen->tsc.lock[i / 32] & (1 << (i % 32)))
+ i = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1);
+
+ screen->tsc.next = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1);
+
+ if (screen->tsc.entries[i])
+ nv50_tsc_entry(screen->tsc.entries[i])->id = -1;
+
+ screen->tsc.entries[i] = entry;
+ return i;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
new file mode 100644
index 0000000000..94bf0cf348
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -0,0 +1,148 @@
+#ifndef __NVC0_SCREEN_H__
+#define __NVC0_SCREEN_H__
+
+#define NOUVEAU_NVC0
+#include "nouveau/nouveau_screen.h"
+#include "nouveau/nouveau_mm.h"
+#undef NOUVEAU_NVC0
+#include "nvc0_winsys.h"
+#include "nvc0_stateobj.h"
+
+#define NVC0_TIC_MAX_ENTRIES 2048
+#define NVC0_TSC_MAX_ENTRIES 2048
+
+struct nvc0_context;
+
+#define NVC0_SCRATCH_SIZE (2 << 20)
+#define NVC0_SCRATCH_NR_BUFFERS 2
+
+struct nvc0_screen {
+ struct nouveau_screen base;
+ struct nouveau_winsys *nvws;
+
+ struct nvc0_context *cur_ctx;
+
+ struct nouveau_bo *text;
+ struct nouveau_bo *uniforms;
+ struct nouveau_bo *tls;
+ struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
+ struct nouveau_bo *vfetch_cache;
+
+ uint64_t tls_size;
+
+ struct nouveau_resource *text_heap;
+
+ struct {
+ struct nouveau_bo *bo[NVC0_SCRATCH_NR_BUFFERS];
+ uint8_t *buf;
+ int index;
+ uint32_t offset;
+ } scratch;
+
+ struct {
+ void **entries;
+ int next;
+ uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32];
+ } tic;
+
+ struct {
+ void **entries;
+ int next;
+ uint32_t lock[NVC0_TSC_MAX_ENTRIES / 32];
+ } tsc;
+
+ struct {
+ struct nouveau_bo *bo;
+ uint32_t *map;
+ } fence;
+
+ struct nouveau_mman *mm_VRAM_fe0;
+
+ struct nouveau_grobj *fermi;
+ struct nouveau_grobj *eng2d;
+ struct nouveau_grobj *m2mf;
+};
+
+static INLINE struct nvc0_screen *
+nvc0_screen(struct pipe_screen *screen)
+{
+ return (struct nvc0_screen *)screen;
+}
+
+void nvc0_screen_make_buffers_resident(struct nvc0_screen *);
+
+int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
+int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
+
+static INLINE void
+nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
+{
+ struct nvc0_screen *screen = nvc0_screen(res->base.screen);
+
+ if (res->mm) {
+ nouveau_fence_ref(screen->base.fence.current, &res->fence);
+
+ if (flags & NOUVEAU_BO_WR)
+ nouveau_fence_ref(screen->base.fence.current, &res->fence_wr);
+ }
+}
+
+static INLINE void
+nvc0_resource_validate(struct nv04_resource *res, uint32_t flags)
+{
+ struct nvc0_screen *screen = nvc0_screen(res->base.screen);
+
+ if (likely(res->bo)) {
+ nouveau_bo_validate(screen->base.channel, res->bo, flags);
+
+ if (flags & NOUVEAU_BO_WR)
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ if (flags & NOUVEAU_BO_RD)
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ nvc0_resource_fence(res, flags);
+ }
+}
+
+struct nvc0_format {
+ uint32_t rt;
+ uint32_t tic;
+ uint32_t vtx;
+ uint32_t usage;
+};
+
+extern const struct nvc0_format nvc0_format_table[];
+
+static INLINE void
+nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
+{
+ if (tic->id >= 0)
+ screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
+}
+
+static INLINE void
+nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
+{
+ if (tsc->id >= 0)
+ screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
+}
+
+static INLINE void
+nvc0_screen_tic_free(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
+{
+ if (tic->id >= 0) {
+ screen->tic.entries[tic->id] = NULL;
+ screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
+ }
+}
+
+static INLINE void
+nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
+{
+ if (tsc->id >= 0) {
+ screen->tsc.entries[tsc->id] = NULL;
+ screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
+ }
+}
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c
new file mode 100644
index 0000000000..7294eaa222
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+
+#include "nvc0_context.h"
+
+static INLINE void
+nvc0_program_update_context_state(struct nvc0_context *nvc0,
+ struct nvc0_program *prog, int stage)
+{
+ if (prog->hdr[1])
+ nvc0->state.tls_required |= 1 << stage;
+ else
+ nvc0->state.tls_required &= ~(1 << stage);
+}
+
+static boolean
+nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
+{
+ int ret;
+ unsigned size;
+
+ if (prog->translated)
+ return TRUE;
+
+ prog->translated = nvc0_program_translate(prog);
+ if (!prog->translated)
+ return FALSE;
+
+ size = align(prog->code_size + NVC0_SHADER_HEADER_SIZE, 0x100);
+
+ ret = nouveau_resource_alloc(nvc0->screen->text_heap, size, prog,
+ &prog->res);
+ if (ret)
+ return FALSE;
+
+ prog->code_base = prog->res->start;
+
+ nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->text, prog->code_base,
+ NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
+ nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->text,
+ prog->code_base + NVC0_SHADER_HEADER_SIZE,
+ NOUVEAU_BO_VRAM, prog->code_size, prog->code);
+
+ BEGIN_RING(nvc0->screen->base.channel, RING_3D(MEM_BARRIER), 1);
+ OUT_RING (nvc0->screen->base.channel, 0x1111);
+
+ return TRUE;
+}
+
+void
+nvc0_vertprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_program *vp = nvc0->vertprog;
+
+ if (nvc0->clip.nr > vp->vp.num_ucps) {
+ assert(nvc0->clip.nr <= 6);
+ vp->vp.num_ucps = 6;
+
+ if (vp->translated)
+ nvc0_program_destroy(nvc0, vp);
+ }
+
+ if (!nvc0_program_validate(nvc0, vp))
+ return;
+ nvc0_program_update_context_state(nvc0, vp, 0);
+
+ BEGIN_RING(chan, RING_3D(SP_SELECT(1)), 2);
+ OUT_RING (chan, 0x11);
+ OUT_RING (chan, vp->code_base);
+ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(1)), 1);
+ OUT_RING (chan, vp->max_gpr);
+
+ // BEGIN_RING(chan, RING_3D_(0x163c), 1);
+ // OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(VERT_COLOR_CLAMP_EN), 1);
+ OUT_RING (chan, 1);
+}
+
+void
+nvc0_fragprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_program *fp = nvc0->fragprog;
+
+ if (!nvc0_program_validate(nvc0, fp))
+ return;
+ nvc0_program_update_context_state(nvc0, fp, 4);
+
+ BEGIN_RING(chan, RING_3D(SP_SELECT(5)), 2);
+ OUT_RING (chan, 0x51);
+ OUT_RING (chan, fp->code_base);
+ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(5)), 1);
+ OUT_RING (chan, fp->max_gpr);
+
+ BEGIN_RING(chan, RING_3D_(0x0360), 2);
+ OUT_RING (chan, 0x20164010);
+ OUT_RING (chan, 0x20);
+ BEGIN_RING(chan, RING_3D_(0x196c), 1);
+ OUT_RING (chan, fp->flags[0]);
+}
+
+void
+nvc0_tctlprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_program *tp = nvc0->tctlprog;
+
+ if (!tp) {
+ BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1);
+ OUT_RING (chan, 0x20);
+ return;
+ }
+ if (!nvc0_program_validate(nvc0, tp))
+ return;
+ nvc0_program_update_context_state(nvc0, tp, 1);
+
+ BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 2);
+ OUT_RING (chan, 0x21);
+ OUT_RING (chan, tp->code_base);
+ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(2)), 1);
+ OUT_RING (chan, tp->max_gpr);
+}
+
+void
+nvc0_tevlprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_program *tp = nvc0->tevlprog;
+
+ if (!tp) {
+ BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
+ OUT_RING (chan, 0x30);
+ return;
+ }
+ if (!nvc0_program_validate(nvc0, tp))
+ return;
+ nvc0_program_update_context_state(nvc0, tp, 2);
+
+ BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
+ OUT_RING (chan, 0x31);
+ BEGIN_RING(chan, RING_3D(SP_START_ID(3)), 1);
+ OUT_RING (chan, tp->code_base);
+ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(3)), 1);
+ OUT_RING (chan, tp->max_gpr);
+}
+
+void
+nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_program *gp = nvc0->gmtyprog;
+
+ if (!gp) {
+ BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
+ OUT_RING (chan, 0x40);
+ return;
+ }
+ if (!nvc0_program_validate(nvc0, gp))
+ return;
+ nvc0_program_update_context_state(nvc0, gp, 3);
+
+ BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
+ OUT_RING (chan, 0x41);
+ BEGIN_RING(chan, RING_3D(SP_START_ID(4)), 1);
+ OUT_RING (chan, gp->code_base);
+ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(4)), 1);
+ OUT_RING (chan, gp->max_gpr);
+}
+
+/* It's *is* kind of shader related. We need to inspect the program
+ * to get the output locations right.
+ */
+void
+nvc0_tfb_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_program *vp;
+ struct nvc0_transform_feedback_state *tfb = nvc0->tfb;
+ int b;
+
+ BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1);
+ if (!tfb) {
+ OUT_RING(chan, 0);
+ return;
+ }
+ OUT_RING(chan, 1);
+
+ vp = nvc0->vertprog ? nvc0->vertprog : nvc0->gmtyprog;
+
+ for (b = 0; b < nvc0->num_tfbbufs; ++b) {
+ uint8_t idx, var[128];
+ int i, n;
+ struct nv04_resource *buf = nv04_resource(nvc0->tfbbuf[b]);
+
+ BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 5);
+ OUT_RING (chan, 1);
+ OUT_RESRCh(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR);
+ OUT_RESRCl(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR);
+ OUT_RING (chan, buf->base.width0 - nvc0->tfb_offset[b]);
+ OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID <- offset ? */
+
+ if (!(nvc0->dirty & NVC0_NEW_TFB))
+ continue;
+
+ BEGIN_RING(chan, RING_3D(TFB_UNK07X0(b)), 3);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, tfb->varying_count[b]);
+ OUT_RING (chan, tfb->stride[b]);
+
+ n = b ? tfb->varying_count[b - 1] : 0;
+ i = 0;
+ for (; i < tfb->varying_count[b]; ++i) {
+ idx = tfb->varying_index[n + i];
+ var[i] = vp->vp.out_pos[idx >> 2] + (idx & 3);
+ }
+ for (; i & 3; ++i)
+ var[i] = 0;
+
+ BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(b, 0)), i / 4);
+ OUT_RINGp (chan, var, i / 4);
+ }
+ for (; b < 4; ++b)
+ IMMED_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 0);
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c
new file mode 100644
index 0000000000..ab68abcfb5
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_state.c
@@ -0,0 +1,860 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_transfer.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nvc0_stateobj.h"
+#include "nvc0_context.h"
+
+#include "nvc0_3d.xml.h"
+#include "nv50/nv50_texture.xml.h"
+
+#include "nouveau/nouveau_gldefs.h"
+
+static INLINE uint32_t
+nvc0_colormask(unsigned mask)
+{
+ uint32_t ret = 0;
+
+ if (mask & PIPE_MASK_R)
+ ret |= 0x0001;
+ if (mask & PIPE_MASK_G)
+ ret |= 0x0010;
+ if (mask & PIPE_MASK_B)
+ ret |= 0x0100;
+ if (mask & PIPE_MASK_A)
+ ret |= 0x1000;
+
+ return ret;
+}
+
+#define NVC0_BLEND_FACTOR_CASE(a, b) \
+ case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b
+
+static INLINE uint32_t
+nvc0_blend_fac(unsigned factor)
+{
+ switch (factor) {
+ NVC0_BLEND_FACTOR_CASE(ONE, ONE);
+ NVC0_BLEND_FACTOR_CASE(SRC_COLOR, SRC_COLOR);
+ NVC0_BLEND_FACTOR_CASE(SRC_ALPHA, SRC_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(DST_ALPHA, DST_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(DST_COLOR, DST_COLOR);
+ NVC0_BLEND_FACTOR_CASE(SRC_ALPHA_SATURATE, SRC_ALPHA_SATURATE);
+ NVC0_BLEND_FACTOR_CASE(CONST_COLOR, CONSTANT_COLOR);
+ NVC0_BLEND_FACTOR_CASE(CONST_ALPHA, CONSTANT_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(SRC1_COLOR, SRC1_COLOR);
+ NVC0_BLEND_FACTOR_CASE(SRC1_ALPHA, SRC1_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(ZERO, ZERO);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC_COLOR, ONE_MINUS_SRC_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC_ALPHA, ONE_MINUS_SRC_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(INV_DST_ALPHA, ONE_MINUS_DST_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(INV_DST_COLOR, ONE_MINUS_DST_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_CONST_COLOR, ONE_MINUS_CONSTANT_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_CONST_ALPHA, ONE_MINUS_CONSTANT_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC1_COLOR, ONE_MINUS_SRC1_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC1_ALPHA, ONE_MINUS_SRC1_ALPHA);
+ default:
+ return NV50_3D_BLEND_FACTOR_ZERO;
+ }
+}
+
+static void *
+nvc0_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nvc0_blend_stateobj *so = CALLOC_STRUCT(nvc0_blend_stateobj);
+ int i;
+
+ so->pipe = *cso;
+
+ SB_IMMED_3D(so, BLEND_INDEPENDENT, cso->independent_blend_enable);
+
+ if (!cso->independent_blend_enable) {
+ SB_BEGIN_3D(so, BLEND_ENABLES, 1);
+ SB_DATA (so, cso->rt[0].blend_enable ? 0xff : 0);
+
+ if (cso->rt[0].blend_enable) {
+ SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5);
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[0].rgb_func));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[0].rgb_src_factor));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[0].rgb_dst_factor));
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[0].alpha_func));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[0].alpha_src_factor));
+ SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1);
+ SB_DATA (so, nvc0_blend_fac(cso->rt[0].alpha_dst_factor));
+ }
+
+ SB_BEGIN_3D(so, COLOR_MASK_BROADCAST, 1);
+ SB_DATA (so, nvc0_colormask(cso->rt[0].colormask));
+ } else {
+ uint8_t en = 0;
+
+ for (i = 0; i < 8; ++i) {
+ if (!cso->rt[i].blend_enable)
+ continue;
+ en |= 1 << i;
+
+ SB_BEGIN_3D(so, IBLEND_EQUATION_RGB(i), 6);
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[i].rgb_func));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_src_factor));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_dst_factor));
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[i].alpha_func));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_src_factor));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_dst_factor));
+ }
+ SB_BEGIN_3D(so, BLEND_ENABLES, 1);
+ SB_DATA (so, en);
+
+ SB_BEGIN_3D(so, COLOR_MASK(0), 8);
+ for (i = 0; i < 8; ++i)
+ SB_DATA(so, nvc0_colormask(cso->rt[i].colormask));
+ }
+
+ if (cso->logicop_enable) {
+ SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_logicop_func(cso->logicop_func));
+ } else {
+ SB_IMMED_3D(so, LOGIC_OP_ENABLE, 0);
+ }
+
+ assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
+ return so;
+}
+
+static void
+nvc0_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->blend = hwcso;
+ nvc0->dirty |= NVC0_NEW_BLEND;
+}
+
+static void
+nvc0_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void *
+nvc0_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nvc0_rasterizer_stateobj *so;
+ uint32_t reg;
+
+ so = CALLOC_STRUCT(nvc0_rasterizer_stateobj);
+ if (!so)
+ return NULL;
+ so->pipe = *cso;
+
+ /* Scissor enables are handled in scissor state, we will not want to
+ * always emit 16 commands, one for each scissor rectangle, here.
+ */
+
+ SB_BEGIN_3D(so, SHADE_MODEL, 1);
+ SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT :
+ NVC0_3D_SHADE_MODEL_SMOOTH);
+ SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first);
+ SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside);
+
+ SB_BEGIN_3D(so, LINE_WIDTH, 1);
+ SB_DATA (so, fui(cso->line_width));
+ SB_IMMED_3D(so, LINE_SMOOTH_ENABLE, cso->line_smooth);
+
+ SB_BEGIN_3D(so, LINE_STIPPLE_ENABLE, 1);
+ if (cso->line_stipple_enable) {
+ SB_DATA (so, 1);
+ SB_BEGIN_3D(so, LINE_STIPPLE_PATTERN, 1);
+ SB_DATA (so, (cso->line_stipple_pattern << 8) |
+ cso->line_stipple_factor);
+
+ } else {
+ SB_DATA (so, 0);
+ }
+
+ SB_IMMED_3D(so, VP_POINT_SIZE_EN, cso->point_size_per_vertex);
+ if (!cso->point_size_per_vertex) {
+ SB_BEGIN_3D(so, POINT_SIZE, 1);
+ SB_DATA (so, fui(cso->point_size));
+ }
+
+ reg = (cso->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT) ?
+ NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT :
+ NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT;
+
+ SB_BEGIN_3D(so, POINT_COORD_REPLACE, 1);
+ SB_DATA (so, ((cso->sprite_coord_enable & 0xff) << 3) | reg);
+ SB_IMMED_3D(so, POINT_SPRITE_ENABLE, cso->point_quad_rasterization);
+ SB_IMMED_3D(so, POINT_SMOOTH_ENABLE, cso->point_smooth);
+
+ SB_BEGIN_3D(so, POLYGON_MODE_FRONT, 1);
+ SB_DATA (so, nvgl_polygon_mode(cso->fill_front));
+ SB_BEGIN_3D(so, POLYGON_MODE_BACK, 1);
+ SB_DATA (so, nvgl_polygon_mode(cso->fill_back));
+ SB_IMMED_3D(so, POLYGON_SMOOTH_ENABLE, cso->poly_smooth);
+
+ SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3);
+ SB_DATA (so, cso->cull_face != PIPE_FACE_NONE);
+ SB_DATA (so, cso->front_ccw ? NVC0_3D_FRONT_FACE_CCW :
+ NVC0_3D_FRONT_FACE_CW);
+ switch (cso->cull_face) {
+ case PIPE_FACE_FRONT_AND_BACK:
+ SB_DATA(so, NVC0_3D_CULL_FACE_FRONT_AND_BACK);
+ break;
+ case PIPE_FACE_FRONT:
+ SB_DATA(so, NVC0_3D_CULL_FACE_FRONT);
+ break;
+ case PIPE_FACE_BACK:
+ default:
+ SB_DATA(so, NVC0_3D_CULL_FACE_BACK);
+ break;
+ }
+
+ SB_IMMED_3D(so, POLYGON_STIPPLE_ENABLE, cso->poly_stipple_enable);
+ SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3);
+ SB_DATA (so, cso->offset_point);
+ SB_DATA (so, cso->offset_line);
+ SB_DATA (so, cso->offset_tri);
+
+ if (cso->offset_point || cso->offset_line || cso->offset_tri) {
+ SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1);
+ SB_DATA (so, fui(cso->offset_scale));
+ SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1);
+ SB_DATA (so, fui(cso->offset_units * 2.0f));
+ }
+
+ assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
+ return (void *)so;
+}
+
+static void
+nvc0_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->rast = hwcso;
+ nvc0->dirty |= NVC0_NEW_RASTERIZER;
+}
+
+static void
+nvc0_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void *
+nvc0_zsa_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nvc0_zsa_stateobj *so = CALLOC_STRUCT(nvc0_zsa_stateobj);
+
+ so->pipe = *cso;
+
+ SB_IMMED_3D(so, DEPTH_TEST_ENABLE, cso->depth.enabled);
+ if (cso->depth.enabled) {
+ SB_IMMED_3D(so, DEPTH_WRITE_ENABLE, cso->depth.writemask);
+ SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1);
+ SB_DATA (so, nvgl_comparison_op(cso->depth.func));
+ }
+
+ if (cso->stencil[0].enabled) {
+ SB_BEGIN_3D(so, STENCIL_ENABLE, 5);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
+ SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func));
+ SB_BEGIN_3D(so, STENCIL_FRONT_FUNC_MASK, 2);
+ SB_DATA (so, cso->stencil[0].valuemask);
+ SB_DATA (so, cso->stencil[0].writemask);
+ } else {
+ SB_IMMED_3D(so, STENCIL_ENABLE, 0);
+ }
+
+ if (cso->stencil[1].enabled) {
+ assert(cso->stencil[0].enabled);
+ SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
+ SB_DATA (so, nvgl_comparison_op(cso->stencil[1].func));
+ SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2);
+ SB_DATA (so, cso->stencil[1].writemask);
+ SB_DATA (so, cso->stencil[1].valuemask);
+ } else
+ if (cso->stencil[0].enabled) {
+ SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0);
+ }
+
+ SB_IMMED_3D(so, ALPHA_TEST_ENABLE, cso->alpha.enabled);
+ if (cso->alpha.enabled) {
+ SB_BEGIN_3D(so, ALPHA_TEST_REF, 2);
+ SB_DATA (so, fui(cso->alpha.ref_value));
+ SB_DATA (so, nvgl_comparison_op(cso->alpha.func));
+ }
+
+ assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
+ return (void *)so;
+}
+
+static void
+nvc0_zsa_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->zsa = hwcso;
+ nvc0->dirty |= NVC0_NEW_ZSA;
+}
+
+static void
+nvc0_zsa_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+/* ====================== SAMPLERS AND TEXTURES ================================
+ */
+
+#define NV50_TSC_WRAP_CASE(n) \
+ case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n
+
+static INLINE unsigned
+nv50_tsc_wrap_mode(unsigned wrap)
+{
+ switch (wrap) {
+ NV50_TSC_WRAP_CASE(REPEAT);
+ NV50_TSC_WRAP_CASE(MIRROR_REPEAT);
+ NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE);
+ NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER);
+ NV50_TSC_WRAP_CASE(CLAMP);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP);
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ return NV50_TSC_WRAP_REPEAT;
+ }
+}
+
+static void
+nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ unsigned s, i;
+
+ for (s = 0; s < 5; ++s)
+ for (i = 0; i < nvc0_context(pipe)->num_samplers[s]; ++i)
+ if (nvc0_context(pipe)->samplers[s][i] == hwcso)
+ nvc0_context(pipe)->samplers[s][i] = NULL;
+
+ nvc0_screen_tsc_free(nvc0_context(pipe)->screen, nv50_tsc_entry(hwcso));
+
+ FREE(hwcso);
+}
+
+static INLINE void
+nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s,
+ unsigned nr, void **hwcso)
+{
+ unsigned i;
+
+ for (i = 0; i < nr; ++i) {
+ struct nv50_tsc_entry *old = nvc0->samplers[s][i];
+
+ nvc0->samplers[s][i] = nv50_tsc_entry(hwcso[i]);
+ if (old)
+ nvc0_screen_tsc_unlock(nvc0->screen, old);
+ }
+ for (; i < nvc0->num_samplers[s]; ++i)
+ if (nvc0->samplers[s][i])
+ nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]);
+
+ nvc0->num_samplers[s] = nr;
+
+ nvc0->dirty |= NVC0_NEW_SAMPLERS;
+}
+
+static void
+nvc0_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 0, nr, s);
+}
+
+static void
+nvc0_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 4, nr, s);
+}
+
+static void
+nvc0_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s);
+}
+
+/* NOTE: only called when not referenced anywhere, won't be bound */
+static void
+nvc0_sampler_view_destroy(struct pipe_context *pipe,
+ struct pipe_sampler_view *view)
+{
+ pipe_resource_reference(&view->texture, NULL);
+
+ nvc0_screen_tic_free(nvc0_context(pipe)->screen, nv50_tic_entry(view));
+
+ FREE(nv50_tic_entry(view));
+}
+
+static INLINE void
+nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ unsigned i;
+
+ for (i = 0; i < nr; ++i) {
+ struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
+ if (old)
+ nvc0_screen_tic_unlock(nvc0->screen, old);
+
+ pipe_sampler_view_reference(&nvc0->textures[s][i], views[i]);
+ }
+
+ for (i = nr; i < nvc0->num_textures[s]; ++i) {
+ struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
+ if (!old)
+ continue;
+ nvc0_screen_tic_unlock(nvc0->screen, old);
+
+ pipe_sampler_view_reference(&nvc0->textures[s][i], NULL);
+ }
+
+ nvc0->num_textures[s] = nr;
+
+ nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TEXTURES);
+
+ nvc0->dirty |= NVC0_NEW_TEXTURES;
+}
+
+static void
+nvc0_vp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 0, nr, views);
+}
+
+static void
+nvc0_fp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 4, nr, views);
+}
+
+static void
+nvc0_gp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views);
+}
+
+/* ============================= SHADERS =======================================
+ */
+
+static void *
+nvc0_sp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso, unsigned type)
+{
+ struct nvc0_program *prog;
+
+ prog = CALLOC_STRUCT(nvc0_program);
+ if (!prog)
+ return NULL;
+
+ prog->type = type;
+ prog->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ return (void *)prog;
+}
+
+static void
+nvc0_sp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_program *prog = (struct nvc0_program *)hwcso;
+
+ nvc0_program_destroy(nvc0_context(pipe), prog);
+
+ FREE((void *)prog->pipe.tokens);
+ FREE(prog);
+}
+
+static void *
+nvc0_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX);
+}
+
+static void
+nvc0_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->vertprog = hwcso;
+ nvc0->dirty |= NVC0_NEW_VERTPROG;
+}
+
+static void *
+nvc0_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT);
+}
+
+static void
+nvc0_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->fragprog = hwcso;
+ nvc0->dirty |= NVC0_NEW_FRAGPROG;
+}
+
+static void *
+nvc0_gp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_GEOMETRY);
+}
+
+static void
+nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->gmtyprog = hwcso;
+ nvc0->dirty |= NVC0_NEW_GMTYPROG;
+}
+
+static void
+nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ struct pipe_resource *res)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ switch (shader) {
+ case PIPE_SHADER_VERTEX: shader = 0; break;
+ /*
+ case PIPE_SHADER_TESSELLATION_CONTROL: shader = 1; break;
+ case PIPE_SHADER_TESSELLATION_EVALUATION: shader = 2; break;
+ */
+ case PIPE_SHADER_GEOMETRY: shader = 3; break;
+ case PIPE_SHADER_FRAGMENT: shader = 4; break;
+ default:
+ assert(0);
+ break;
+ }
+
+ if (nvc0->constbuf[shader][index])
+ nvc0_bufctx_del_resident(nvc0, NVC0_BUFCTX_CONSTANT,
+ nv04_resource(nvc0->constbuf[shader][index]));
+
+ pipe_resource_reference(&nvc0->constbuf[shader][index], res);
+
+ nvc0->constbuf_dirty[shader] |= 1 << index;
+
+ nvc0->dirty |= NVC0_NEW_CONSTBUF;
+}
+
+/* =============================================================================
+ */
+
+static void
+nvc0_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->blend_colour = *bcol;
+ nvc0->dirty |= NVC0_NEW_BLEND_COLOUR;
+}
+
+static void
+nvc0_set_stencil_ref(struct pipe_context *pipe,
+ const struct pipe_stencil_ref *sr)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->stencil_ref = *sr;
+ nvc0->dirty |= NVC0_NEW_STENCIL_REF;
+}
+
+static void
+nvc0_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ const unsigned size = clip->nr * sizeof(clip->ucp[0]);
+
+ memcpy(&nvc0->clip.ucp[0][0], &clip->ucp[0][0], size);
+ nvc0->clip.nr = clip->nr;
+
+ nvc0->clip.depth_clamp = clip->depth_clamp;
+
+ nvc0->dirty |= NVC0_NEW_CLIP;
+}
+
+static void
+nvc0_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->sample_mask = sample_mask;
+ nvc0->dirty |= NVC0_NEW_SAMPLE_MASK;
+}
+
+
+static void
+nvc0_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->framebuffer = *fb;
+ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+}
+
+static void
+nvc0_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->stipple = *stipple;
+ nvc0->dirty |= NVC0_NEW_STIPPLE;
+}
+
+static void
+nvc0_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *scissor)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->scissor = *scissor;
+ nvc0->dirty |= NVC0_NEW_SCISSOR;
+}
+
+static void
+nvc0_set_viewport_state(struct pipe_context *pipe,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->viewport = *vpt;
+ nvc0->dirty |= NVC0_NEW_VIEWPORT;
+}
+
+static void
+nvc0_set_vertex_buffers(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ unsigned i;
+
+ for (i = 0; i < count; ++i)
+ pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer);
+ for (; i < nvc0->num_vtxbufs; ++i)
+ pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL);
+
+ memcpy(nvc0->vtxbuf, vb, sizeof(*vb) * count);
+ nvc0->num_vtxbufs = count;
+
+ nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX);
+
+ nvc0->dirty |= NVC0_NEW_ARRAYS;
+}
+
+static void
+nvc0_set_index_buffer(struct pipe_context *pipe,
+ const struct pipe_index_buffer *ib)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ if (ib) {
+ pipe_resource_reference(&nvc0->idxbuf.buffer, ib->buffer);
+
+ memcpy(&nvc0->idxbuf, ib, sizeof(nvc0->idxbuf));
+ } else {
+ pipe_resource_reference(&nvc0->idxbuf.buffer, NULL);
+ }
+}
+
+static void
+nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->vertex = hwcso;
+ nvc0->dirty |= NVC0_NEW_VERTEX;
+}
+
+static void *
+nvc0_tfb_state_create(struct pipe_context *pipe,
+ const struct pipe_stream_output_state *pso)
+{
+ struct nvc0_transform_feedback_state *so;
+ int n = 0;
+ int i, c, b;
+
+ so = MALLOC(sizeof(*so) + pso->num_outputs * 4 * sizeof(uint8_t));
+ if (!so)
+ return NULL;
+
+ for (b = 0; b < 4; ++b) {
+ for (i = 0; i < pso->num_outputs; ++i) {
+ if (pso->output_buffer[i] != b)
+ continue;
+ for (c = 0; c < 4; ++c) {
+ if (!(pso->register_mask[i] & (1 << c)))
+ continue;
+ so->varying_count[b]++;
+ so->varying_index[n++] = (pso->register_index[i] << 2) | c;
+ }
+ }
+ so->stride[b] = so->varying_count[b] * 4;
+ }
+ if (pso->stride)
+ so->stride[0] = pso->stride;
+
+ return so;
+}
+
+static void
+nvc0_tfb_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void
+nvc0_tfb_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ nvc0_context(pipe)->tfb = hwcso;
+ nvc0_context(pipe)->dirty |= NVC0_NEW_TFB;
+}
+
+static void
+nvc0_set_transform_feedback_buffers(struct pipe_context *pipe,
+ struct pipe_resource **buffers,
+ int *offsets,
+ int num_buffers)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ int i;
+
+ assert(num_buffers >= 0 && num_buffers <= 4); /* why signed ? */
+
+ for (i = 0; i < num_buffers; ++i) {
+ assert(offsets[i] >= 0);
+ nvc0->tfb_offset[i] = offsets[i];
+ pipe_resource_reference(&nvc0->tfbbuf[i], buffers[i]);
+ }
+ for (; i < nvc0->num_tfbbufs; ++i)
+ pipe_resource_reference(&nvc0->tfbbuf[i], NULL);
+
+ nvc0->num_tfbbufs = num_buffers;
+
+ nvc0->dirty |= NVC0_NEW_TFB_BUFFERS;
+}
+
+void
+nvc0_init_state_functions(struct nvc0_context *nvc0)
+{
+ struct pipe_context *pipe = &nvc0->base.pipe;
+
+ pipe->create_blend_state = nvc0_blend_state_create;
+ pipe->bind_blend_state = nvc0_blend_state_bind;
+ pipe->delete_blend_state = nvc0_blend_state_delete;
+
+ pipe->create_rasterizer_state = nvc0_rasterizer_state_create;
+ pipe->bind_rasterizer_state = nvc0_rasterizer_state_bind;
+ pipe->delete_rasterizer_state = nvc0_rasterizer_state_delete;
+
+ pipe->create_depth_stencil_alpha_state = nvc0_zsa_state_create;
+ pipe->bind_depth_stencil_alpha_state = nvc0_zsa_state_bind;
+ pipe->delete_depth_stencil_alpha_state = nvc0_zsa_state_delete;
+
+ pipe->create_sampler_state = nv50_sampler_state_create;
+ pipe->delete_sampler_state = nvc0_sampler_state_delete;
+ pipe->bind_vertex_sampler_states = nvc0_vp_sampler_states_bind;
+ pipe->bind_fragment_sampler_states = nvc0_fp_sampler_states_bind;
+ pipe->bind_geometry_sampler_states = nvc0_gp_sampler_states_bind;
+
+ pipe->create_sampler_view = nvc0_create_sampler_view;
+ pipe->sampler_view_destroy = nvc0_sampler_view_destroy;
+ pipe->set_vertex_sampler_views = nvc0_vp_set_sampler_views;
+ pipe->set_fragment_sampler_views = nvc0_fp_set_sampler_views;
+ pipe->set_geometry_sampler_views = nvc0_gp_set_sampler_views;
+
+ pipe->create_vs_state = nvc0_vp_state_create;
+ pipe->create_fs_state = nvc0_fp_state_create;
+ pipe->create_gs_state = nvc0_gp_state_create;
+ pipe->bind_vs_state = nvc0_vp_state_bind;
+ pipe->bind_fs_state = nvc0_fp_state_bind;
+ pipe->bind_gs_state = nvc0_gp_state_bind;
+ pipe->delete_vs_state = nvc0_sp_state_delete;
+ pipe->delete_fs_state = nvc0_sp_state_delete;
+ pipe->delete_gs_state = nvc0_sp_state_delete;
+
+ pipe->set_blend_color = nvc0_set_blend_color;
+ pipe->set_stencil_ref = nvc0_set_stencil_ref;
+ pipe->set_clip_state = nvc0_set_clip_state;
+ pipe->set_sample_mask = nvc0_set_sample_mask;
+ pipe->set_constant_buffer = nvc0_set_constant_buffer;
+ pipe->set_framebuffer_state = nvc0_set_framebuffer_state;
+ pipe->set_polygon_stipple = nvc0_set_polygon_stipple;
+ pipe->set_scissor_state = nvc0_set_scissor_state;
+ pipe->set_viewport_state = nvc0_set_viewport_state;
+
+ pipe->create_vertex_elements_state = nvc0_vertex_state_create;
+ pipe->delete_vertex_elements_state = nvc0_vertex_state_delete;
+ pipe->bind_vertex_elements_state = nvc0_vertex_state_bind;
+
+ pipe->set_vertex_buffers = nvc0_set_vertex_buffers;
+ pipe->set_index_buffer = nvc0_set_index_buffer;
+
+ pipe->create_stream_output_state = nvc0_tfb_state_create;
+ pipe->delete_stream_output_state = nvc0_tfb_state_delete;
+ pipe->bind_stream_output_state = nvc0_tfb_state_bind;
+ pipe->set_stream_output_buffers = nvc0_set_transform_feedback_buffers;
+
+ pipe->redefine_user_buffer = u_default_redefine_user_buffer;
+}
+
diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c
new file mode 100644
index 0000000000..bb81480bab
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c
@@ -0,0 +1,483 @@
+
+#include "nvc0_context.h"
+#include "os/os_time.h"
+
+static void
+nvc0_validate_zcull(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+ struct nvc0_surface *sf = nvc0_surface(fb->zsbuf);
+ struct nvc0_miptree *mt = nvc0_miptree(sf->base.texture);
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t size;
+ uint32_t offset = align(mt->total_size, 1 << 17);
+ unsigned width, height;
+
+ assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2);
+
+ size = mt->total_size * 2;
+
+ height = align(fb->height, 32);
+ width = fb->width % 224;
+ if (width)
+ width = fb->width + (224 - width);
+ else
+ width = fb->width;
+
+ MARK_RING (chan, 23, 4);
+ BEGIN_RING(chan, RING_3D_(0x1590), 1); /* ZCULL_REGION_INDEX (bits 0x3f) */
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D_(0x07e8), 2); /* ZCULL_ADDRESS_A_HIGH */
+ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ offset += 1 << 17;
+ BEGIN_RING(chan, RING_3D_(0x07f0), 2); /* ZCULL_ADDRESS_B_HIGH */
+ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ BEGIN_RING(chan, RING_3D_(0x07e0), 2);
+ OUT_RING (chan, size);
+ OUT_RING (chan, size >> 16);
+ BEGIN_RING(chan, RING_3D_(0x15c8), 1); /* bits 0x3 */
+ OUT_RING (chan, 2);
+ BEGIN_RING(chan, RING_3D_(0x07c0), 4); /* ZCULL dimensions */
+ OUT_RING (chan, width);
+ OUT_RING (chan, height);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D_(0x15fc), 2);
+ OUT_RING (chan, 0); /* bits 0xffff */
+ OUT_RING (chan, 0); /* bits 0xffff */
+ BEGIN_RING(chan, RING_3D_(0x1958), 1);
+ OUT_RING (chan, 0); /* bits ~0 */
+}
+
+static void
+nvc0_validate_fb(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+ unsigned i;
+ boolean serialize = FALSE;
+
+ nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_FRAME);
+
+ BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
+ OUT_RING (chan, (076543210 << 4) | fb->nr_cbufs);
+ BEGIN_RING(chan, RING_3D(SCREEN_SCISSOR_HORIZ), 2);
+ OUT_RING (chan, fb->width << 16);
+ OUT_RING (chan, fb->height << 16);
+
+ MARK_RING(chan, 9 * fb->nr_cbufs, 2 * fb->nr_cbufs);
+
+ for (i = 0; i < fb->nr_cbufs; ++i) {
+ struct nvc0_miptree *mt = nvc0_miptree(fb->cbufs[i]->texture);
+ struct nvc0_surface *sf = nvc0_surface(fb->cbufs[i]);
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t offset = sf->offset;
+
+ BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 9);
+ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, sf->width);
+ OUT_RING (chan, sf->height);
+ OUT_RING (chan, nvc0_format_table[sf->base.format].rt);
+ OUT_RING (chan, (mt->layout_3d << 16) |
+ mt->level[sf->base.u.tex.level].tile_mode);
+ OUT_RING (chan, sf->base.u.tex.first_layer + sf->depth);
+ OUT_RING (chan, mt->layer_stride >> 2);
+ OUT_RING (chan, sf->base.u.tex.first_layer);
+
+ if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
+ serialize = TRUE;
+ mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ /* only register for writing, otherwise we'd always serialize here */
+ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ }
+
+ if (fb->zsbuf) {
+ struct nvc0_miptree *mt = nvc0_miptree(fb->zsbuf->texture);
+ struct nvc0_surface *sf = nvc0_surface(fb->zsbuf);
+ struct nouveau_bo *bo = mt->base.bo;
+ int unk = mt->base.base.target == PIPE_TEXTURE_2D;
+ uint32_t offset = sf->offset;
+
+ MARK_RING (chan, 12, 2);
+ BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5);
+ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, nvc0_format_table[fb->zsbuf->format].rt);
+ OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode);
+ OUT_RING (chan, mt->layer_stride >> 2);
+ BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3);
+ OUT_RING (chan, sf->width);
+ OUT_RING (chan, sf->height);
+ OUT_RING (chan, (unk << 16) |
+ (sf->base.u.tex.first_layer + sf->depth));
+ BEGIN_RING(chan, RING_3D(ZETA_BASE_LAYER), 1);
+ OUT_RING (chan, sf->base.u.tex.first_layer);
+
+ if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
+ serialize = TRUE;
+ mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ } else {
+ BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
+ OUT_RING (chan, 0);
+ }
+
+ if (serialize) {
+ BEGIN_RING(chan, RING_3D(SERIALIZE), 1);
+ OUT_RING (chan, 0);
+ }
+}
+
+static void
+nvc0_validate_blend_colour(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+ BEGIN_RING(chan, RING_3D(BLEND_COLOR(0)), 4);
+ OUT_RINGf (chan, nvc0->blend_colour.color[0]);
+ OUT_RINGf (chan, nvc0->blend_colour.color[1]);
+ OUT_RINGf (chan, nvc0->blend_colour.color[2]);
+ OUT_RINGf (chan, nvc0->blend_colour.color[3]);
+}
+
+static void
+nvc0_validate_stencil_ref(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+ BEGIN_RING(chan, RING_3D(STENCIL_FRONT_FUNC_REF), 1);
+ OUT_RING (chan, nvc0->stencil_ref.ref_value[0]);
+ BEGIN_RING(chan, RING_3D(STENCIL_BACK_FUNC_REF), 1);
+ OUT_RING (chan, nvc0->stencil_ref.ref_value[1]);
+}
+
+static void
+nvc0_validate_stipple(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ unsigned i;
+
+ BEGIN_RING(chan, RING_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
+ for (i = 0; i < 32; ++i)
+ OUT_RING(chan, util_bswap32(nvc0->stipple.stipple[i]));
+}
+
+static void
+nvc0_validate_scissor(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct pipe_scissor_state *s = &nvc0->scissor;
+
+ if (!(nvc0->dirty & NVC0_NEW_SCISSOR) &&
+ nvc0->rast->pipe.scissor == nvc0->state.scissor)
+ return;
+ nvc0->state.scissor = nvc0->rast->pipe.scissor;
+
+ BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
+ if (nvc0->rast->pipe.scissor) {
+ OUT_RING(chan, (s->maxx << 16) | s->minx);
+ OUT_RING(chan, (s->maxy << 16) | s->miny);
+ } else {
+ OUT_RING(chan, (0xffff << 16) | 0);
+ OUT_RING(chan, (0xffff << 16) | 0);
+ }
+}
+
+static void
+nvc0_validate_viewport(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct pipe_viewport_state *vp = &nvc0->viewport;
+ int x, y, w, h;
+ float zmin, zmax;
+
+ BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSLATE_X(0)), 3);
+ OUT_RINGf (chan, vp->translate[0]);
+ OUT_RINGf (chan, vp->translate[1]);
+ OUT_RINGf (chan, vp->translate[2]);
+ BEGIN_RING(chan, RING_3D(VIEWPORT_SCALE_X(0)), 3);
+ OUT_RINGf (chan, vp->scale[0]);
+ OUT_RINGf (chan, vp->scale[1]);
+ OUT_RINGf (chan, vp->scale[2]);
+
+ /* now set the viewport rectangle to viewport dimensions for clipping */
+
+ x = (int)(vp->translate[0] - fabsf(vp->scale[0]));
+ y = (int)(vp->translate[1] - fabsf(vp->scale[1]));
+ w = (int)fabsf(2.0f * vp->scale[0]);
+ h = (int)fabsf(2.0f * vp->scale[1]);
+ zmin = vp->translate[2] - fabsf(vp->scale[2]);
+ zmax = vp->translate[2] + fabsf(vp->scale[2]);
+
+ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
+ OUT_RING (chan, (w << 16) | x);
+ OUT_RING (chan, (h << 16) | y);
+ BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2);
+ OUT_RINGf (chan, zmin);
+ OUT_RINGf (chan, zmax);
+}
+
+static void
+nvc0_validate_clip(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ uint32_t clip;
+
+ if (nvc0->clip.depth_clamp) {
+ clip =
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 |
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR |
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR |
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2;
+ } else {
+ clip = NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1;
+ }
+
+ BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1);
+ OUT_RING (chan, clip);
+
+ if (nvc0->clip.nr) {
+ struct nouveau_bo *bo = nvc0->screen->uniforms;
+
+ MARK_RING (chan, 6 + nvc0->clip.nr * 4, 2);
+ BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
+ OUT_RING (chan, 256);
+ OUT_RELOCh(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ BEGIN_RING_1I(chan, RING_3D(CB_POS), nvc0->clip.nr * 4 + 1);
+ OUT_RING (chan, 0);
+ OUT_RINGp (chan, &nvc0->clip.ucp[0][0], nvc0->clip.nr * 4);
+
+ BEGIN_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 1);
+ OUT_RING (chan, (1 << nvc0->clip.nr) - 1);
+ } else {
+ IMMED_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 0);
+ }
+}
+
+static void
+nvc0_validate_blend(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+ WAIT_RING(chan, nvc0->blend->size);
+ OUT_RINGp(chan, nvc0->blend->state, nvc0->blend->size);
+}
+
+static void
+nvc0_validate_zsa(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+ WAIT_RING(chan, nvc0->zsa->size);
+ OUT_RINGp(chan, nvc0->zsa->state, nvc0->zsa->size);
+}
+
+static void
+nvc0_validate_rasterizer(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+ WAIT_RING(chan, nvc0->rast->size);
+ OUT_RINGp(chan, nvc0->rast->state, nvc0->rast->size);
+}
+
+static void
+nvc0_constbufs_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nouveau_bo *bo;
+ unsigned s;
+
+ for (s = 0; s < 5; ++s) {
+ struct nv04_resource *res;
+ int i;
+
+ while (nvc0->constbuf_dirty[s]) {
+ unsigned base = 0;
+ unsigned offset = 0, words = 0;
+ boolean rebind = TRUE;
+
+ i = ffs(nvc0->constbuf_dirty[s]) - 1;
+ nvc0->constbuf_dirty[s] &= ~(1 << i);
+
+ res = nv04_resource(nvc0->constbuf[s][i]);
+ if (!res) {
+ BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1);
+ OUT_RING (chan, (i << 4) | 0);
+ if (i == 0)
+ nvc0->state.uniform_buffer_bound[s] = 0;
+ continue;
+ }
+
+ if (!nouveau_resource_mapped_by_gpu(&res->base)) {
+ if (i == 0) {
+ base = s << 16;
+ bo = nvc0->screen->uniforms;
+
+ if (nvc0->state.uniform_buffer_bound[s] >= res->base.width0)
+ rebind = FALSE;
+ else
+ nvc0->state.uniform_buffer_bound[s] =
+ align(res->base.width0, 0x100);
+ } else {
+ bo = res->bo;
+ }
+#if 0
+ nvc0_m2mf_push_linear(nvc0, bo, NOUVEAU_BO_VRAM,
+ base, res->base.width0, res->data);
+ BEGIN_RING(chan, RING_3D_(0x021c), 1);
+ OUT_RING (chan, 0x1111);
+#else
+ words = res->base.width0 / 4;
+#endif
+ } else {
+ bo = res->bo;
+ if (i == 0)
+ nvc0->state.uniform_buffer_bound[s] = 0;
+ }
+
+ if (bo != nvc0->screen->uniforms)
+ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_CONSTANT, res,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+
+ if (rebind) {
+ MARK_RING (chan, 4, 2);
+ BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
+ OUT_RING (chan, align(res->base.width0, 0x100));
+ OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1);
+ OUT_RING (chan, (i << 4) | 1);
+ }
+
+ while (words) {
+ unsigned nr = AVAIL_RING(chan);
+
+ if (nr < 16) {
+ FIRE_RING(chan);
+ continue;
+ }
+ nr = MIN2(MIN2(nr - 6, words), NV04_PFIFO_MAX_PACKET_LEN - 1);
+
+ MARK_RING (chan, nr + 5, 2);
+ BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
+ OUT_RING (chan, align(res->base.width0, 0x100));
+ OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ BEGIN_RING_1I(chan, RING_3D(CB_POS), nr + 1);
+ OUT_RING (chan, offset);
+ OUT_RINGp (chan, &res->data[offset], nr);
+
+ offset += nr * 4;
+ words -= nr;
+ }
+ }
+ }
+}
+
+static void
+nvc0_validate_derived_1(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ boolean early_z;
+
+ early_z = nvc0->fragprog->fp.early_z && !nvc0->zsa->pipe.alpha.enabled;
+
+ if (early_z != nvc0->state.early_z) {
+ nvc0->state.early_z = early_z;
+ IMMED_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), early_z);
+ }
+}
+
+static void
+nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
+{
+ struct nvc0_context *ctx_from = ctx_to->screen->cur_ctx;
+
+ if (ctx_from)
+ ctx_to->state = ctx_from->state;
+
+ ctx_to->dirty = ~0;
+
+ if (!ctx_to->vertex)
+ ctx_to->dirty &= ~(NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS);
+
+ if (!ctx_to->vertprog)
+ ctx_to->dirty &= ~NVC0_NEW_VERTPROG;
+ if (!ctx_to->fragprog)
+ ctx_to->dirty &= ~NVC0_NEW_FRAGPROG;
+
+ if (!ctx_to->blend)
+ ctx_to->dirty &= ~NVC0_NEW_BLEND;
+ if (!ctx_to->rast)
+ ctx_to->dirty &= ~NVC0_NEW_RASTERIZER;
+ if (!ctx_to->zsa)
+ ctx_to->dirty &= ~NVC0_NEW_ZSA;
+
+ ctx_to->screen->base.channel->user_private = ctx_to->screen->cur_ctx =
+ ctx_to;
+}
+
+static struct state_validate {
+ void (*func)(struct nvc0_context *);
+ uint32_t states;
+} validate_list[] = {
+ { nvc0_validate_fb, NVC0_NEW_FRAMEBUFFER },
+ { nvc0_validate_blend, NVC0_NEW_BLEND },
+ { nvc0_validate_zsa, NVC0_NEW_ZSA },
+ { nvc0_validate_rasterizer, NVC0_NEW_RASTERIZER },
+ { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR },
+ { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF },
+ { nvc0_validate_stipple, NVC0_NEW_STIPPLE },
+ { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_RASTERIZER },
+ { nvc0_validate_viewport, NVC0_NEW_VIEWPORT },
+ { nvc0_validate_clip, NVC0_NEW_CLIP },
+ { nvc0_vertprog_validate, NVC0_NEW_VERTPROG },
+ { nvc0_tctlprog_validate, NVC0_NEW_TCTLPROG },
+ { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
+ { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
+ { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG },
+ { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA },
+ { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF },
+ { nvc0_validate_textures, NVC0_NEW_TEXTURES },
+ { nvc0_validate_samplers, NVC0_NEW_SAMPLERS },
+ { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
+ { nvc0_tfb_validate, NVC0_NEW_TFB | NVC0_NEW_TFB_BUFFERS }
+};
+#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
+
+boolean
+nvc0_state_validate(struct nvc0_context *nvc0)
+{
+ unsigned i;
+
+ if (nvc0->screen->cur_ctx != nvc0)
+ nvc0_switch_pipe_context(nvc0);
+
+ if (nvc0->dirty) {
+ for (i = 0; i < validate_list_len; ++i) {
+ struct state_validate *validate = &validate_list[i];
+
+ if (nvc0->dirty & validate->states)
+ validate->func(nvc0);
+ }
+ nvc0->dirty = 0;
+ }
+
+ nvc0_bufctx_emit_relocs(nvc0);
+
+ return TRUE;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h
new file mode 100644
index 0000000000..8222f9375e
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h
@@ -0,0 +1,60 @@
+
+#ifndef __NVC0_STATEOBJ_H__
+#define __NVC0_STATEOBJ_H__
+
+#include "pipe/p_state.h"
+
+#define SB_BEGIN_3D(so, m, s) \
+ (so)->state[(so)->size++] = \
+ (0x2 << 28) | ((s) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2)
+
+#define SB_IMMED_3D(so, m, d) \
+ (so)->state[(so)->size++] = \
+ (0x8 << 28) | ((d) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2)
+
+#define SB_DATA(so, u) (so)->state[(so)->size++] = (u)
+
+#include "nv50/nv50_stateobj_tex.h"
+
+struct nvc0_blend_stateobj {
+ struct pipe_blend_state pipe;
+ int size;
+ uint32_t state[72];
+};
+
+struct nvc0_rasterizer_stateobj {
+ struct pipe_rasterizer_state pipe;
+ int size;
+ uint32_t state[36];
+};
+
+struct nvc0_zsa_stateobj {
+ struct pipe_depth_stencil_alpha_state pipe;
+ int size;
+ uint32_t state[29];
+};
+
+struct nvc0_vertex_element {
+ struct pipe_vertex_element pipe;
+ uint32_t state;
+};
+
+struct nvc0_vertex_stateobj {
+ struct translate *translate;
+ unsigned num_elements;
+ uint32_t instance_elts;
+ uint32_t instance_bufs;
+ boolean need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */
+ unsigned vtx_size;
+ unsigned vtx_per_packet_max;
+ struct nvc0_vertex_element element[0];
+};
+
+/* will have to lookup index -> location qualifier from nvc0_program */
+struct nvc0_transform_feedback_state {
+ uint32_t stride[4];
+ uint8_t varying_count[4];
+ uint8_t varying_index[0];
+};
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c
new file mode 100644
index 0000000000..fc5f45ea25
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_surface.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdint.h>
+
+#include "pipe/p_defines.h"
+
+#include "util/u_inlines.h"
+#include "util/u_pack_color.h"
+#include "util/u_format.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+#include "nvc0_transfer.h"
+
+#include "nv50/nv50_defs.xml.h"
+
+#define NVC0_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL
+
+/* return TRUE for formats that can be converted among each other by NVC0_2D */
+static INLINE boolean
+nvc0_2d_format_faithful(enum pipe_format format)
+{
+ uint8_t id = nvc0_format_table[format].rt;
+
+ return (id >= 0xc0) && (NVC0_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)));
+}
+
+static INLINE uint8_t
+nvc0_2d_format(enum pipe_format format)
+{
+ uint8_t id = nvc0_format_table[format].rt;
+
+ /* Hardware values for color formats range from 0xc0 to 0xff,
+ * but the 2D engine doesn't support all of them.
+ */
+ if (nvc0_2d_format_faithful(format))
+ return id;
+
+ switch (util_format_get_blocksize(format)) {
+ case 1:
+ return NV50_SURFACE_FORMAT_R8_UNORM;
+ case 2:
+ return NV50_SURFACE_FORMAT_R16_UNORM;
+ case 4:
+ return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM;
+ case 8:
+ return NV50_SURFACE_FORMAT_R16G16B16A16_UNORM;
+ case 16:
+ return NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT;
+ default:
+ return 0;
+ }
+}
+
+static int
+nvc0_2d_texture_set(struct nouveau_channel *chan, int dst,
+ struct nvc0_miptree *mt, unsigned level, unsigned layer)
+{
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t width, height, depth;
+ uint32_t format;
+ uint32_t mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT;
+ uint32_t flags = mt->base.domain | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD);
+ uint32_t offset = mt->level[level].offset;
+
+ format = nvc0_2d_format(mt->base.base.format);
+ if (!format) {
+ NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
+ util_format_name(mt->base.base.format));
+ return 1;
+ }
+
+ width = u_minify(mt->base.base.width0, level);
+ height = u_minify(mt->base.base.height0, level);
+ depth = u_minify(mt->base.base.depth0, level);
+
+ /* layer has to be < depth, and depth > tile depth / 2 */
+
+ if (!mt->layout_3d) {
+ offset += mt->layer_stride * layer;
+ layer = 0;
+ depth = 1;
+ } else
+ if (!dst) {
+ offset += nvc0_miptree_zslice_offset(mt, level, layer);
+ layer = 0;
+ }
+
+ if (!(bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK)) {
+ BEGIN_RING(chan, RING_2D_(mthd), 2);
+ OUT_RING (chan, format);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_2D_(mthd + 0x14), 5);
+ OUT_RING (chan, mt->level[level].pitch);
+ OUT_RING (chan, width);
+ OUT_RING (chan, height);
+ OUT_RELOCh(chan, bo, offset, flags);
+ OUT_RELOCl(chan, bo, offset, flags);
+ } else {
+ BEGIN_RING(chan, RING_2D_(mthd), 5);
+ OUT_RING (chan, format);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, mt->level[level].tile_mode);
+ OUT_RING (chan, depth);
+ OUT_RING (chan, layer);
+ BEGIN_RING(chan, RING_2D_(mthd + 0x18), 4);
+ OUT_RING (chan, width);
+ OUT_RING (chan, height);
+ OUT_RELOCh(chan, bo, offset, flags);
+ OUT_RELOCl(chan, bo, offset, flags);
+ }
+
+#if 0
+ if (dst) {
+ BEGIN_RING(chan, RING_2D_(NVC0_2D_CLIP_X), 4);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, width);
+ OUT_RING (chan, height);
+ }
+#endif
+ return 0;
+}
+
+static int
+nvc0_2d_texture_do_copy(struct nouveau_channel *chan,
+ struct nvc0_miptree *dst, unsigned dst_level,
+ unsigned dx, unsigned dy, unsigned dz,
+ struct nvc0_miptree *src, unsigned src_level,
+ unsigned sx, unsigned sy, unsigned sz,
+ unsigned w, unsigned h)
+{
+ int ret;
+
+ ret = MARK_RING(chan, 2 * 16 + 32, 4);
+ if (ret)
+ return ret;
+
+ ret = nvc0_2d_texture_set(chan, 1, dst, dst_level, dz);
+ if (ret)
+ return ret;
+
+ ret = nvc0_2d_texture_set(chan, 0, src, src_level, sz);
+ if (ret)
+ return ret;
+
+ /* 0/1 = CENTER/CORNER, 10/00 = POINT/BILINEAR */
+ BEGIN_RING(chan, RING_2D(BLIT_CONTROL), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_2D(BLIT_DST_X), 4);
+ OUT_RING (chan, dx);
+ OUT_RING (chan, dy);
+ OUT_RING (chan, w);
+ OUT_RING (chan, h);
+ BEGIN_RING(chan, RING_2D(BLIT_DU_DX_FRACT), 4);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_2D(BLIT_SRC_X_FRACT), 4);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, sx);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, sy);
+
+ return 0;
+}
+
+static void
+nvc0_setup_m2mf_rect(struct nvc0_m2mf_rect *rect,
+ struct pipe_resource *restrict res, unsigned l,
+ unsigned x, unsigned y, unsigned z)
+{
+ struct nvc0_miptree *mt = nvc0_miptree(res);
+ const unsigned w = u_minify(res->width0, l);
+ const unsigned h = u_minify(res->height0, l);
+
+ rect->bo = mt->base.bo;
+ rect->domain = mt->base.domain;
+ rect->base = mt->level[l].offset;
+ rect->pitch = mt->level[l].pitch;
+ if (util_format_is_plain(res->format)) {
+ rect->width = w;
+ rect->height = h;
+ rect->x = x;
+ rect->y = y;
+ } else {
+ rect->width = util_format_get_nblocksx(res->format, w);
+ rect->height = util_format_get_nblocksy(res->format, h);
+ rect->x = util_format_get_nblocksx(res->format, x);
+ rect->y = util_format_get_nblocksy(res->format, y);
+ }
+ rect->tile_mode = mt->level[l].tile_mode;
+ rect->cpp = util_format_get_blocksize(res->format);
+
+ if (mt->layout_3d) {
+ rect->z = z;
+ rect->depth = u_minify(res->depth0, l);
+ } else {
+ rect->base += z * mt->layer_stride;
+ rect->z = 0;
+ rect->depth = 1;
+ }
+}
+
+static void
+nvc0_resource_copy_region(struct pipe_context *pipe,
+ struct pipe_resource *dst, unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src, unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ struct nvc0_screen *screen = nvc0_context(pipe)->screen;
+ int ret;
+ unsigned dst_layer = dstz, src_layer = src_box->z;
+
+ nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+
+ if (src->format == dst->format) {
+ struct nvc0_m2mf_rect drect, srect;
+ unsigned i;
+ unsigned nx = util_format_get_nblocksx(src->format, src_box->width);
+ unsigned ny = util_format_get_nblocksy(src->format, src_box->height);
+
+ nvc0_setup_m2mf_rect(&drect, dst, dst_level, dstx, dsty, dstz);
+ nvc0_setup_m2mf_rect(&srect, src, src_level,
+ src_box->x, src_box->y, src_box->z);
+
+ for (i = 0; i < src_box->depth; ++i) {
+ nvc0_m2mf_transfer_rect(&screen->base.base, &drect, &srect, nx, ny);
+
+ if (nvc0_miptree(dst)->layout_3d)
+ drect.z++;
+ else
+ drect.base += nvc0_miptree(dst)->layer_stride;
+
+ if (nvc0_miptree(src)->layout_3d)
+ srect.z++;
+ else
+ srect.base += nvc0_miptree(src)->layer_stride;
+ }
+ return;
+ }
+
+ assert(nvc0_2d_format_faithful(src->format));
+ assert(nvc0_2d_format_faithful(dst->format));
+
+ for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) {
+ ret = nvc0_2d_texture_do_copy(screen->base.channel,
+ nvc0_miptree(dst), dst_level,
+ dstx, dsty, dst_layer,
+ nvc0_miptree(src), src_level,
+ src_box->x, src_box->y, src_layer,
+ src_box->width, src_box->height);
+ if (ret)
+ return;
+ }
+}
+
+static void
+nvc0_clear_render_target(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ const float *rgba,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct nvc0_context *nv50 = nvc0_context(pipe);
+ struct nvc0_screen *screen = nv50->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nvc0_miptree *mt = nvc0_miptree(dst->texture);
+ struct nvc0_surface *sf = nvc0_surface(dst);
+ struct nouveau_bo *bo = mt->base.bo;
+
+ BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4);
+ OUT_RINGf (chan, rgba[0]);
+ OUT_RINGf (chan, rgba[1]);
+ OUT_RINGf (chan, rgba[2]);
+ OUT_RINGf (chan, rgba[3]);
+
+ if (MARK_RING(chan, 18, 2))
+ return;
+
+ BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(0)), 9);
+ OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RING (chan, sf->width);
+ OUT_RING (chan, sf->height);
+ OUT_RING (chan, nvc0_format_table[dst->format].rt);
+ OUT_RING (chan, (mt->layout_3d << 16) |
+ mt->level[sf->base.u.tex.level].tile_mode);
+ OUT_RING (chan, dst->u.tex.first_layer + sf->depth);
+ OUT_RING (chan, mt->layer_stride >> 2);
+ OUT_RING (chan, dst->u.tex.first_layer);
+
+ BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 2);
+ OUT_RING (chan, ((dstx + width) << 16) | dstx);
+ OUT_RING (chan, ((dsty + height) << 16) | dsty);
+ IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 1);
+
+ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
+ OUT_RING (chan, 0x3c);
+
+ IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 0);
+
+ nv50->dirty |= NVC0_NEW_FRAMEBUFFER;
+}
+
+static void
+nvc0_clear_depth_stencil(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned clear_flags,
+ double depth,
+ unsigned stencil,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct nvc0_context *nv50 = nvc0_context(pipe);
+ struct nvc0_screen *screen = nv50->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nvc0_miptree *mt = nvc0_miptree(dst->texture);
+ struct nvc0_surface *sf = nvc0_surface(dst);
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t mode = 0;
+ int unk = mt->base.base.target == PIPE_TEXTURE_2D;
+
+ if (clear_flags & PIPE_CLEAR_DEPTH) {
+ BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1);
+ OUT_RINGf (chan, depth);
+ mode |= NVC0_3D_CLEAR_BUFFERS_Z;
+ }
+
+ if (clear_flags & PIPE_CLEAR_STENCIL) {
+ BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1);
+ OUT_RING (chan, stencil & 0xff);
+ mode |= NVC0_3D_CLEAR_BUFFERS_S;
+ }
+
+ if (MARK_RING(chan, 17, 2))
+ return;
+
+ BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5);
+ OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RING (chan, nvc0_format_table[dst->format].rt);
+ OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode);
+ OUT_RING (chan, mt->layer_stride >> 2);
+ BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3);
+ OUT_RING (chan, sf->width);
+ OUT_RING (chan, sf->height);
+ OUT_RING (chan, (unk << 16) | (dst->u.tex.first_layer + sf->depth));
+ BEGIN_RING(chan, RING_3D(ZETA_BASE_LAYER), 1);
+ OUT_RING (chan, dst->u.tex.first_layer);
+
+ BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 2);
+ OUT_RING (chan, ((dstx + width) << 16) | dstx);
+ OUT_RING (chan, ((dsty + height) << 16) | dsty);
+ IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 1);
+
+ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
+ OUT_RING (chan, mode);
+
+ IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 0);
+
+ nv50->dirty |= NVC0_NEW_FRAMEBUFFER;
+}
+
+void
+nvc0_clear(struct pipe_context *pipe, unsigned buffers,
+ const float *rgba, double depth, unsigned stencil)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+ unsigned i;
+ const unsigned dirty = nvc0->dirty;
+ uint32_t mode = 0;
+
+ /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
+ nvc0->dirty &= NVC0_NEW_FRAMEBUFFER;
+ if (!nvc0_state_validate(nvc0))
+ return;
+
+ if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
+ BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4);
+ OUT_RINGf (chan, rgba[0]);
+ OUT_RINGf (chan, rgba[1]);
+ OUT_RINGf (chan, rgba[2]);
+ OUT_RINGf (chan, rgba[3]);
+ mode =
+ NVC0_3D_CLEAR_BUFFERS_R | NVC0_3D_CLEAR_BUFFERS_G |
+ NVC0_3D_CLEAR_BUFFERS_B | NVC0_3D_CLEAR_BUFFERS_A;
+ }
+
+ if (buffers & PIPE_CLEAR_DEPTH) {
+ BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1);
+ OUT_RING (chan, fui(depth));
+ mode |= NVC0_3D_CLEAR_BUFFERS_Z;
+ }
+
+ if (buffers & PIPE_CLEAR_STENCIL) {
+ BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1);
+ OUT_RING (chan, stencil & 0xff);
+ mode |= NVC0_3D_CLEAR_BUFFERS_S;
+ }
+
+ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
+ OUT_RING (chan, mode);
+
+ for (i = 1; i < fb->nr_cbufs; i++) {
+ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
+ OUT_RING (chan, (i << 6) | 0x3c);
+ }
+
+ nvc0->dirty = dirty & ~NVC0_NEW_FRAMEBUFFER;
+}
+
+void
+nvc0_init_surface_functions(struct nvc0_context *nvc0)
+{
+ struct pipe_context *pipe = &nvc0->base.pipe;
+
+ pipe->resource_copy_region = nvc0_resource_copy_region;
+ pipe->clear_render_target = nvc0_clear_render_target;
+ pipe->clear_depth_stencil = nvc0_clear_depth_stencil;
+}
+
+
diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c
new file mode 100644
index 0000000000..24850b1998
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_tex.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+#include "nv50/nv50_texture.xml.h"
+
+#include "util/u_format.h"
+
+#define NV50_TIC_0_SWIZZLE__MASK \
+ (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \
+ NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
+
+static INLINE uint32_t
+nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int)
+{
+ switch (swz) {
+ case PIPE_SWIZZLE_RED:
+ return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT;
+ case PIPE_SWIZZLE_GREEN:
+ return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT;
+ case PIPE_SWIZZLE_BLUE:
+ return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT;
+ case PIPE_SWIZZLE_ALPHA:
+ return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT;
+ case PIPE_SWIZZLE_ONE:
+ return tex_int ? NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT;
+ case PIPE_SWIZZLE_ZERO:
+ default:
+ return NV50_TIC_MAP_ZERO;
+ }
+}
+
+struct pipe_sampler_view *
+nvc0_create_sampler_view(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *templ)
+{
+ const struct util_format_description *desc;
+ uint32_t *tic;
+ uint32_t swz[4];
+ uint32_t depth;
+ struct nv50_tic_entry *view;
+ struct nvc0_miptree *mt = nvc0_miptree(texture);
+ boolean tex_int;
+
+ view = MALLOC_STRUCT(nv50_tic_entry);
+ if (!view)
+ return NULL;
+
+ view->pipe = *templ;
+ view->pipe.reference.count = 1;
+ view->pipe.texture = NULL;
+ view->pipe.context = pipe;
+
+ view->id = -1;
+
+ pipe_resource_reference(&view->pipe.texture, texture);
+
+ tic = &view->tic[0];
+
+ desc = util_format_description(view->pipe.format);
+
+ /* TIC[0] */
+
+ tic[0] = nvc0_format_table[view->pipe.format].tic;
+
+ tex_int = FALSE; /* XXX: integer textures */
+
+ swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int);
+ swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int);
+ swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int);
+ swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int);
+ tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) |
+ (swz[0] << NV50_TIC_0_MAPR__SHIFT) |
+ (swz[1] << NV50_TIC_0_MAPG__SHIFT) |
+ (swz[2] << NV50_TIC_0_MAPB__SHIFT) |
+ (swz[3] << NV50_TIC_0_MAPA__SHIFT);
+
+ tic[1] = /* mt->base.bo->offset; */ 0;
+ tic[2] = /* mt->base.bo->offset >> 32 */ 0;
+
+ tic[2] |= 0x10001000 | NV50_TIC_2_NO_BORDER;
+
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+ tic[2] |= NV50_TIC_2_COLORSPACE_SRGB;
+
+ if (mt->base.base.target != PIPE_TEXTURE_RECT)
+ tic[2] |= NV50_TIC_2_NORMALIZED_COORDS;
+
+ tic[2] |=
+ ((mt->base.bo->tile_mode & 0x0f0) << (22 - 4)) |
+ ((mt->base.bo->tile_mode & 0xf00) << (25 - 8));
+
+ depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
+
+ if (mt->base.base.target == PIPE_TEXTURE_1D_ARRAY ||
+ mt->base.base.target == PIPE_TEXTURE_2D_ARRAY) {
+ /* there doesn't seem to be a base layer field in TIC */
+ tic[1] = view->pipe.u.tex.first_layer * mt->layer_stride;
+ depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
+ }
+
+ switch (mt->base.base.target) {
+ case PIPE_TEXTURE_1D:
+ tic[2] |= NV50_TIC_2_TARGET_1D;
+ break;
+ case PIPE_TEXTURE_2D:
+ tic[2] |= NV50_TIC_2_TARGET_2D;
+ break;
+ case PIPE_TEXTURE_RECT:
+ tic[2] |= NV50_TIC_2_TARGET_RECT;
+ break;
+ case PIPE_TEXTURE_3D:
+ tic[2] |= NV50_TIC_2_TARGET_3D;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ depth /= 6;
+ if (depth > 1)
+ tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY;
+ else
+ tic[2] |= NV50_TIC_2_TARGET_CUBE;
+ break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY;
+ break;
+ case PIPE_TEXTURE_2D_ARRAY:
+ tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY;
+ break;
+ case PIPE_BUFFER:
+ tic[2] |= NV50_TIC_2_TARGET_BUFFER | NV50_TIC_2_LINEAR;
+ break;
+ default:
+ NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target);
+ return FALSE;
+ }
+
+ if (mt->base.base.target == PIPE_BUFFER)
+ tic[3] = mt->base.base.width0;
+ else
+ tic[3] = 0x00300000;
+
+ tic[4] = (1 << 31) | mt->base.base.width0;
+
+ tic[5] = mt->base.base.height0 & 0xffff;
+ tic[5] |= depth << 16;
+ tic[5] |= mt->base.base.last_level << 28;
+
+ tic[6] = 0x03000000;
+
+ tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
+
+ return &view->pipe;
+}
+
+static boolean
+nvc0_validate_tic(struct nvc0_context *nvc0, int s)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nouveau_bo *txc = nvc0->screen->txc;
+ unsigned i;
+ boolean need_flush = FALSE;
+
+ for (i = 0; i < nvc0->num_textures[s]; ++i) {
+ struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
+ struct nv04_resource *res;
+
+ if (!tic) {
+ BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
+ OUT_RING (chan, (i << 1) | 0);
+ continue;
+ }
+ res = &nvc0_miptree(tic->pipe.texture)->base;
+
+ if (tic->id < 0) {
+ uint32_t offset = tic->tic[1];
+
+ tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
+
+ MARK_RING (chan, 9 + 8, 4);
+ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
+ OUT_RELOCh(chan, txc, tic->id * 32, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, txc, tic->id * 32, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
+ OUT_RING (chan, 32);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_MF(EXEC), 1);
+ OUT_RING (chan, 0x100111);
+ BEGIN_RING_NI(chan, RING_MF(DATA), 8);
+ OUT_RING (chan, tic->tic[0]);
+ OUT_RELOCl(chan, res->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOC (chan, res->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_HIGH | NOUVEAU_BO_OR, tic->tic[2], tic->tic[2]);
+ OUT_RINGp (chan, &tic->tic[3], 5);
+
+ need_flush = TRUE;
+ } else
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1);
+ OUT_RING (chan, (tic->id << 4) | 1);
+ }
+ nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
+
+ res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TEXTURES, res,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+
+ BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
+ OUT_RING (chan, (tic->id << 9) | (i << 1) | 1);
+ }
+ for (; i < nvc0->state.num_textures[s]; ++i) {
+ BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
+ OUT_RING (chan, (i << 1) | 0);
+ }
+ nvc0->state.num_textures[s] = nvc0->num_textures[s];
+
+ return need_flush;
+}
+
+void nvc0_validate_textures(struct nvc0_context *nvc0)
+{
+ boolean need_flush;
+
+ need_flush = nvc0_validate_tic(nvc0, 0);
+ need_flush |= nvc0_validate_tic(nvc0, 4);
+
+ if (need_flush) {
+ BEGIN_RING(nvc0->screen->base.channel, RING_3D(TIC_FLUSH), 1);
+ OUT_RING (nvc0->screen->base.channel, 0);
+ }
+}
+
+static boolean
+nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ unsigned i;
+ boolean need_flush = FALSE;
+
+ for (i = 0; i < nvc0->num_samplers[s]; ++i) {
+ struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
+
+ if (!tsc) {
+ BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
+ OUT_RING (chan, (i << 4) | 0);
+ continue;
+ }
+ if (tsc->id < 0) {
+ tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
+
+ nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,
+ 65536 + tsc->id * 32, NOUVEAU_BO_VRAM,
+ 32, tsc->tsc);
+ need_flush = TRUE;
+ }
+ nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
+
+ BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
+ OUT_RING (chan, (tsc->id << 12) | (i << 4) | 1);
+ }
+ for (; i < nvc0->state.num_samplers[s]; ++i) {
+ BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
+ OUT_RING (chan, (i << 4) | 0);
+ }
+ nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
+
+ return need_flush;
+}
+
+void nvc0_validate_samplers(struct nvc0_context *nvc0)
+{
+ boolean need_flush;
+
+ need_flush = nvc0_validate_tsc(nvc0, 0);
+ need_flush |= nvc0_validate_tsc(nvc0, 4);
+
+ if (need_flush) {
+ BEGIN_RING(nvc0->screen->base.channel, RING_3D(TSC_FLUSH), 1);
+ OUT_RING (nvc0->screen->base.channel, 0);
+ }
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
new file mode 100644
index 0000000000..a44d330c73
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
@@ -0,0 +1,2023 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <unistd.h>
+
+#define NOUVEAU_DEBUG 1
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_dump.h"
+#include "util/u_dynarray.h"
+
+#include "nvc0_pc.h"
+#include "nvc0_program.h"
+
+/* Arbitrary internal limits. */
+#define BLD_MAX_TEMPS 64
+#define BLD_MAX_ADDRS 4
+#define BLD_MAX_PREDS 4
+#define BLD_MAX_IMMDS 128
+#define BLD_MAX_OUTPS PIPE_MAX_SHADER_OUTPUTS
+
+#define BLD_MAX_COND_NESTING 8
+#define BLD_MAX_LOOP_NESTING 4
+#define BLD_MAX_CALL_NESTING 2
+
+/* This structure represents a TGSI register. */
+struct bld_register {
+ struct nv_value *current;
+ /* collect all SSA values assigned to it */
+ struct util_dynarray vals;
+ /* 1 bit per loop level, indicates if used/defd, reset when loop ends */
+ uint16_t loop_use;
+ uint16_t loop_def;
+};
+
+static INLINE struct nv_value **
+bld_register_access(struct bld_register *reg, unsigned i)
+{
+ return util_dynarray_element(&reg->vals, struct nv_value *, i);
+}
+
+static INLINE void
+bld_register_add_val(struct bld_register *reg, struct nv_value *val)
+{
+ struct nv_basic_block *bb = val->insn->bb;
+
+ if (reg->vals.size &&
+ (util_dynarray_top(&reg->vals, struct nv_value *))->insn->bb == bb)
+ *(util_dynarray_top_ptr(&reg->vals, struct nv_value *)) = val;
+ else
+ util_dynarray_append(&reg->vals, struct nv_value *, val);
+}
+
+static INLINE boolean
+bld_register_del_val(struct bld_register *reg, struct nv_value *val)
+{
+ unsigned i;
+
+ for (i = reg->vals.size / sizeof(struct nv_value *); i > 0; --i)
+ if (*bld_register_access(reg, i - 1) == val)
+ break;
+ if (!i)
+ return FALSE;
+
+ if (i != reg->vals.size / sizeof(struct nv_value *))
+ *bld_register_access(reg, i - 1) = util_dynarray_pop(&reg->vals,
+ struct nv_value *);
+ else
+ reg->vals.size -= sizeof(struct nv_value *);
+
+ return TRUE;
+}
+
+struct bld_context {
+ struct nvc0_translation_info *ti;
+
+ struct nv_pc *pc;
+ struct nv_basic_block *b;
+
+ struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING];
+ int call_lvl;
+
+ struct nv_basic_block *cond_bb[BLD_MAX_COND_NESTING];
+ struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING];
+ struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING];
+ int cond_lvl;
+ struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING];
+ struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING];
+ int loop_lvl;
+
+ ubyte out_kind; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */
+
+ struct bld_register tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */
+ struct bld_register avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */
+ struct bld_register pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */
+ struct bld_register ovs[BLD_MAX_OUTPS][4]; /* TGSI_FILE_OUTPUT, FP only */
+
+ uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 7) / 8];
+ int hpos_index;
+
+ struct nv_value *zero;
+ struct nv_value *frag_coord[4];
+
+ /* wipe on new BB */
+ struct nv_value *saved_sysvals[4];
+ struct nv_value *saved_addr[4][2];
+ struct nv_value *saved_inputs[PIPE_MAX_SHADER_INPUTS][4];
+ struct nv_value *saved_immd[BLD_MAX_IMMDS];
+ uint num_immds;
+};
+
+static INLINE ubyte
+bld_register_file(struct bld_context *bld, struct bld_register *reg)
+{
+ if (reg >= &bld->pvs[0][0] &&
+ reg < &bld->ovs[0][0])
+ return NV_FILE_PRED;
+ return NV_FILE_GPR;
+}
+
+static INLINE struct nv_value *
+bld_fetch(struct bld_context *bld, struct bld_register *regs, int i, int c)
+{
+ regs[i * 4 + c].loop_use |= 1 << bld->loop_lvl;
+ return regs[i * 4 + c].current;
+}
+
+static struct nv_value *
+bld_loop_phi(struct bld_context *, struct bld_register *, struct nv_value *);
+
+/* If a variable is defined in a loop without prior use, we don't need
+ * a phi in the loop header to account for backwards flow.
+ *
+ * However, if this variable is then also used outside the loop, we do
+ * need a phi after all. But we must not use this phi's def inside the
+ * loop, so we can eliminate the phi if it is unused later.
+ */
+static INLINE void
+bld_store(struct bld_context *bld,
+ struct bld_register *regs, int i, int c, struct nv_value *val)
+{
+ const uint16_t m = 1 << bld->loop_lvl;
+ struct bld_register *reg = &regs[i * 4 + c];
+
+ if (bld->loop_lvl && !(m & (reg->loop_def | reg->loop_use)))
+ bld_loop_phi(bld, reg, val);
+
+ reg->current = val;
+ bld_register_add_val(reg, reg->current);
+
+ reg->loop_def |= 1 << bld->loop_lvl;
+}
+
+#define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c)
+#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
+#define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c)
+#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
+#define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c)
+#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))
+#define STORE_OUTP(i, c, v) \
+ do { \
+ bld_store(bld, &bld->ovs[0][0], i, c, (v)); \
+ bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \
+ } while (0)
+
+static INLINE void
+bld_clear_def_use(struct bld_register *regs, int n, int lvl)
+{
+ int i;
+ const uint16_t mask = ~(1 << lvl);
+
+ for (i = 0; i < n * 4; ++i) {
+ regs[i].loop_def &= mask;
+ regs[i].loop_use &= mask;
+ }
+}
+
+static INLINE void
+bld_warn_uninitialized(struct bld_context *bld, int kind,
+ struct bld_register *reg, struct nv_basic_block *b)
+{
+#ifdef NOUVEAU_DEBUG
+ long i = (reg - &bld->tvs[0][0]) / 4;
+ long c = (reg - &bld->tvs[0][0]) & 3;
+
+ if (c == 3)
+ c = -1;
+ debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
+ i, (int)('x' + c), kind ? "may be" : "is", b->id);
+#endif
+}
+
+static INLINE struct nv_value *
+bld_def(struct nv_instruction *i, int c, struct nv_value *value)
+{
+ i->def[c] = value;
+ value->insn = i;
+ return value;
+}
+
+static INLINE struct nv_value *
+find_by_bb(struct bld_register *reg, struct nv_basic_block *b)
+{
+ int i;
+
+ if (reg->current && reg->current->insn->bb == b)
+ return reg->current;
+
+ for (i = 0; i < reg->vals.size / sizeof(struct nv_value *); ++i)
+ if ((*bld_register_access(reg, i))->insn->bb == b)
+ return *bld_register_access(reg, i);
+ return NULL;
+}
+
+/* Fetch value from register that was defined in the specified BB,
+ * or search for first definitions in all of its predecessors.
+ */
+static void
+fetch_by_bb(struct bld_register *reg,
+ struct nv_value **vals, int *n,
+ struct nv_basic_block *b)
+{
+ int i;
+ struct nv_value *val;
+
+ assert(*n < 16); /* MAX_COND_NESTING */
+
+ val = find_by_bb(reg, b);
+ if (val) {
+ for (i = 0; i < *n; ++i)
+ if (vals[i] == val)
+ return;
+ vals[(*n)++] = val;
+ return;
+ }
+ for (i = 0; i < b->num_in; ++i)
+ if (!IS_WALL_EDGE(b->in_kind[i]))
+ fetch_by_bb(reg, vals, n, b->in[i]);
+}
+
+static INLINE struct nv_value *
+bld_load_imm_u32(struct bld_context *bld, uint32_t u);
+
+static INLINE struct nv_value *
+bld_undef(struct bld_context *bld, ubyte file)
+{
+ struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF);
+
+ return bld_def(nvi, 0, new_value(bld->pc, file, 4));
+}
+
+static struct nv_value *
+bld_phi(struct bld_context *bld, struct nv_basic_block *b,
+ struct bld_register *reg)
+{
+ struct nv_basic_block *in;
+ struct nv_value *vals[16] = { NULL };
+ struct nv_value *val;
+ struct nv_instruction *phi;
+ int i, j, n;
+
+ do {
+ i = n = 0;
+ fetch_by_bb(reg, vals, &n, b);
+
+ if (!n) {
+ bld_warn_uninitialized(bld, 0, reg, b);
+ return NULL;
+ }
+
+ if (n == 1) {
+ if (nvc0_bblock_dominated_by(b, vals[0]->insn->bb))
+ break;
+
+ bld_warn_uninitialized(bld, 1, reg, b);
+
+ /* back-tracking to insert missing value of other path */
+ in = b;
+ while (in->in[0]) {
+ if (in->num_in == 1) {
+ in = in->in[0];
+ } else {
+ if (!nvc0_bblock_reachable_by(in->in[0], vals[0]->insn->bb, b))
+ in = in->in[0];
+ else
+ if (!nvc0_bblock_reachable_by(in->in[1], vals[0]->insn->bb, b))
+ in = in->in[1];
+ else
+ in = in->in[0];
+ }
+ }
+ bld->pc->current_block = in;
+
+ /* should make this a no-op */
+ bld_register_add_val(reg, bld_undef(bld, vals[0]->reg.file));
+ continue;
+ }
+
+ for (i = 0; i < n; ++i) {
+ /* if value dominates b, continue to the redefinitions */
+ if (nvc0_bblock_dominated_by(b, vals[i]->insn->bb))
+ continue;
+
+ /* if value dominates any in-block, b should be the dom frontier */
+ for (j = 0; j < b->num_in; ++j)
+ if (nvc0_bblock_dominated_by(b->in[j], vals[i]->insn->bb))
+ break;
+ /* otherwise, find the dominance frontier and put the phi there */
+ if (j == b->num_in) {
+ in = nvc0_bblock_dom_frontier(vals[i]->insn->bb);
+ val = bld_phi(bld, in, reg);
+ bld_register_add_val(reg, val);
+ break;
+ }
+ }
+ } while(i < n);
+
+ bld->pc->current_block = b;
+
+ if (n == 1)
+ return vals[0];
+
+ phi = new_instruction(bld->pc, NV_OP_PHI);
+
+ bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.size));
+ for (i = 0; i < n; ++i)
+ nv_reference(bld->pc, phi, i, vals[i]);
+
+ return phi->def[0];
+}
+
+/* Insert a phi function in the loop header.
+ * For nested loops, we need to insert phi functions in all the outer
+ * loop headers if they don't have one yet.
+ *
+ * @def: redefinition from inside loop, or NULL if to be replaced later
+ */
+static struct nv_value *
+bld_loop_phi(struct bld_context *bld, struct bld_register *reg,
+ struct nv_value *def)
+{
+ struct nv_instruction *phi;
+ struct nv_basic_block *bb = bld->pc->current_block;
+ struct nv_value *val = NULL;
+
+ if (bld->ti->require_stores) /* XXX: actually only for INDEXABLE_TEMP */
+ return NULL;
+
+ if (bld->loop_lvl > 1) {
+ --bld->loop_lvl;
+ if (!((reg->loop_def | reg->loop_use) & (1 << bld->loop_lvl)))
+ val = bld_loop_phi(bld, reg, NULL);
+ ++bld->loop_lvl;
+ }
+
+ if (!val)
+ val = bld_phi(bld, bld->pc->current_block, reg); /* old definition */
+ if (!val) {
+ bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0];
+ val = bld_undef(bld, bld_register_file(bld, reg));
+ }
+
+ bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1];
+
+ phi = new_instruction(bld->pc, NV_OP_PHI);
+
+ bld_def(phi, 0, new_value_like(bld->pc, val));
+ if (!def)
+ def = phi->def[0];
+
+ bld_register_add_val(reg, phi->def[0]);
+
+ phi->target = (struct nv_basic_block *)reg; /* cheat */
+
+ nv_reference(bld->pc, phi, 0, val);
+ nv_reference(bld->pc, phi, 1, def);
+
+ bld->pc->current_block = bb;
+
+ return phi->def[0];
+}
+
+static INLINE struct nv_value *
+bld_fetch_global(struct bld_context *bld, struct bld_register *reg)
+{
+ const uint16_t m = 1 << bld->loop_lvl;
+ const uint16_t use = reg->loop_use;
+
+ reg->loop_use |= m;
+
+ /* If neither used nor def'd inside the loop, build a phi in foresight,
+ * so we don't have to replace stuff later on, which requires tracking.
+ */
+ if (bld->loop_lvl && !((use | reg->loop_def) & m))
+ return bld_loop_phi(bld, reg, NULL);
+
+ return bld_phi(bld, bld->pc->current_block, reg);
+}
+
+static INLINE struct nv_value *
+bld_imm_u32(struct bld_context *bld, uint32_t u)
+{
+ int i;
+ unsigned n = bld->num_immds;
+
+ for (i = 0; i < n; ++i)
+ if (bld->saved_immd[i]->reg.imm.u32 == u)
+ return bld->saved_immd[i];
+
+ assert(n < BLD_MAX_IMMDS);
+ bld->num_immds++;
+
+ bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, 4);
+ bld->saved_immd[n]->reg.imm.u32 = u;
+ return bld->saved_immd[n];
+}
+
+static void
+bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *,
+ struct nv_value *);
+
+/* Replace the source of the phi in the loop header by the last assignment,
+ * or eliminate the phi function if there is no assignment inside the loop.
+ *
+ * Redundancy situation 1 - (used) but (not redefined) value:
+ * %3 = phi %0, %3 = %3 is used
+ * %3 = phi %0, %4 = is new definition
+ *
+ * Redundancy situation 2 - (not used) but (redefined) value:
+ * %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE
+ */
+static void
+bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
+{
+ struct nv_basic_block *save = bld->pc->current_block;
+ struct nv_instruction *phi, *next;
+ struct nv_value *val;
+ struct bld_register *reg;
+ int i, s, n;
+
+ for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) {
+ next = phi->next;
+
+ reg = (struct bld_register *)phi->target;
+ phi->target = NULL;
+
+ /* start with s == 1, src[0] is from outside the loop */
+ for (s = 1, n = 0; n < bb->num_in; ++n) {
+ if (bb->in_kind[n] != CFG_EDGE_BACK)
+ continue;
+
+ assert(s < 4);
+ bld->pc->current_block = bb->in[n];
+ val = bld_fetch_global(bld, reg);
+
+ for (i = 0; i < 4; ++i)
+ if (phi->src[i] && phi->src[i]->value == val)
+ break;
+ if (i == 4) {
+ /* skip values we do not want to replace */
+ for (; phi->src[s] && phi->src[s]->value != phi->def[0]; ++s);
+ nv_reference(bld->pc, phi, s++, val);
+ }
+ }
+ bld->pc->current_block = save;
+
+ if (phi->src[0]->value == phi->def[0] ||
+ phi->src[0]->value == phi->src[1]->value)
+ s = 1;
+ else
+ if (phi->src[1]->value == phi->def[0])
+ s = 0;
+ else
+ continue;
+
+ if (s >= 0) {
+ /* eliminate the phi */
+ bld_register_del_val(reg, phi->def[0]);
+
+ ++bld->pc->pass_seq;
+ bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value);
+
+ nvc0_insn_delete(phi);
+ }
+ }
+}
+
+static INLINE struct nv_value *
+bld_imm_f32(struct bld_context *bld, float f)
+{
+ return bld_imm_u32(bld, fui(f));
+}
+
+static struct nv_value *
+bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0)
+{
+ struct nv_instruction *insn = new_instruction(bld->pc, opcode);
+
+ nv_reference(bld->pc, insn, 0, src0);
+
+ return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
+}
+
+static struct nv_value *
+bld_insn_2(struct bld_context *bld, uint opcode,
+ struct nv_value *src0, struct nv_value *src1)
+{
+ struct nv_instruction *insn = new_instruction(bld->pc, opcode);
+
+ nv_reference(bld->pc, insn, 0, src0);
+ nv_reference(bld->pc, insn, 1, src1);
+
+ return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
+}
+
+static struct nv_value *
+bld_insn_3(struct bld_context *bld, uint opcode,
+ struct nv_value *src0, struct nv_value *src1,
+ struct nv_value *src2)
+{
+ struct nv_instruction *insn = new_instruction(bld->pc, opcode);
+
+ nv_reference(bld->pc, insn, 0, src0);
+ nv_reference(bld->pc, insn, 1, src1);
+ nv_reference(bld->pc, insn, 2, src2);
+
+ return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
+}
+
+static INLINE void
+bld_src_predicate(struct bld_context *bld,
+ struct nv_instruction *nvi, int s, struct nv_value *val)
+{
+ nvi->predicate = s;
+ nv_reference(bld->pc, nvi, s, val);
+}
+
+static INLINE void
+bld_src_pointer(struct bld_context *bld,
+ struct nv_instruction *nvi, int s, struct nv_value *val)
+{
+ nvi->indirect = s;
+ nv_reference(bld->pc, nvi, s, val);
+}
+
+static void
+bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst,
+ struct nv_value *val)
+{
+ struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_ST);
+ struct nv_value *loc;
+
+ loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32));
+
+ loc->reg.address = ofst * 4;
+
+ nv_reference(bld->pc, insn, 0, loc);
+ nv_reference(bld->pc, insn, 1, val);
+ if (ptr)
+ bld_src_pointer(bld, insn, 2, ptr);
+}
+
+static struct nv_value *
+bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst)
+{
+ struct nv_value *loc, *val;
+
+ loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32));
+
+ loc->reg.address = ofst * 4;
+
+ val = bld_insn_1(bld, NV_OP_LD, loc);
+ if (ptr)
+ bld_src_pointer(bld, val->insn, 1, ptr);
+
+ return val;
+}
+
+static struct nv_value *
+bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e)
+{
+ struct nv_value *val;
+
+ val = bld_insn_1(bld, NV_OP_LG2, x);
+ val = bld_insn_2(bld, NV_OP_MUL_F32, e, val);
+
+ val = bld_insn_1(bld, NV_OP_PREEX2, val);
+ val = bld_insn_1(bld, NV_OP_EX2, val);
+
+ return val;
+}
+
+static INLINE struct nv_value *
+bld_load_imm_f32(struct bld_context *bld, float f)
+{
+ if (f == 0.0f)
+ return bld->zero;
+ return bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f));
+}
+
+static INLINE struct nv_value *
+bld_load_imm_u32(struct bld_context *bld, uint32_t u)
+{
+ if (u == 0)
+ return bld->zero;
+ return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u));
+}
+
+static INLINE struct nv_value *
+bld_setp(struct bld_context *bld, uint op, uint8_t cc,
+ struct nv_value *src0, struct nv_value *src1)
+{
+ struct nv_value *val = bld_insn_2(bld, op, src0, src1);
+
+ val->reg.file = NV_FILE_PRED;
+ val->reg.size = 1;
+ val->insn->set_cond = cc & 0xf;
+ return val;
+}
+
+static INLINE struct nv_value *
+bld_cvt(struct bld_context *bld, uint8_t dt, uint8_t st, struct nv_value *src)
+{
+ struct nv_value *val = bld_insn_1(bld, NV_OP_CVT, src);
+ val->insn->ext.cvt.d = dt;
+ val->insn->ext.cvt.s = st;
+ return val;
+}
+
+static void
+bld_kil(struct bld_context *bld, struct nv_value *src)
+{
+ struct nv_instruction *nvi;
+
+ src = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src, bld->zero);
+
+ nvi = new_instruction(bld->pc, NV_OP_KIL);
+ nvi->fixed = 1;
+
+ bld_src_predicate(bld, nvi, 0, src);
+}
+
+static void
+bld_flow(struct bld_context *bld, uint opcode,
+ struct nv_value *pred, uint8_t cc, struct nv_basic_block *target,
+ boolean reconverge)
+{
+ struct nv_instruction *nvi;
+
+ if (reconverge)
+ new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1;
+
+ nvi = new_instruction(bld->pc, opcode);
+ nvi->target = target;
+ nvi->terminator = 1;
+ if (pred) {
+ nvi->cc = cc;
+ bld_src_predicate(bld, nvi, 0, pred);
+ }
+}
+
+static ubyte
+translate_setcc(unsigned opcode)
+{
+ switch (opcode) {
+ case TGSI_OPCODE_SLT: return NV_CC_LT;
+ case TGSI_OPCODE_SGE: return NV_CC_GE;
+ case TGSI_OPCODE_SEQ: return NV_CC_EQ;
+ case TGSI_OPCODE_SGT: return NV_CC_GT;
+ case TGSI_OPCODE_SLE: return NV_CC_LE;
+ case TGSI_OPCODE_SNE: return NV_CC_NE | NV_CC_U;
+ case TGSI_OPCODE_STR: return NV_CC_TR;
+ case TGSI_OPCODE_SFL: return NV_CC_FL;
+
+ case TGSI_OPCODE_ISLT: return NV_CC_LT;
+ case TGSI_OPCODE_ISGE: return NV_CC_GE;
+ case TGSI_OPCODE_USEQ: return NV_CC_EQ;
+ case TGSI_OPCODE_USGE: return NV_CC_GE;
+ case TGSI_OPCODE_USLT: return NV_CC_LT;
+ case TGSI_OPCODE_USNE: return NV_CC_NE;
+ default:
+ assert(0);
+ return NV_CC_FL;
+ }
+}
+
+static uint
+translate_opcode(uint opcode)
+{
+ switch (opcode) {
+ case TGSI_OPCODE_ABS: return NV_OP_ABS_F32;
+ case TGSI_OPCODE_ADD: return NV_OP_ADD_F32;
+ case TGSI_OPCODE_SUB: return NV_OP_SUB_F32;
+ case TGSI_OPCODE_UADD: return NV_OP_ADD_B32;
+ case TGSI_OPCODE_AND: return NV_OP_AND;
+ case TGSI_OPCODE_EX2: return NV_OP_EX2;
+ case TGSI_OPCODE_CEIL: return NV_OP_CEIL;
+ case TGSI_OPCODE_FLR: return NV_OP_FLOOR;
+ case TGSI_OPCODE_TRUNC: return NV_OP_TRUNC;
+ case TGSI_OPCODE_COS: return NV_OP_COS;
+ case TGSI_OPCODE_SIN: return NV_OP_SIN;
+ case TGSI_OPCODE_DDX: return NV_OP_DFDX;
+ case TGSI_OPCODE_DDY: return NV_OP_DFDY;
+ case TGSI_OPCODE_F2I:
+ case TGSI_OPCODE_F2U:
+ case TGSI_OPCODE_I2F:
+ case TGSI_OPCODE_U2F: return NV_OP_CVT;
+ case TGSI_OPCODE_INEG: return NV_OP_NEG_S32;
+ case TGSI_OPCODE_LG2: return NV_OP_LG2;
+ case TGSI_OPCODE_ISHR: return NV_OP_SAR;
+ case TGSI_OPCODE_USHR: return NV_OP_SHR;
+ case TGSI_OPCODE_MAD: return NV_OP_MAD_F32;
+ case TGSI_OPCODE_MAX: return NV_OP_MAX_F32;
+ case TGSI_OPCODE_IMAX: return NV_OP_MAX_S32;
+ case TGSI_OPCODE_UMAX: return NV_OP_MAX_U32;
+ case TGSI_OPCODE_MIN: return NV_OP_MIN_F32;
+ case TGSI_OPCODE_IMIN: return NV_OP_MIN_S32;
+ case TGSI_OPCODE_UMIN: return NV_OP_MIN_U32;
+ case TGSI_OPCODE_MUL: return NV_OP_MUL_F32;
+ case TGSI_OPCODE_UMUL: return NV_OP_MUL_B32;
+ case TGSI_OPCODE_OR: return NV_OP_OR;
+ case TGSI_OPCODE_RCP: return NV_OP_RCP;
+ case TGSI_OPCODE_RSQ: return NV_OP_RSQ;
+ case TGSI_OPCODE_SAD: return NV_OP_SAD;
+ case TGSI_OPCODE_SHL: return NV_OP_SHL;
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SNE: return NV_OP_FSET_F32;
+ case TGSI_OPCODE_ISLT:
+ case TGSI_OPCODE_ISGE: return NV_OP_SET_S32;
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_USNE: return NV_OP_SET_U32;
+ case TGSI_OPCODE_TEX: return NV_OP_TEX;
+ case TGSI_OPCODE_TXP: return NV_OP_TEX;
+ case TGSI_OPCODE_TXB: return NV_OP_TXB;
+ case TGSI_OPCODE_TXL: return NV_OP_TXL;
+ case TGSI_OPCODE_XOR: return NV_OP_XOR;
+ default:
+ return NV_OP_NOP;
+ }
+}
+
+#if 0
+static ubyte
+infer_src_type(unsigned opcode)
+{
+ switch (opcode) {
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_AND:
+ case TGSI_OPCODE_OR:
+ case TGSI_OPCODE_XOR:
+ case TGSI_OPCODE_SAD:
+ case TGSI_OPCODE_U2F:
+ case TGSI_OPCODE_UADD:
+ case TGSI_OPCODE_UDIV:
+ case TGSI_OPCODE_UMOD:
+ case TGSI_OPCODE_UMAD:
+ case TGSI_OPCODE_UMUL:
+ case TGSI_OPCODE_UMAX:
+ case TGSI_OPCODE_UMIN:
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_USNE:
+ case TGSI_OPCODE_USHR:
+ return NV_TYPE_U32;
+ case TGSI_OPCODE_I2F:
+ case TGSI_OPCODE_IDIV:
+ case TGSI_OPCODE_IMAX:
+ case TGSI_OPCODE_IMIN:
+ case TGSI_OPCODE_INEG:
+ case TGSI_OPCODE_ISGE:
+ case TGSI_OPCODE_ISHR:
+ case TGSI_OPCODE_ISLT:
+ return NV_TYPE_S32;
+ default:
+ return NV_TYPE_F32;
+ }
+}
+
+static ubyte
+infer_dst_type(unsigned opcode)
+{
+ switch (opcode) {
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_F2U:
+ case TGSI_OPCODE_AND:
+ case TGSI_OPCODE_OR:
+ case TGSI_OPCODE_XOR:
+ case TGSI_OPCODE_SAD:
+ case TGSI_OPCODE_UADD:
+ case TGSI_OPCODE_UDIV:
+ case TGSI_OPCODE_UMOD:
+ case TGSI_OPCODE_UMAD:
+ case TGSI_OPCODE_UMUL:
+ case TGSI_OPCODE_UMAX:
+ case TGSI_OPCODE_UMIN:
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_USNE:
+ case TGSI_OPCODE_USHR:
+ return NV_TYPE_U32;
+ case TGSI_OPCODE_F2I:
+ case TGSI_OPCODE_IDIV:
+ case TGSI_OPCODE_IMAX:
+ case TGSI_OPCODE_IMIN:
+ case TGSI_OPCODE_INEG:
+ case TGSI_OPCODE_ISGE:
+ case TGSI_OPCODE_ISHR:
+ case TGSI_OPCODE_ISLT:
+ return NV_TYPE_S32;
+ default:
+ return NV_TYPE_F32;
+ }
+}
+#endif
+
+static void
+emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst,
+ unsigned chan, struct nv_value *res)
+{
+ const struct tgsi_full_dst_register *reg = &inst->Dst[0];
+ struct nv_instruction *nvi;
+ struct nv_value *mem;
+ struct nv_value *ptr = NULL;
+ int idx;
+
+ idx = reg->Register.Index;
+ assert(chan < 4);
+
+ if (reg->Register.Indirect)
+ ptr = FETCH_ADDR(reg->Indirect.Index,
+ tgsi_util_get_src_register_swizzle(&reg->Indirect, 0));
+
+ switch (inst->Instruction.Saturate) {
+ case TGSI_SAT_NONE:
+ break;
+ case TGSI_SAT_ZERO_ONE:
+ res = bld_insn_1(bld, NV_OP_SAT, res);
+ break;
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ res = bld_insn_2(bld, NV_OP_MAX_F32, res, bld_load_imm_f32(bld, -1.0f));
+ res = bld_insn_2(bld, NV_OP_MIN_F32, res, bld_load_imm_f32(bld, 1.0f));
+ break;
+ }
+
+ switch (reg->Register.File) {
+ case TGSI_FILE_OUTPUT:
+ if (!res->insn)
+ res = bld_insn_1(bld, NV_OP_MOV, res);
+
+ if (bld->pc->is_fragprog) {
+ assert(!ptr);
+ STORE_OUTP(idx, chan, res);
+ } else {
+ nvi = new_instruction(bld->pc, NV_OP_EXPORT);
+ mem = new_value(bld->pc, bld->ti->output_file, res->reg.size);
+ nv_reference(bld->pc, nvi, 0, mem);
+ nv_reference(bld->pc, nvi, 1, res);
+ if (!ptr)
+ mem->reg.address = bld->ti->output_loc[idx][chan];
+ else
+ mem->reg.address = 0x80 + idx * 16 + chan * 4;
+ nvi->fixed = 1;
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ assert(idx < BLD_MAX_TEMPS);
+ if (!res->insn || res->insn->bb != bld->pc->current_block)
+ res = bld_insn_1(bld, NV_OP_MOV, res);
+
+ assert(res->reg.file == NV_FILE_GPR);
+
+ if (bld->ti->require_stores)
+ bld_lmem_store(bld, ptr, idx * 4 + chan, res);
+ else
+ STORE_TEMP(idx, chan, res);
+ break;
+ case TGSI_FILE_ADDRESS:
+ assert(idx < BLD_MAX_ADDRS);
+ STORE_ADDR(idx, chan, res);
+ break;
+ }
+}
+
+static INLINE uint32_t
+bld_is_output_written(struct bld_context *bld, int i, int c)
+{
+ if (c < 0)
+ return bld->outputs_written[i / 8] & (0xf << ((i * 4) % 32));
+ return bld->outputs_written[i / 8] & (1 << ((i * 4 + c) % 32));
+}
+
+static void
+bld_append_vp_ucp(struct bld_context *bld)
+{
+ struct nv_value *res[6];
+ struct nv_value *ucp, *vtx, *out;
+ struct nv_instruction *insn;
+ int i, c;
+
+ assert(bld->ti->prog->vp.num_ucps <= 6);
+
+ for (c = 0; c < 4; ++c) {
+ vtx = bld_fetch_global(bld, &bld->ovs[bld->hpos_index][c]);
+
+ for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) {
+ ucp = new_value(bld->pc, NV_FILE_MEM_C(15), 4);
+ ucp->reg.address = i * 16 + c * 4;
+
+ if (c == 0)
+ res[i] = bld_insn_2(bld, NV_OP_MUL_F32, vtx, ucp);
+ else
+ res[i] = bld_insn_3(bld, NV_OP_MAD_F32, vtx, ucp, res[i]);
+ }
+ }
+
+ for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) {
+ (out = new_value(bld->pc, NV_FILE_MEM_V, 4))->reg.address = 0x2c0 + i * 4;
+ (insn = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1;
+ nv_reference(bld->pc, insn, 0, out);
+ nv_reference(bld->pc, insn, 1, res[i]);
+ }
+}
+
+static void
+bld_export_fp_outputs(struct bld_context *bld)
+{
+ struct nv_value *vals[4];
+ struct nv_instruction *nvi;
+ int i, c, n;
+
+ for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
+ if (!bld_is_output_written(bld, i, -1))
+ continue;
+ for (n = 0, c = 0; c < 4; ++c) {
+ if (!bld_is_output_written(bld, i, c))
+ continue;
+ vals[n] = bld_fetch_global(bld, &bld->ovs[i][c]);
+ assert(vals[n]);
+ vals[n] = bld_insn_1(bld, NV_OP_MOV, vals[n]);
+ vals[n++]->reg.id = bld->ti->output_loc[i][c];
+ }
+ assert(n);
+
+ (nvi = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1;
+ for (c = 0; c < n; ++c)
+ nv_reference(bld->pc, nvi, c, vals[c]);
+ }
+}
+
+static void
+bld_new_block(struct bld_context *bld, struct nv_basic_block *b)
+{
+ int i, c;
+
+ bld->pc->current_block = b;
+
+ for (i = 0; i < 4; ++i)
+ bld->saved_addr[i][0] = NULL;
+ for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
+ for (c = 0; c < 4; ++c)
+ bld->saved_inputs[i][c] = NULL;
+
+ bld->out_kind = CFG_EDGE_FORWARD;
+}
+
+static struct nv_value *
+bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val)
+{
+ unsigned cent = mode & NVC0_INTERP_CENTROID;
+
+ mode &= ~NVC0_INTERP_CENTROID;
+
+ if (val->reg.address == 0x3fc) {
+ /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */
+ val = bld_insn_1(bld, NV_OP_LINTERP, val);
+ val->insn->flat = 1;
+ val = bld_insn_2(bld, NV_OP_SHL, val, bld_imm_u32(bld, 31));
+ val = bld_insn_2(bld, NV_OP_XOR, val, bld_imm_f32(bld, -1.0f));
+ return val;
+ } else
+ if (mode == NVC0_INTERP_PERSPECTIVE) {
+ val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frag_coord[3]);
+ } else {
+ val = bld_insn_1(bld, NV_OP_LINTERP, val);
+ }
+
+ val->insn->flat = mode == NVC0_INTERP_FLAT ? 1 : 0;
+ val->insn->centroid = cent ? 1 : 0;
+ return val;
+}
+
+static struct nv_value *
+emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
+ const unsigned s, const unsigned chan)
+{
+ const struct tgsi_full_src_register *src = &insn->Src[s];
+ struct nv_value *res = NULL;
+ struct nv_value *ptr = NULL;
+ int idx, ind_idx, dim_idx;
+ unsigned swz, ind_swz, sgn;
+
+ idx = src->Register.Index;
+ swz = tgsi_util_get_full_src_register_swizzle(src, chan);
+
+ if (src->Register.Indirect) {
+ ind_idx = src->Indirect.Index;
+ ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0);
+
+ ptr = FETCH_ADDR(ind_idx, ind_swz);
+ }
+
+ if (src->Register.Dimension)
+ dim_idx = src->Dimension.Index;
+ else
+ dim_idx = 0;
+
+ switch (src->Register.File) {
+ case TGSI_FILE_CONSTANT:
+ assert(dim_idx < 14);
+ res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), 4);
+ res->reg.address = idx * 16 + swz * 4;
+ res = bld_insn_1(bld, NV_OP_LD, res);
+ if (ptr)
+ bld_src_pointer(bld, res->insn, 1, ptr);
+ break;
+ case TGSI_FILE_IMMEDIATE: /* XXX: type for MOV TEMP[0], -IMM[0] */
+ assert(idx < bld->ti->immd32_nr);
+ res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]);
+ break;
+ case TGSI_FILE_INPUT:
+ assert(!src->Register.Dimension);
+ if (!ptr) {
+ res = bld->saved_inputs[idx][swz];
+ if (res)
+ break;
+ }
+ res = new_value(bld->pc, bld->ti->input_file, 4);
+ if (ptr)
+ res->reg.address = 0x80 + idx * 16 + swz * 4;
+ else
+ res->reg.address = bld->ti->input_loc[idx][swz];
+
+ if (bld->pc->is_fragprog)
+ res = bld_interp(bld, bld->ti->interp_mode[idx], res);
+ else
+ res = bld_insn_1(bld, NV_OP_VFETCH, res);
+
+ if (ptr)
+ bld_src_pointer(bld, res->insn, res->insn->src[1] ? 2 : 1, ptr);
+ else
+ bld->saved_inputs[idx][swz] = res;
+ break;
+ case TGSI_FILE_TEMPORARY:
+ if (bld->ti->require_stores)
+ res = bld_lmem_load(bld, ptr, idx * 4 + swz);
+ else
+ res = bld_fetch_global(bld, &bld->tvs[idx][swz]);
+ break;
+ case TGSI_FILE_ADDRESS:
+ res = bld_fetch_global(bld, &bld->avs[idx][swz]);
+ break;
+ case TGSI_FILE_PREDICATE:
+ res = bld_fetch_global(bld, &bld->pvs[idx][swz]);
+ break;
+ case TGSI_FILE_SYSTEM_VALUE:
+ assert(bld->ti->sysval_loc[idx] < 0xf00); /* >= would mean special reg */
+ res = new_value(bld->pc,
+ bld->pc->is_fragprog ? NV_FILE_MEM_V : NV_FILE_MEM_A, 4);
+ res->reg.address = bld->ti->sysval_loc[idx];
+
+ if (res->reg.file == NV_FILE_MEM_A)
+ res = bld_insn_1(bld, NV_OP_VFETCH, res);
+ else
+ res = bld_interp(bld, NVC0_INTERP_FLAT, res);
+
+ /* mesa doesn't do real integers yet :-(and in GL this should be S32) */
+ res = bld_cvt(bld, NV_TYPE_F32, NV_TYPE_U32, res);
+ break;
+ default:
+ NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File);
+ abort();
+ break;
+ }
+ if (!res)
+ return bld_undef(bld, NV_FILE_GPR);
+
+ sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);
+
+ switch (sgn) {
+ case TGSI_UTIL_SIGN_KEEP:
+ break;
+ case TGSI_UTIL_SIGN_CLEAR:
+ res = bld_insn_1(bld, NV_OP_ABS_F32, res);
+ break;
+ case TGSI_UTIL_SIGN_TOGGLE:
+ res = bld_insn_1(bld, NV_OP_NEG_F32, res);
+ break;
+ case TGSI_UTIL_SIGN_SET:
+ res = bld_insn_1(bld, NV_OP_ABS_F32, res);
+ res = bld_insn_1(bld, NV_OP_NEG_F32, res);
+ break;
+ default:
+ NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
+ abort();
+ break;
+ }
+
+ return res;
+}
+
+static void
+bld_lit(struct bld_context *bld, struct nv_value *dst0[4],
+ const struct tgsi_full_instruction *insn)
+{
+ struct nv_value *val0 = NULL;
+ unsigned mask = insn->Dst[0].Register.WriteMask;
+
+ if (mask & ((1 << 0) | (1 << 3)))
+ dst0[3] = dst0[0] = bld_load_imm_f32(bld, 1.0f);
+
+ if (mask & (3 << 1)) {
+ val0 = bld_insn_2(bld, NV_OP_MAX, emit_fetch(bld, insn, 0, 0), bld->zero);
+ if (mask & (1 << 1))
+ dst0[1] = val0;
+ }
+
+ if (mask & (1 << 2)) {
+ struct nv_value *val1, *val3, *src1, *src3, *pred;
+ struct nv_value *pos128 = bld_load_imm_f32(bld, 127.999999f);
+ struct nv_value *neg128 = bld_load_imm_f32(bld, -127.999999f);
+
+ src1 = emit_fetch(bld, insn, 0, 1);
+ src3 = emit_fetch(bld, insn, 0, 3);
+
+ pred = bld_setp(bld, NV_OP_SET_F32, NV_CC_LE, val0, bld->zero);
+
+ val1 = bld_insn_2(bld, NV_OP_MAX_F32, src1, bld->zero);
+ val3 = bld_insn_2(bld, NV_OP_MAX_F32, src3, neg128);
+ val3 = bld_insn_2(bld, NV_OP_MIN_F32, val3, pos128);
+ val3 = bld_pow(bld, val1, val3);
+
+ dst0[2] = bld_insn_1(bld, NV_OP_MOV, bld->zero);
+ bld_src_predicate(bld, dst0[2]->insn, 1, pred);
+
+ dst0[2] = bld_insn_2(bld, NV_OP_SELECT, val3, dst0[2]);
+ }
+}
+
+static INLINE void
+describe_texture_target(unsigned target, int *dim,
+ int *array, int *cube, int *shadow)
+{
+ *dim = *array = *cube = *shadow = 0;
+
+ switch (target) {
+ case TGSI_TEXTURE_1D:
+ *dim = 1;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ *dim = *shadow = 1;
+ break;
+ case TGSI_TEXTURE_UNKNOWN:
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ *dim = 2;
+ break;
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ *dim = 2;
+ *shadow = 1;
+ break;
+ case TGSI_TEXTURE_3D:
+ *dim = 3;
+ break;
+ case TGSI_TEXTURE_CUBE:
+ *dim = 2;
+ *cube = 1;
+ break;
+ case TGSI_TEXTURE_1D_ARRAY:
+ *dim = *array = 1;
+ break;
+ case TGSI_TEXTURE_2D_ARRAY:
+ *dim = 2;
+ *array = 1;
+ break;
+ /*
+ case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ *dim = *array = *shadow = 1;
+ break;
+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ *dim = 2;
+ *array = *shadow = 1;
+ break;
+ case TGSI_TEXTURE_CUBE_ARRAY:
+ *dim = 2;
+ *cube = *array = 1;
+ break;
+ */
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static struct nv_value *
+bld_clone(struct bld_context *bld, struct nv_instruction *nvi)
+{
+ struct nv_instruction *dupi = new_instruction(bld->pc, nvi->opcode);
+ struct nv_instruction *next, *prev;
+ int c;
+
+ next = dupi->next;
+ prev = dupi->prev;
+
+ *dupi = *nvi;
+
+ dupi->next = next;
+ dupi->prev = prev;
+
+ for (c = 0; c < 5 && nvi->def[c]; ++c)
+ bld_def(dupi, c, new_value_like(bld->pc, nvi->def[c]));
+
+ for (c = 0; c < 6 && nvi->src[c]; ++c) {
+ dupi->src[c] = NULL;
+ nv_reference(bld->pc, dupi, c, nvi->src[c]->value);
+ }
+
+ return dupi->def[0];
+}
+
+/* NOTE: proj(t0) = (t0 / w) / (tc3 / w) = tc0 / tc2 handled by optimizer */
+static void
+load_proj_tex_coords(struct bld_context *bld,
+ struct nv_value *t[4], int dim, int shadow,
+ const struct tgsi_full_instruction *insn)
+{
+ int c;
+ unsigned mask = (1 << dim) - 1;
+
+ if (shadow)
+ mask |= 4; /* depth comparison value */
+
+ t[3] = emit_fetch(bld, insn, 0, 3);
+ if (t[3]->insn->opcode == NV_OP_PINTERP) {
+ t[3] = bld_clone(bld, t[3]->insn);
+ t[3]->insn->opcode = NV_OP_LINTERP;
+ nv_reference(bld->pc, t[3]->insn, 1, NULL);
+ }
+ t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);
+
+ for (c = 0; c < 4; ++c) {
+ if (!(mask & (1 << c)))
+ continue;
+ t[c] = emit_fetch(bld, insn, 0, c);
+
+ if (t[c]->insn->opcode != NV_OP_PINTERP)
+ continue;
+ mask &= ~(1 << c);
+
+ t[c] = bld_clone(bld, t[c]->insn);
+ nv_reference(bld->pc, t[c]->insn, 1, t[3]);
+ }
+ if (mask == 0)
+ return;
+
+ t[3] = emit_fetch(bld, insn, 0, 3);
+ t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);
+
+ for (c = 0; c < 4; ++c)
+ if (mask & (1 << c))
+ t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], t[3]);
+}
+
+/* For a quad of threads / top left, top right, bottom left, bottom right
+ * pixels, do a different operation, and take src0 from a specific thread.
+ */
+#define QOP_ADD 0
+#define QOP_SUBR 1
+#define QOP_SUB 2
+#define QOP_MOV1 3
+
+#define QOP(a, b, c, d) \
+ ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6))
+
+static INLINE struct nv_value *
+bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane,
+ struct nv_value *src1, boolean wp)
+{
+ struct nv_value *val = bld_insn_2(bld, NV_OP_QUADOP, src0, src1);
+ val->insn->lanes = lane;
+ val->insn->quadop = qop;
+ if (wp) {
+ assert(!"quadop predicate write");
+ }
+ return val;
+}
+
+/* order of TGSI operands: x y z layer shadow lod/bias */
+/* order of native operands: layer x y z | lod/bias shadow */
+static struct nv_instruction *
+emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc,
+ struct nv_value *dst[4], struct nv_value *arg[4],
+ int dim, int array, int cube, int shadow)
+{
+ struct nv_value *src[4];
+ struct nv_instruction *nvi, *bnd;
+ int c;
+ int s = 0;
+ boolean lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;
+
+ if (array)
+ arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]);
+
+ /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */
+
+ bnd = new_instruction(bld->pc, NV_OP_BIND);
+ if (array) {
+ src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
+ bld_def(bnd, s, src[s]);
+ nv_reference(bld->pc, bnd, s++, arg[dim + cube]);
+ }
+ for (c = 0; c < dim + cube; ++c, ++s) {
+ src[s] = bld_def(bnd, s, new_value(bld->pc, NV_FILE_GPR, 4));
+ nv_reference(bld->pc, bnd, s, arg[c]);
+ }
+
+ if (shadow || lodbias) {
+ bnd = new_instruction(bld->pc, NV_OP_BIND);
+
+ if (lodbias) {
+ src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
+ bld_def(bnd, 0, src[s++]);
+ nv_reference(bld->pc, bnd, 0, arg[dim + cube + array + shadow]);
+ }
+ if (shadow) {
+ src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
+ bld_def(bnd, lodbias, src[s++]);
+ nv_reference(bld->pc, bnd, lodbias, arg[dim + cube + array]);
+ }
+ }
+
+ nvi = new_instruction(bld->pc, opcode);
+ for (c = 0; c < 4; ++c)
+ dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, 4));
+ for (c = 0; c < s; ++c)
+ nv_reference(bld->pc, nvi, c, src[c]);
+
+ nvi->ext.tex.t = tic;
+ nvi->ext.tex.s = tsc;
+ nvi->tex_mask = 0xf;
+ nvi->tex_cube = cube;
+ nvi->tex_dim = dim;
+ nvi->tex_cube = cube;
+ nvi->tex_shadow = shadow;
+ nvi->tex_array = array;
+ nvi->tex_live = 0;
+
+ return nvi;
+}
+
+static void
+bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
+ const struct tgsi_full_instruction *insn)
+{
+ struct nv_value *t[4], *s[3];
+ uint opcode = translate_opcode(insn->Instruction.Opcode);
+ int c, dim, array, cube, shadow;
+ const int lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;
+ const int tic = insn->Src[1].Register.Index;
+ const int tsc = tic;
+
+ describe_texture_target(insn->Texture.Texture, &dim, &array, &cube, &shadow);
+
+ assert(dim + array + shadow + lodbias <= 5);
+
+ if (!cube && !array && insn->Instruction.Opcode == TGSI_OPCODE_TXP)
+ load_proj_tex_coords(bld, t, dim, shadow, insn);
+ else {
+ for (c = 0; c < dim + cube + array; ++c)
+ t[c] = emit_fetch(bld, insn, 0, c);
+ if (shadow)
+ t[c] = emit_fetch(bld, insn, 0, MAX2(c, 2));
+ }
+
+ if (cube) {
+ for (c = 0; c < 3; ++c)
+ s[c] = bld_insn_1(bld, NV_OP_ABS_F32, t[c]);
+
+ s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[1]);
+ s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[2]);
+ s[0] = bld_insn_1(bld, NV_OP_RCP, s[0]);
+
+ for (c = 0; c < 3; ++c)
+ t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], s[0]);
+ }
+
+ if (lodbias)
+ t[dim + cube + array + shadow] = emit_fetch(bld, insn, 0, 3);
+
+ emit_tex(bld, opcode, tic, tsc, dst0, t, dim, array, cube, shadow);
+}
+
+static INLINE struct nv_value *
+bld_dot(struct bld_context *bld, const struct tgsi_full_instruction *insn,
+ int n)
+{
+ struct nv_value *dotp, *src0, *src1;
+ int c;
+
+ src0 = emit_fetch(bld, insn, 0, 0);
+ src1 = emit_fetch(bld, insn, 1, 0);
+ dotp = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);
+
+ for (c = 1; c < n; ++c) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ dotp = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dotp);
+ }
+ return dotp;
+}
+
+#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \
+ for (chan = 0; chan < 4; ++chan) \
+ if ((inst)->Dst[0].Register.WriteMask & (1 << chan))
+
+static void
+bld_instruction(struct bld_context *bld,
+ const struct tgsi_full_instruction *insn)
+{
+ struct nv_value *src0;
+ struct nv_value *src1;
+ struct nv_value *src2;
+ struct nv_value *dst0[4] = { NULL };
+ struct nv_value *temp;
+ int c;
+ uint opcode = translate_opcode(insn->Instruction.Opcode);
+ uint8_t mask = insn->Dst[0].Register.WriteMask;
+
+#ifdef NOUVEAU_DEBUG
+ debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1);
+#endif
+
+ switch (insn->Instruction.Opcode) {
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_MAX:
+ case TGSI_OPCODE_MIN:
+ case TGSI_OPCODE_MUL:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ dst0[c] = bld_insn_2(bld, opcode, src0, src1);
+ }
+ break;
+ case TGSI_OPCODE_ARL:
+ src1 = bld_imm_u32(bld, 4);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src0 = bld_insn_1(bld, NV_OP_FLOOR, src0);
+ src0->insn->ext.cvt.d = NV_TYPE_S32;
+ src0->insn->ext.cvt.s = NV_TYPE_F32;
+ dst0[c] = bld_insn_2(bld, NV_OP_SHL, src0, src1);
+ }
+ break;
+ case TGSI_OPCODE_CMP:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ src2 = emit_fetch(bld, insn, 2, c);
+ dst0[c] = bld_insn_3(bld, NV_OP_SLCT_F32, src1, src2, src0);
+ dst0[c]->insn->set_cond = NV_CC_LT;
+ }
+ break;
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_SIN:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
+ if (insn->Dst[0].Register.WriteMask & 7)
+ temp = bld_insn_1(bld, opcode, temp);
+ for (c = 0; c < 3; ++c)
+ if (insn->Dst[0].Register.WriteMask & (1 << c))
+ dst0[c] = temp;
+ if (!(insn->Dst[0].Register.WriteMask & (1 << 3)))
+ break;
+ src0 = emit_fetch(bld, insn, 0, 3);
+ temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
+ dst0[3] = bld_insn_1(bld, opcode, temp);
+ break;
+ case TGSI_OPCODE_DP2:
+ temp = bld_dot(bld, insn, 2);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_DP3:
+ temp = bld_dot(bld, insn, 3);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_DP4:
+ temp = bld_dot(bld, insn, 4);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_DPH:
+ src0 = bld_dot(bld, insn, 3);
+ src1 = emit_fetch(bld, insn, 1, 3);
+ temp = bld_insn_2(bld, NV_OP_ADD_F32, src0, src1);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_DST:
+ if (insn->Dst[0].Register.WriteMask & 1)
+ dst0[0] = bld_imm_f32(bld, 1.0f);
+ if (insn->Dst[0].Register.WriteMask & 2) {
+ src0 = emit_fetch(bld, insn, 0, 1);
+ src1 = emit_fetch(bld, insn, 1, 1);
+ dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);
+ }
+ if (insn->Dst[0].Register.WriteMask & 4)
+ dst0[2] = emit_fetch(bld, insn, 0, 2);
+ if (insn->Dst[0].Register.WriteMask & 8)
+ dst0[3] = emit_fetch(bld, insn, 1, 3);
+ break;
+ case TGSI_OPCODE_EXP:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ temp = bld_insn_1(bld, NV_OP_FLOOR, src0);
+
+ if (insn->Dst[0].Register.WriteMask & 2)
+ dst0[1] = bld_insn_2(bld, NV_OP_SUB_F32, src0, temp);
+ if (insn->Dst[0].Register.WriteMask & 1) {
+ temp = bld_insn_1(bld, NV_OP_PREEX2, temp);
+ dst0[0] = bld_insn_1(bld, NV_OP_EX2, temp);
+ }
+ if (insn->Dst[0].Register.WriteMask & 4) {
+ temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
+ dst0[2] = bld_insn_1(bld, NV_OP_EX2, temp);
+ }
+ if (insn->Dst[0].Register.WriteMask & 8)
+ dst0[3] = bld_imm_f32(bld, 1.0f);
+ break;
+ case TGSI_OPCODE_EX2:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
+ temp = bld_insn_1(bld, NV_OP_EX2, temp);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_FRC:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0);
+ dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, dst0[c]);
+ }
+ break;
+ case TGSI_OPCODE_KIL:
+ for (c = 0; c < 4; ++c)
+ bld_kil(bld, emit_fetch(bld, insn, 0, c));
+ break;
+ case TGSI_OPCODE_KILP:
+ (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1;
+ break;
+ case TGSI_OPCODE_IF:
+ {
+ struct nv_basic_block *b = new_basic_block(bld->pc);
+ struct nv_value *pred = emit_fetch(bld, insn, 0, 0);
+
+ assert(bld->cond_lvl < BLD_MAX_COND_NESTING);
+
+ nvc0_bblock_attach(bld->pc->current_block, b, CFG_EDGE_FORWARD);
+
+ bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
+ bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
+
+ if (pred->insn && NV_BASEOP(pred->insn->opcode) == NV_OP_SET) {
+ pred = bld_clone(bld, pred->insn);
+ pred->reg.size = 1;
+ pred->reg.file = NV_FILE_PRED;
+ if (pred->insn->opcode == NV_OP_FSET_F32)
+ pred->insn->opcode = NV_OP_SET_F32;
+ } else {
+ pred = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE | NV_CC_U,
+ pred, bld->zero);
+ }
+ assert(!mask);
+
+ bld_flow(bld, NV_OP_BRA, pred, NV_CC_NOT_P, NULL, (bld->cond_lvl == 0));
+
+ ++bld->cond_lvl;
+ bld_new_block(bld, b);
+ }
+ break;
+ case TGSI_OPCODE_ELSE:
+ {
+ struct nv_basic_block *b = new_basic_block(bld->pc);
+
+ --bld->cond_lvl;
+ nvc0_bblock_attach(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
+
+ bld->cond_bb[bld->cond_lvl]->exit->target = b;
+ bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
+
+ new_instruction(bld->pc, NV_OP_BRA)->terminator = 1;
+
+ ++bld->cond_lvl;
+ bld_new_block(bld, b);
+ }
+ break;
+ case TGSI_OPCODE_ENDIF:
+ {
+ struct nv_basic_block *b = new_basic_block(bld->pc);
+
+ if (bld->pc->current_block->exit &&
+ !bld->pc->current_block->exit->terminator)
+ bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, b, FALSE);
+
+ --bld->cond_lvl;
+ nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind);
+ nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
+
+ bld->cond_bb[bld->cond_lvl]->exit->target = b;
+
+ bld_new_block(bld, b);
+
+ if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) {
+ bld->join_bb[bld->cond_lvl]->exit->prev->target = b;
+ new_instruction(bld->pc, NV_OP_JOIN)->join = 1;
+ }
+ }
+ break;
+ case TGSI_OPCODE_BGNLOOP:
+ {
+ struct nv_basic_block *bl = new_basic_block(bld->pc);
+ struct nv_basic_block *bb = new_basic_block(bld->pc);
+
+ assert(bld->loop_lvl < BLD_MAX_LOOP_NESTING);
+
+ bld->loop_bb[bld->loop_lvl] = bl;
+ bld->brkt_bb[bld->loop_lvl] = bb;
+
+ nvc0_bblock_attach(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER);
+
+ bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]);
+
+ if (bld->loop_lvl == bld->pc->loop_nesting_bound)
+ bld->pc->loop_nesting_bound++;
+
+ bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl);
+ bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl);
+ bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl);
+ }
+ break;
+ case TGSI_OPCODE_BRK:
+ {
+ struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];
+
+ bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);
+
+ if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */
+ nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);
+
+ bld->out_kind = CFG_EDGE_FAKE;
+ }
+ break;
+ case TGSI_OPCODE_CONT:
+ {
+ struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
+
+ bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);
+
+ nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
+
+ if ((bb = bld->join_bb[bld->cond_lvl - 1])) {
+ bld->join_bb[bld->cond_lvl - 1] = NULL;
+ nvc0_insn_delete(bb->exit->prev);
+ }
+ bld->out_kind = CFG_EDGE_FAKE;
+ }
+ break;
+ case TGSI_OPCODE_ENDLOOP:
+ {
+ struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
+
+ if (bld->out_kind != CFG_EDGE_FAKE) { /* else we already had BRK/CONT */
+ bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);
+
+ nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
+ }
+
+ bld_loop_end(bld, bb); /* replace loop-side operand of the phis */
+
+ bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]);
+ }
+ break;
+ case TGSI_OPCODE_ABS:
+ case TGSI_OPCODE_CEIL:
+ case TGSI_OPCODE_FLR:
+ case TGSI_OPCODE_TRUNC:
+ case TGSI_OPCODE_DDX:
+ case TGSI_OPCODE_DDY:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ dst0[c] = bld_insn_1(bld, opcode, src0);
+ }
+ break;
+ case TGSI_OPCODE_LIT:
+ bld_lit(bld, dst0, insn);
+ break;
+ case TGSI_OPCODE_LRP:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ src2 = emit_fetch(bld, insn, 2, c);
+ dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2);
+ dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, dst0[c], src0, src2);
+ }
+ break;
+ case TGSI_OPCODE_MOV:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = emit_fetch(bld, insn, 0, c);
+ break;
+ case TGSI_OPCODE_MAD:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ src2 = emit_fetch(bld, insn, 2, c);
+ dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2);
+ }
+ break;
+ case TGSI_OPCODE_POW:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ src1 = emit_fetch(bld, insn, 1, 0);
+ temp = bld_pow(bld, src0, src1);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_LOG:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ src0 = bld_insn_1(bld, NV_OP_ABS_F32, src0);
+ temp = bld_insn_1(bld, NV_OP_LG2, src0);
+ dst0[2] = temp;
+ if (insn->Dst[0].Register.WriteMask & 3) {
+ temp = bld_insn_1(bld, NV_OP_FLOOR, temp);
+ dst0[0] = temp;
+ }
+ if (insn->Dst[0].Register.WriteMask & 2) {
+ temp = bld_insn_1(bld, NV_OP_PREEX2, temp);
+ temp = bld_insn_1(bld, NV_OP_EX2, temp);
+ temp = bld_insn_1(bld, NV_OP_RCP, temp);
+ dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, temp);
+ }
+ if (insn->Dst[0].Register.WriteMask & 8)
+ dst0[3] = bld_imm_f32(bld, 1.0f);
+ break;
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_LG2:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ temp = bld_insn_1(bld, opcode, src0);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_RSQ:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ temp = bld_insn_1(bld, NV_OP_ABS_F32, src0);
+ temp = bld_insn_1(bld, NV_OP_RSQ, temp);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_ISLT:
+ case TGSI_OPCODE_ISGE:
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_USNE:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ dst0[c] = bld_insn_2(bld, opcode, src0, src1);
+ dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode);
+ }
+ break;
+ case TGSI_OPCODE_SCS:
+ if (insn->Dst[0].Register.WriteMask & 0x3) {
+ src0 = emit_fetch(bld, insn, 0, 0);
+ temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
+ if (insn->Dst[0].Register.WriteMask & 0x1)
+ dst0[0] = bld_insn_1(bld, NV_OP_COS, temp);
+ if (insn->Dst[0].Register.WriteMask & 0x2)
+ dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp);
+ }
+ if (insn->Dst[0].Register.WriteMask & 0x4)
+ dst0[2] = bld_imm_f32(bld, 0.0f);
+ if (insn->Dst[0].Register.WriteMask & 0x8)
+ dst0[3] = bld_imm_f32(bld, 1.0f);
+ break;
+ case TGSI_OPCODE_SSG:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero);
+ src2 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero);
+ src1->insn->set_cond = NV_CC_GT;
+ src2->insn->set_cond = NV_CC_LT;
+ dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2);
+ }
+ break;
+ case TGSI_OPCODE_SUB:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, src1);
+ }
+ break;
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXL:
+ case TGSI_OPCODE_TXP:
+ bld_tex(bld, dst0, insn);
+ break;
+ case TGSI_OPCODE_XPD:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ if (c == 3) {
+ dst0[3] = bld_imm_f32(bld, 1.0f);
+ break;
+ }
+ src0 = emit_fetch(bld, insn, 1, (c + 1) % 3);
+ src1 = emit_fetch(bld, insn, 0, (c + 2) % 3);
+ dst0[c] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);
+
+ src0 = emit_fetch(bld, insn, 0, (c + 1) % 3);
+ src1 = emit_fetch(bld, insn, 1, (c + 2) % 3);
+ dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dst0[c]);
+
+ dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG;
+ }
+ break;
+ case TGSI_OPCODE_RET:
+ (new_instruction(bld->pc, NV_OP_RET))->fixed = 1;
+ break;
+ case TGSI_OPCODE_END:
+ /* VP outputs are exported in-place as scalars, optimization later */
+ if (bld->pc->is_fragprog)
+ bld_export_fp_outputs(bld);
+ if (bld->ti->append_ucp)
+ bld_append_vp_ucp(bld);
+ return;
+ default:
+ NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode);
+ abort();
+ return;
+ }
+
+ if (insn->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
+ !bld->pc->is_fragprog) {
+ struct nv_instruction *mi = NULL;
+ uint size;
+
+ if (bld->ti->append_ucp) {
+ if (bld->ti->output_loc[insn->Dst[0].Register.Index][0] == 0x70) {
+ bld->hpos_index = insn->Dst[0].Register.Index;
+ for (c = 0; c < 4; ++c)
+ if (mask & (1 << c))
+ STORE_OUTP(insn->Dst[0].Register.Index, c, dst0[c]);
+ }
+ }
+
+ for (c = 0; c < 4; ++c)
+ if (mask & (1 << c))
+ if ((dst0[c]->reg.file == NV_FILE_IMM) ||
+ (dst0[c]->reg.file == NV_FILE_GPR && dst0[c]->reg.id == 63))
+ dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]);
+
+ c = 0;
+ if ((mask & 0x3) == 0x3) {
+ mask &= ~0x3;
+ size = 8;
+ mi = bld_insn_2(bld, NV_OP_BIND, dst0[0], dst0[1])->insn;
+ }
+ if ((mask & 0xc) == 0xc) {
+ mask &= ~0xc;
+ if (mi) {
+ size = 16;
+ nv_reference(bld->pc, mi, 2, dst0[2]);
+ nv_reference(bld->pc, mi, 3, dst0[3]);
+ } else {
+ c = 2;
+ size = 8;
+ mi = bld_insn_2(bld, NV_OP_BIND, dst0[2], dst0[3])->insn;
+ }
+ } else
+ if (mi && (mask & 0x4)) {
+ size = 12;
+ mask &= ~0x4;
+ nv_reference(bld->pc, mi, 2, dst0[2]);
+ }
+
+ if (mi) {
+ struct nv_instruction *ex = new_instruction(bld->pc, NV_OP_EXPORT);
+ int s;
+
+ nv_reference(bld->pc, ex, 0, new_value(bld->pc, NV_FILE_MEM_V, 4));
+ nv_reference(bld->pc, ex, 1, mi->def[0]);
+
+ for (s = 1; s < size / 4; ++s) {
+ bld_def(mi, s, new_value(bld->pc, NV_FILE_GPR, 4));
+ nv_reference(bld->pc, ex, s + 1, mi->def[s]);
+ }
+
+ ex->fixed = 1;
+ ex->src[0]->value->reg.size = size;
+ ex->src[0]->value->reg.address =
+ bld->ti->output_loc[insn->Dst[0].Register.Index][c];
+ }
+ }
+
+ for (c = 0; c < 4; ++c)
+ if (mask & (1 << c))
+ emit_store(bld, insn, c, dst0[c]);
+}
+
+static INLINE void
+bld_free_registers(struct bld_register *base, int n)
+{
+ int i, c;
+
+ for (i = 0; i < n; ++i)
+ for (c = 0; c < 4; ++c)
+ util_dynarray_fini(&base[i * 4 + c].vals);
+}
+
+int
+nvc0_tgsi_to_nc(struct nv_pc *pc, struct nvc0_translation_info *ti)
+{
+ struct bld_context *bld = CALLOC_STRUCT(bld_context);
+ unsigned ip;
+
+ pc->root[0] = pc->current_block = new_basic_block(pc);
+
+ bld->pc = pc;
+ bld->ti = ti;
+
+ pc->loop_nesting_bound = 1;
+
+ bld->zero = new_value(pc, NV_FILE_GPR, 4);
+ bld->zero->reg.id = 63;
+
+ if (pc->is_fragprog) {
+ struct nv_value *mem = new_value(pc, NV_FILE_MEM_V, 4);
+ mem->reg.address = 0x7c;
+
+ bld->frag_coord[3] = bld_insn_1(bld, NV_OP_LINTERP, mem);
+ bld->frag_coord[3] = bld_insn_1(bld, NV_OP_RCP, bld->frag_coord[3]);
+ }
+
+ for (ip = 0; ip < ti->num_insns; ++ip)
+ bld_instruction(bld, &ti->insns[ip]);
+
+ bld_free_registers(&bld->tvs[0][0], BLD_MAX_TEMPS);
+ bld_free_registers(&bld->avs[0][0], BLD_MAX_ADDRS);
+ bld_free_registers(&bld->pvs[0][0], BLD_MAX_PREDS);
+ bld_free_registers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);
+
+ FREE(bld);
+ return 0;
+}
+
+/* If a variable is assigned in a loop, replace all references to the value
+ * from outside the loop with a phi value.
+ */
+static void
+bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b,
+ struct nv_value *old_val,
+ struct nv_value *new_val)
+{
+ struct nv_instruction *nvi;
+
+ for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = nvi->next) {
+ int s;
+ for (s = 0; s < 6 && nvi->src[s]; ++s)
+ if (nvi->src[s]->value == old_val)
+ nv_reference(pc, nvi, s, new_val);
+ }
+
+ b->pass_seq = pc->pass_seq;
+
+ if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq)
+ bld_replace_value(pc, b->out[0], old_val, new_val);
+
+ if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq)
+ bld_replace_value(pc, b->out[1], old_val, new_val);
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c
new file mode 100644
index 0000000000..7bbfe057e5
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_transfer.c
@@ -0,0 +1,385 @@
+
+#include "util/u_format.h"
+
+#include "nvc0_context.h"
+#include "nvc0_transfer.h"
+
+#include "nv50/nv50_defs.xml.h"
+
+struct nvc0_transfer {
+ struct pipe_transfer base;
+ struct nvc0_m2mf_rect rect[2];
+ uint32_t nblocksx;
+ uint16_t nblocksy;
+ uint16_t nlayers;
+};
+
+void
+nvc0_m2mf_transfer_rect(struct pipe_screen *pscreen,
+ const struct nvc0_m2mf_rect *dst,
+ const struct nvc0_m2mf_rect *src,
+ uint32_t nblocksx, uint32_t nblocksy)
+{
+ struct nouveau_channel *chan = nouveau_screen(pscreen)->channel;
+ const int cpp = dst->cpp;
+ uint32_t src_ofst = src->base;
+ uint32_t dst_ofst = dst->base;
+ uint32_t height = nblocksy;
+ uint32_t sy = src->y;
+ uint32_t dy = dst->y;
+ uint32_t exec = (1 << 20);
+
+ assert(dst->cpp == src->cpp);
+
+ if (nouveau_bo_tile_layout(src->bo)) {
+ BEGIN_RING(chan, RING_MF(TILING_MODE_IN), 5);
+ OUT_RING (chan, src->tile_mode);
+ OUT_RING (chan, src->width * cpp);
+ OUT_RING (chan, src->height);
+ OUT_RING (chan, src->depth);
+ OUT_RING (chan, src->z);
+ } else {
+ src_ofst += src->y * src->pitch + src->x * cpp;
+
+ BEGIN_RING(chan, RING_MF(PITCH_IN), 1);
+ OUT_RING (chan, src->width * cpp);
+
+ exec |= NVC0_M2MF_EXEC_LINEAR_IN;
+ }
+
+ if (nouveau_bo_tile_layout(dst->bo)) {
+ BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5);
+ OUT_RING (chan, dst->tile_mode);
+ OUT_RING (chan, dst->width * cpp);
+ OUT_RING (chan, dst->height);
+ OUT_RING (chan, dst->depth);
+ OUT_RING (chan, dst->z);
+ } else {
+ dst_ofst += dst->y * dst->pitch + dst->x * cpp;
+
+ BEGIN_RING(chan, RING_MF(PITCH_OUT), 1);
+ OUT_RING (chan, dst->width * cpp);
+
+ exec |= NVC0_M2MF_EXEC_LINEAR_OUT;
+ }
+
+ while (height) {
+ int line_count = height > 2047 ? 2047 : height;
+
+ MARK_RING (chan, 17, 4);
+
+ BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2);
+ OUT_RELOCh(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD);
+
+ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
+ OUT_RELOCh(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR);
+
+ if (!(exec & NVC0_M2MF_EXEC_LINEAR_IN)) {
+ BEGIN_RING(chan, RING_MF(TILING_POSITION_IN_X), 2);
+ OUT_RING (chan, src->x * cpp);
+ OUT_RING (chan, sy);
+ } else {
+ src_ofst += line_count * src->pitch;
+ }
+ if (!(exec & NVC0_M2MF_EXEC_LINEAR_OUT)) {
+ BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2);
+ OUT_RING (chan, dst->x * cpp);
+ OUT_RING (chan, dy);
+ } else {
+ dst_ofst += line_count * dst->pitch;
+ }
+
+ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
+ OUT_RING (chan, nblocksx * cpp);
+ OUT_RING (chan, line_count);
+ BEGIN_RING(chan, RING_MF(EXEC), 1);
+ OUT_RING (chan, exec);
+
+ height -= line_count;
+ sy += line_count;
+ dy += line_count;
+ }
+}
+
+void
+nvc0_m2mf_push_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, void *data)
+{
+ struct nouveau_channel *chan = nv->screen->channel;
+ uint32_t *src = (uint32_t *)data;
+ unsigned count = (size + 3) / 4;
+
+ MARK_RING (chan, 8, 2);
+
+ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
+ OUT_RELOCh(chan, dst, offset, domain | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, dst, offset, domain | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
+ OUT_RING (chan, size);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_MF(EXEC), 1);
+ OUT_RING (chan, 0x100111);
+
+ while (count) {
+ unsigned nr = AVAIL_RING(chan);
+
+ if (nr < 9) {
+ FIRE_RING(chan);
+ nouveau_bo_validate(chan, dst, NOUVEAU_BO_WR);
+ continue;
+ }
+ nr = MIN2(count, nr - 1);
+ nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
+
+ BEGIN_RING_NI(chan, RING_MF(DATA), nr);
+ OUT_RINGp (chan, src, nr);
+
+ src += nr;
+ count -= nr;
+ }
+}
+
+void
+nvc0_m2mf_copy_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
+ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
+ unsigned size)
+{
+ struct nouveau_channel *chan = nv->screen->channel;
+
+ while (size) {
+ unsigned bytes = MIN2(size, 1 << 17);
+
+ MARK_RING (chan, 11, 4);
+
+ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
+ OUT_RELOCh(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2);
+ OUT_RELOCh(chan, src, srcoff, srcdom | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, src, srcoff, srcdom | NOUVEAU_BO_RD);
+ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
+ OUT_RING (chan, bytes);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_MF(EXEC), 1);
+ OUT_RING (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) |
+ NVC0_M2MF_EXEC_LINEAR_IN | NVC0_M2MF_EXEC_LINEAR_OUT);
+
+ srcoff += bytes;
+ dstoff += bytes;
+ size -= bytes;
+ }
+}
+
+static void
+nvc0_m2mf_push_rect(struct pipe_screen *pscreen,
+ const struct nvc0_m2mf_rect *dst,
+ const void *data,
+ unsigned nblocksx, unsigned nblocksy)
+{
+ struct nouveau_channel *chan;
+ const uint8_t *src = (const uint8_t *)data;
+ const int cpp = dst->cpp;
+ const int line_len = nblocksx * cpp;
+ int dy = dst->y;
+
+ assert(nouveau_bo_tile_layout(dst->bo));
+
+ BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5);
+ OUT_RING (chan, dst->tile_mode);
+ OUT_RING (chan, dst->width * cpp);
+ OUT_RING (chan, dst->height);
+ OUT_RING (chan, dst->depth);
+ OUT_RING (chan, dst->z);
+
+ while (nblocksy) {
+ int line_count, words;
+ int size = MIN2(AVAIL_RING(chan), NV04_PFIFO_MAX_PACKET_LEN);
+
+ if (size < (12 + words)) {
+ FIRE_RING(chan);
+ continue;
+ }
+ line_count = (size * 4) / line_len;
+ words = (line_count * line_len + 3) / 4;
+
+ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
+ OUT_RELOCh(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR);
+
+ BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2);
+ OUT_RING (chan, dst->x * cpp);
+ OUT_RING (chan, dy);
+ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
+ OUT_RING (chan, line_len);
+ OUT_RING (chan, line_count);
+ BEGIN_RING(chan, RING_MF(EXEC), 1);
+ OUT_RING (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) |
+ NVC0_M2MF_EXEC_PUSH | NVC0_M2MF_EXEC_LINEAR_IN);
+
+ BEGIN_RING_NI(chan, RING_MF(DATA), words);
+ OUT_RINGp (chan, src, words);
+
+ dy += line_count;
+ src += line_len * line_count;
+ nblocksy -= line_count;
+ }
+}
+
+struct pipe_transfer *
+nvc0_miptree_transfer_new(struct pipe_context *pctx,
+ struct pipe_resource *res,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pctx);
+ struct pipe_screen *pscreen = pctx->screen;
+ struct nouveau_device *dev = nvc0->screen->base.device;
+ struct nvc0_miptree *mt = nvc0_miptree(res);
+ struct nvc0_miptree_level *lvl = &mt->level[level];
+ struct nvc0_transfer *tx;
+ uint32_t size;
+ uint32_t w, h, d, z, layer;
+ int ret;
+
+ tx = CALLOC_STRUCT(nvc0_transfer);
+ if (!tx)
+ return NULL;
+
+ if (mt->layout_3d) {
+ z = box->z;
+ d = u_minify(res->depth0, level);
+ layer = 0;
+ } else {
+ z = 0;
+ d = 1;
+ layer = box->z;
+ }
+ tx->nlayers = box->depth;
+
+ pipe_resource_reference(&tx->base.resource, res);
+
+ tx->base.level = level;
+ tx->base.usage = usage;
+ tx->base.box = *box;
+
+ tx->nblocksx = util_format_get_nblocksx(res->format, box->width);
+ tx->nblocksy = util_format_get_nblocksy(res->format, box->height);
+
+ tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format);
+ tx->base.layer_stride = tx->nblocksy * tx->base.stride;
+
+ w = u_minify(res->width0, level);
+ h = u_minify(res->height0, level);
+
+ tx->rect[0].cpp = tx->rect[1].cpp = util_format_get_blocksize(res->format);
+
+ tx->rect[0].bo = mt->base.bo;
+ tx->rect[0].base = lvl->offset + layer * mt->layer_stride;
+ tx->rect[0].tile_mode = lvl->tile_mode;
+ tx->rect[0].x = util_format_get_nblocksx(res->format, box->x);
+ tx->rect[0].y = util_format_get_nblocksy(res->format, box->y);
+ tx->rect[0].z = z;
+ tx->rect[0].width = util_format_get_nblocksx(res->format, w);
+ tx->rect[0].height = util_format_get_nblocksy(res->format, h);
+ tx->rect[0].depth = d;
+ tx->rect[0].pitch = lvl->pitch;
+ tx->rect[0].domain = NOUVEAU_BO_VRAM;
+
+ size = tx->base.layer_stride;
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
+ size * tx->nlayers, &tx->rect[1].bo);
+ if (ret) {
+ FREE(tx);
+ return NULL;
+ }
+
+ tx->rect[1].width = tx->nblocksx;
+ tx->rect[1].height = tx->nblocksy;
+ tx->rect[1].depth = 1;
+ tx->rect[1].pitch = tx->base.stride;
+ tx->rect[1].domain = NOUVEAU_BO_GART;
+
+ if (usage & PIPE_TRANSFER_READ) {
+ unsigned base = tx->rect[0].base;
+ unsigned i;
+ for (i = 0; i < tx->nlayers; ++i) {
+ nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0],
+ tx->nblocksx, tx->nblocksy);
+ if (mt->layout_3d)
+ tx->rect[0].z++;
+ else
+ tx->rect[0].base += mt->layer_stride;
+ tx->rect[1].base += size;
+ }
+ tx->rect[0].z = z;
+ tx->rect[0].base = base;
+ tx->rect[1].base = 0;
+ }
+
+ return &tx->base;
+}
+
+void
+nvc0_miptree_transfer_del(struct pipe_context *pctx,
+ struct pipe_transfer *transfer)
+{
+ struct pipe_screen *pscreen = pctx->screen;
+ struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
+ struct nvc0_miptree *mt = nvc0_miptree(tx->base.resource);
+ unsigned i;
+
+ if (tx->base.usage & PIPE_TRANSFER_WRITE) {
+ for (i = 0; i < tx->nlayers; ++i) {
+ nvc0_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1],
+ tx->nblocksx, tx->nblocksy);
+ if (mt->layout_3d)
+ tx->rect[0].z++;
+ else
+ tx->rect[0].base += mt->layer_stride;
+ tx->rect[1].base += tx->nblocksy * tx->base.stride;
+ }
+ }
+
+ nouveau_bo_ref(NULL, &tx->rect[1].bo);
+ pipe_resource_reference(&transfer->resource, NULL);
+
+ FREE(tx);
+}
+
+void *
+nvc0_miptree_transfer_map(struct pipe_context *pctx,
+ struct pipe_transfer *transfer)
+{
+ struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
+ int ret;
+ unsigned flags = 0;
+
+ if (tx->rect[1].bo->map)
+ return tx->rect[1].bo->map;
+
+ if (transfer->usage & PIPE_TRANSFER_READ)
+ flags = NOUVEAU_BO_RD;
+ if (transfer->usage & PIPE_TRANSFER_WRITE)
+ flags |= NOUVEAU_BO_WR;
+
+ ret = nouveau_bo_map(tx->rect[1].bo, flags);
+ if (ret)
+ return NULL;
+ return tx->rect[1].bo->map;
+}
+
+void
+nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
+ struct pipe_transfer *transfer)
+{
+ struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
+
+ nouveau_bo_unmap(tx->rect[1].bo);
+}
+
diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.h b/src/gallium/drivers/nvc0/nvc0_transfer.h
new file mode 100644
index 0000000000..803ee3463e
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_transfer.h
@@ -0,0 +1,44 @@
+
+#ifndef __NVC0_TRANSFER_H__
+#define __NVC0_TRANSFER_H__
+
+#include "pipe/p_state.h"
+
+struct pipe_transfer *
+nvc0_miptree_transfer_new(struct pipe_context *pcontext,
+ struct pipe_resource *pt,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box);
+void
+nvc0_miptree_transfer_del(struct pipe_context *pcontext,
+ struct pipe_transfer *ptx);
+void *
+nvc0_miptree_transfer_map(struct pipe_context *pcontext,
+ struct pipe_transfer *ptx);
+void
+nvc0_miptree_transfer_unmap(struct pipe_context *pcontext,
+ struct pipe_transfer *ptx);
+
+struct nvc0_m2mf_rect {
+ struct nouveau_bo *bo;
+ uint32_t base;
+ unsigned domain;
+ uint32_t pitch;
+ uint32_t width;
+ uint32_t x;
+ uint32_t height;
+ uint32_t y;
+ uint16_t depth;
+ uint16_t z;
+ uint16_t tile_mode;
+ uint16_t cpp;
+};
+
+void
+nvc0_m2mf_transfer_rect(struct pipe_screen *pscreen,
+ const struct nvc0_m2mf_rect *dst,
+ const struct nvc0_m2mf_rect *src,
+ uint32_t nblocksx, uint32_t nblocksy);
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c
new file mode 100644
index 0000000000..6bbcf2447e
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_vbo.c
@@ -0,0 +1,653 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+
+#include "nvc0_3d.xml.h"
+
+void
+nvc0_vertex_state_delete(struct pipe_context *pipe,
+ void *hwcso)
+{
+ struct nvc0_vertex_stateobj *so = hwcso;
+
+ if (so->translate)
+ so->translate->release(so->translate);
+ FREE(hwcso);
+}
+
+void *
+nvc0_vertex_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements)
+{
+ struct nvc0_vertex_stateobj *so;
+ struct translate_key transkey;
+ unsigned i;
+
+ so = MALLOC(sizeof(*so) +
+ num_elements * sizeof(struct nvc0_vertex_element));
+ if (!so)
+ return NULL;
+ so->num_elements = num_elements;
+ so->instance_elts = 0;
+ so->instance_bufs = 0;
+ so->need_conversion = FALSE;
+
+ transkey.nr_elements = 0;
+ transkey.output_stride = 0;
+
+ for (i = 0; i < num_elements; ++i) {
+ const struct pipe_vertex_element *ve = &elements[i];
+ const unsigned vbi = ve->vertex_buffer_index;
+ enum pipe_format fmt = ve->src_format;
+
+ so->element[i].pipe = elements[i];
+ so->element[i].state = nvc0_format_table[fmt].vtx;
+
+ if (!so->element[i].state) {
+ switch (util_format_get_nr_components(fmt)) {
+ case 1: fmt = PIPE_FORMAT_R32_FLOAT; break;
+ case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break;
+ case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break;
+ case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break;
+ default:
+ assert(0);
+ return NULL;
+ }
+ so->element[i].state = nvc0_format_table[fmt].vtx;
+ so->need_conversion = TRUE;
+ }
+ so->element[i].state |= i;
+
+ if (1) {
+ unsigned j = transkey.nr_elements++;
+
+ transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL;
+ transkey.element[j].input_format = ve->src_format;
+ transkey.element[j].input_buffer = vbi;
+ transkey.element[j].input_offset = ve->src_offset;
+ transkey.element[j].instance_divisor = ve->instance_divisor;
+
+ transkey.element[j].output_format = fmt;
+ transkey.element[j].output_offset = transkey.output_stride;
+ transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3;
+
+ if (unlikely(ve->instance_divisor)) {
+ so->instance_elts |= 1 << i;
+ so->instance_bufs |= 1 << vbi;
+ }
+ }
+ }
+
+ so->translate = translate_create(&transkey);
+ so->vtx_size = transkey.output_stride / 4;
+ so->vtx_per_packet_max = NV04_PFIFO_MAX_PACKET_LEN / MAX2(so->vtx_size, 1);
+
+ return so;
+}
+
+#define NVC0_3D_VERTEX_ATTRIB_INACTIVE \
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | \
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST
+
+#define VTX_ATTR(a, c, t, s) \
+ ((NVC0_3D_VTX_ATTR_DEFINE_TYPE_##t) | \
+ (NVC0_3D_VTX_ATTR_DEFINE_SIZE_##s) | \
+ ((a) << NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT) | \
+ ((c) << NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT))
+
+static void
+nvc0_emit_vtxattr(struct nvc0_context *nvc0, struct pipe_vertex_buffer *vb,
+ struct pipe_vertex_element *ve, unsigned attr)
+{
+ const void *data;
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nv04_resource *res = nv04_resource(vb->buffer);
+ float v[4];
+ int i;
+ const unsigned nc = util_format_get_nr_components(ve->src_format);
+
+ data = nouveau_resource_map_offset(&nvc0->base, res, vb->buffer_offset +
+ ve->src_offset, NOUVEAU_BO_RD);
+
+ util_format_read_4f(ve->src_format, v, 0, data, 0, 0, 0, 1, 1);
+
+ BEGIN_RING(chan, RING_3D(VTX_ATTR_DEFINE), nc + 1);
+ OUT_RING (chan, VTX_ATTR(attr, nc, FLOAT, 32));
+ for (i = 0; i < nc; ++i)
+ OUT_RINGf(chan, v[i]);
+}
+
+static INLINE void
+nvc0_vbuf_range(struct nvc0_context *nvc0, int vbi,
+ uint32_t *base, uint32_t *size)
+{
+ if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) {
+ /* TODO: use min and max instance divisor to get a proper range */
+ *base = 0;
+ *size = nvc0->vtxbuf[vbi].buffer->width0;
+ } else {
+ assert(nvc0->vbo_max_index != ~0);
+ *base = nvc0->vbo_min_index * nvc0->vtxbuf[vbi].stride;
+ *size = (nvc0->vbo_max_index -
+ nvc0->vbo_min_index + 1) * nvc0->vtxbuf[vbi].stride;
+ }
+}
+
+static void
+nvc0_prevalidate_vbufs(struct nvc0_context *nvc0)
+{
+ struct pipe_vertex_buffer *vb;
+ struct nv04_resource *buf;
+ int i;
+ uint32_t base, size;
+
+ nvc0->vbo_fifo = nvc0->vbo_user = 0;
+
+ nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX);
+
+ for (i = 0; i < nvc0->num_vtxbufs; ++i) {
+ vb = &nvc0->vtxbuf[i];
+ if (!vb->stride)
+ continue;
+ buf = nv04_resource(vb->buffer);
+
+ /* NOTE: user buffers with temporary storage count as mapped by GPU */
+ if (!nouveau_resource_mapped_by_gpu(vb->buffer)) {
+ if (nvc0->vbo_push_hint) {
+ nvc0->vbo_fifo = ~0;
+ continue;
+ } else {
+ if (buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY) {
+ nvc0->vbo_user |= 1 << i;
+ assert(vb->stride > vb->buffer_offset);
+ nvc0_vbuf_range(nvc0, i, &base, &size);
+ nouveau_user_buffer_upload(buf, base, size);
+ } else {
+ nouveau_buffer_migrate(&nvc0->base, buf, NOUVEAU_BO_GART);
+ }
+ nvc0->base.vbo_dirty = TRUE;
+ }
+ }
+ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_VERTEX, buf, NOUVEAU_BO_RD);
+ nouveau_buffer_adjust_score(&nvc0->base, buf, 1);
+ }
+}
+
+static void
+nvc0_update_user_vbufs(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ uint32_t base, offset, size;
+ int i;
+ uint32_t written = 0;
+
+ for (i = 0; i < nvc0->vertex->num_elements; ++i) {
+ struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe;
+ const int b = ve->vertex_buffer_index;
+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
+ struct nv04_resource *buf = nv04_resource(vb->buffer);
+
+ if (!(nvc0->vbo_user & (1 << b)))
+ continue;
+
+ if (!vb->stride) {
+ nvc0_emit_vtxattr(nvc0, vb, ve, i);
+ continue;
+ }
+ nvc0_vbuf_range(nvc0, b, &base, &size);
+
+ if (!(written & (1 << b))) {
+ written |= 1 << b;
+ nouveau_user_buffer_upload(buf, base, size);
+ }
+ offset = vb->buffer_offset + ve->src_offset;
+
+ MARK_RING (chan, 6, 4);
+ BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5);
+ OUT_RING (chan, i);
+ OUT_RESRCh(chan, buf, base + size - 1, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, buf, base + size - 1, NOUVEAU_BO_RD);
+ OUT_RESRCh(chan, buf, offset, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, buf, offset, NOUVEAU_BO_RD);
+ }
+ nvc0->base.vbo_dirty = TRUE;
+}
+
+static INLINE void
+nvc0_release_user_vbufs(struct nvc0_context *nvc0)
+{
+ uint32_t vbo_user = nvc0->vbo_user;
+
+ while (vbo_user) {
+ int i = ffs(vbo_user) - 1;
+ vbo_user &= ~(1 << i);
+
+ nouveau_buffer_release_gpu_storage(nv04_resource(nvc0->vtxbuf[i].buffer));
+ }
+}
+
+void
+nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_vertex_stateobj *vertex = nvc0->vertex;
+ struct pipe_vertex_buffer *vb;
+ struct nvc0_vertex_element *ve;
+ unsigned i;
+
+ if (unlikely(vertex->need_conversion || NVC0_USING_EDGEFLAG(nvc0))) {
+ nvc0->vbo_fifo = ~0;
+ nvc0->vbo_user = 0;
+ } else {
+ nvc0_prevalidate_vbufs(nvc0);
+ }
+
+ BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements);
+ for (i = 0; i < vertex->num_elements; ++i) {
+ ve = &vertex->element[i];
+ vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index];
+
+ if (likely(vb->stride) || nvc0->vbo_fifo) {
+ OUT_RING(chan, ve->state);
+ } else {
+ OUT_RING(chan, ve->state | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST);
+ nvc0->vbo_fifo &= ~(1 << i);
+ }
+ }
+
+ for (i = 0; i < vertex->num_elements; ++i) {
+ struct nv04_resource *res;
+ unsigned size, offset;
+
+ ve = &vertex->element[i];
+ vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index];
+
+ if (unlikely(ve->pipe.instance_divisor)) {
+ if (!(nvc0->state.instance_elts & (1 << i))) {
+ IMMED_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1);
+ }
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_DIVISOR(i)), 1);
+ OUT_RING (chan, ve->pipe.instance_divisor);
+ } else
+ if (unlikely(nvc0->state.instance_elts & (1 << i))) {
+ IMMED_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0);
+ }
+
+ res = nv04_resource(vb->buffer);
+
+ if (nvc0->vbo_fifo || unlikely(vb->stride == 0)) {
+ if (!nvc0->vbo_fifo)
+ nvc0_emit_vtxattr(nvc0, vb, &ve->pipe, i);
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ OUT_RING (chan, 0);
+ continue;
+ }
+
+ size = vb->buffer->width0;
+ offset = ve->pipe.src_offset + vb->buffer_offset;
+
+ MARK_RING (chan, 8, 4);
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ OUT_RING (chan, (1 << 12) | vb->stride);
+ BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5);
+ OUT_RING (chan, i);
+ OUT_RESRCh(chan, res, size - 1, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, res, size - 1, NOUVEAU_BO_RD);
+ OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD);
+ }
+ for (; i < nvc0->state.num_vtxelts; ++i) {
+ BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(i)), 1);
+ OUT_RING (chan, NVC0_3D_VERTEX_ATTRIB_INACTIVE);
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ OUT_RING (chan, 0);
+ }
+
+ nvc0->state.num_vtxelts = vertex->num_elements;
+ nvc0->state.instance_elts = vertex->instance_elts;
+}
+
+#define NVC0_PRIM_GL_CASE(n) \
+ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+static INLINE unsigned
+nvc0_prim_gl(unsigned prim)
+{
+ switch (prim) {
+ NVC0_PRIM_GL_CASE(POINTS);
+ NVC0_PRIM_GL_CASE(LINES);
+ NVC0_PRIM_GL_CASE(LINE_LOOP);
+ NVC0_PRIM_GL_CASE(LINE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLES);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
+ NVC0_PRIM_GL_CASE(QUADS);
+ NVC0_PRIM_GL_CASE(QUAD_STRIP);
+ NVC0_PRIM_GL_CASE(POLYGON);
+ NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+ /*
+ NVC0_PRIM_GL_CASE(PATCHES); */
+ default:
+ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+ break;
+ }
+}
+
+static void
+nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan)
+{
+ struct nvc0_context *nvc0 = chan->user_private;
+
+ nouveau_fence_update(&nvc0->screen->base, TRUE);
+
+ nvc0_bufctx_emit_relocs(nvc0);
+}
+
+static void
+nvc0_draw_arrays(struct nvc0_context *nvc0,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned instance_count)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ unsigned prim;
+
+ chan->flush_notify = nvc0_draw_vbo_flush_notify;
+ chan->user_private = nvc0;
+
+ prim = nvc0_prim_gl(mode);
+
+ while (instance_count--) {
+ BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
+ OUT_RING (chan, prim);
+ BEGIN_RING(chan, RING_3D(VERTEX_BUFFER_FIRST), 2);
+ OUT_RING (chan, start);
+ OUT_RING (chan, count);
+ IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0);
+
+ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+
+ chan->flush_notify = nvc0_default_flush_notify;
+}
+
+static void
+nvc0_draw_elements_inline_u08(struct nouveau_channel *chan, uint8_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 3) {
+ unsigned i;
+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), count & 3);
+ for (i = 0; i < (count & 3); ++i)
+ OUT_RING(chan, *map++);
+ count &= ~3;
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4;
+
+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U8), nr);
+ for (i = 0; i < nr; ++i) {
+ OUT_RING(chan,
+ (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]);
+ map += 4;
+ }
+ count -= nr * 4;
+ }
+}
+
+static void
+nvc0_draw_elements_inline_u16(struct nouveau_channel *chan, uint16_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 1) {
+ count &= ~1;
+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1);
+ OUT_RING (chan, *map++);
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr);
+ for (i = 0; i < nr; ++i) {
+ OUT_RING(chan, (map[1] << 16) | map[0]);
+ map += 2;
+ }
+ count -= nr * 2;
+ }
+}
+
+static void
+nvc0_draw_elements_inline_u32(struct nouveau_channel *chan, uint32_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ while (count) {
+ const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
+
+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), nr);
+ OUT_RINGp (chan, map, nr);
+
+ map += nr;
+ count -= nr;
+ }
+}
+
+static void
+nvc0_draw_elements_inline_u32_short(struct nouveau_channel *chan, uint32_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 1) {
+ count--;
+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1);
+ OUT_RING (chan, *map++);
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr);
+ for (i = 0; i < nr; ++i) {
+ OUT_RING(chan, (map[1] << 16) | map[0]);
+ map += 2;
+ }
+ count -= nr * 2;
+ }
+}
+
+static void
+nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned instance_count, int32_t index_bias)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ void *data;
+ unsigned prim;
+ const unsigned index_size = nvc0->idxbuf.index_size;
+
+ chan->flush_notify = nvc0_draw_vbo_flush_notify;
+ chan->user_private = nvc0;
+
+ prim = nvc0_prim_gl(mode);
+
+ if (index_bias != nvc0->state.index_bias) {
+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_BASE), 1);
+ OUT_RING (chan, index_bias);
+ nvc0->state.index_bias = index_bias;
+ }
+
+ if (nouveau_resource_mapped_by_gpu(nvc0->idxbuf.buffer)) {
+ struct nv04_resource *res = nv04_resource(nvc0->idxbuf.buffer);
+ unsigned offset = nvc0->idxbuf.offset;
+ unsigned limit = nvc0->idxbuf.buffer->width0 - 1;
+
+ nouveau_buffer_adjust_score(&nvc0->base, res, 1);
+
+ while (instance_count--) {
+ MARK_RING (chan, 11, 4);
+ BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
+ OUT_RING (chan, mode);
+ BEGIN_RING(chan, RING_3D(INDEX_ARRAY_START_HIGH), 7);
+ OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD);
+ OUT_RESRCh(chan, res, limit, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, res, limit, NOUVEAU_BO_RD);
+ OUT_RING (chan, index_size >> 1);
+ OUT_RING (chan, start);
+ OUT_RING (chan, count);
+ IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0);
+
+ nvc0_resource_fence(res, NOUVEAU_BO_RD);
+
+ mode |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+ } else {
+ data = nouveau_resource_map_offset(&nvc0->base,
+ nv04_resource(nvc0->idxbuf.buffer),
+ nvc0->idxbuf.offset, NOUVEAU_BO_RD);
+ if (!data)
+ return;
+
+ while (instance_count--) {
+ BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
+ OUT_RING (chan, prim);
+ switch (index_size) {
+ case 1:
+ nvc0_draw_elements_inline_u08(chan, data, start, count);
+ break;
+ case 2:
+ nvc0_draw_elements_inline_u16(chan, data, start, count);
+ break;
+ case 4:
+ if (shorten)
+ nvc0_draw_elements_inline_u32_short(chan, data, start, count);
+ else
+ nvc0_draw_elements_inline_u32(chan, data, start, count);
+ break;
+ default:
+ assert(0);
+ return;
+ }
+ IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0);
+
+ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+ }
+
+ chan->flush_notify = nvc0_default_flush_notify;
+}
+
+void
+nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+ /* For picking only a few vertices from a large user buffer, push is better,
+ * if index count is larger and we expect repeated vertices, suggest upload.
+ */
+ nvc0->vbo_push_hint = /* the 64 is heuristic */
+ !(info->indexed &&
+ ((info->max_index - info->min_index + 64) < info->count));
+
+ nvc0->vbo_min_index = info->min_index;
+ nvc0->vbo_max_index = info->max_index;
+
+ if (nvc0->vbo_push_hint != !!nvc0->vbo_fifo)
+ nvc0->dirty |= NVC0_NEW_ARRAYS;
+
+ if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS)))
+ nvc0_update_user_vbufs(nvc0);
+
+ nvc0_state_validate(nvc0);
+
+ if (nvc0->vbo_fifo) {
+ nvc0_push_vbo(nvc0, info);
+ return;
+ }
+
+ if (nvc0->state.instance_base != info->start_instance) {
+ nvc0->state.instance_base = info->start_instance;
+ /* NOTE: this does not affect the shader input, should it ? */
+ BEGIN_RING(chan, RING_3D(VB_INSTANCE_BASE), 1);
+ OUT_RING (chan, info->start_instance);
+ }
+
+ if (nvc0->base.vbo_dirty) {
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FLUSH), 1);
+ OUT_RING (chan, 0);
+ nvc0->base.vbo_dirty = FALSE;
+ }
+
+ if (!info->indexed) {
+ nvc0_draw_arrays(nvc0,
+ info->mode, info->start, info->count,
+ info->instance_count);
+ } else {
+ boolean shorten = info->max_index <= 65535;
+
+ assert(nvc0->idxbuf.buffer);
+
+ if (info->primitive_restart != nvc0->state.prim_restart) {
+ if (info->primitive_restart) {
+ BEGIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 2);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, info->restart_index);
+
+ if (info->restart_index > 65535)
+ shorten = FALSE;
+ } else {
+ IMMED_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 0);
+ }
+ nvc0->state.prim_restart = info->primitive_restart;
+ } else
+ if (info->primitive_restart) {
+ BEGIN_RING(chan, RING_3D(PRIM_RESTART_INDEX), 1);
+ OUT_RING (chan, info->restart_index);
+
+ if (info->restart_index > 65535)
+ shorten = FALSE;
+ }
+
+ nvc0_draw_elements(nvc0, shorten,
+ info->mode, info->start, info->count,
+ info->instance_count, info->index_bias);
+ }
+
+ nvc0_release_user_vbufs(nvc0);
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h
new file mode 100644
index 0000000000..6519ce8e19
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_winsys.h
@@ -0,0 +1,120 @@
+
+#ifndef __NVC0_WINSYS_H__
+#define __NVC0_WINSYS_H__
+
+#include <stdint.h>
+#include <unistd.h>
+#include "pipe/p_defines.h"
+
+#include "nouveau/nouveau_bo.h"
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_grobj.h"
+#include "nouveau/nouveau_device.h"
+#include "nouveau/nouveau_resource.h"
+#include "nouveau/nouveau_pushbuf.h"
+#include "nouveau/nouveau_reloc.h"
+
+#include "nvc0_resource.h" /* OUT_RESRC */
+
+#ifndef NV04_PFIFO_MAX_PACKET_LEN
+#define NV04_PFIFO_MAX_PACKET_LEN 2047
+#endif
+
+#define NVC0_SUBCH_3D 1
+#define NVC0_SUBCH_2D 2
+#define NVC0_SUBCH_MF 3
+
+#define NVC0_MF_(n) NVC0_M2MF_##n
+
+#define RING_3D(n) ((NVC0_SUBCH_3D << 13) | (NVC0_3D_##n >> 2))
+#define RING_2D(n) ((NVC0_SUBCH_2D << 13) | (NVC0_2D_##n >> 2))
+#define RING_MF(n) ((NVC0_SUBCH_MF << 13) | (NVC0_MF_(n) >> 2))
+
+#define RING_3D_(m) ((NVC0_SUBCH_3D << 13) | ((m) >> 2))
+#define RING_2D_(m) ((NVC0_SUBCH_2D << 13) | ((m) >> 2))
+#define RING_MF_(m) ((NVC0_SUBCH_MF << 13) | ((m) >> 2))
+
+#define RING_GR(gr, m) (((gr)->subc << 13) | ((m) >> 2))
+
+int nouveau_pushbuf_flush(struct nouveau_channel *, unsigned min);
+
+static inline uint32_t
+nouveau_bo_tile_layout(struct nouveau_bo *bo)
+{
+ return bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK;
+}
+
+static INLINE void
+nouveau_bo_validate(struct nouveau_channel *chan,
+ struct nouveau_bo *bo, unsigned flags)
+{
+ nouveau_reloc_emit(chan, NULL, 0, NULL, bo, 0, 0, flags, 0, 0);
+}
+
+/* incremental methods */
+static INLINE void
+BEGIN_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned size)
+{
+ WAIT_RING(chan, size + 1);
+ OUT_RING (chan, (0x2 << 28) | (size << 16) | mthd);
+}
+
+/* non-incremental */
+static INLINE void
+BEGIN_RING_NI(struct nouveau_channel *chan, uint32_t mthd, unsigned size)
+{
+ WAIT_RING(chan, size + 1);
+ OUT_RING (chan, (0x6 << 28) | (size << 16) | mthd);
+}
+
+/* increment-once */
+static INLINE void
+BEGIN_RING_1I(struct nouveau_channel *chan, uint32_t mthd, unsigned size)
+{
+ WAIT_RING(chan, size + 1);
+ OUT_RING (chan, (0xa << 28) | (size << 16) | mthd);
+}
+
+/* inline-data */
+static INLINE void
+IMMED_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned data)
+{
+ WAIT_RING(chan, 1);
+ OUT_RING (chan, (0x8 << 28) | (data << 16) | mthd);
+}
+
+static INLINE int
+OUT_RESRCh(struct nouveau_channel *chan, struct nv04_resource *res,
+ unsigned delta, unsigned flags)
+{
+ return OUT_RELOCh(chan, res->bo, res->offset + delta, res->domain | flags);
+}
+
+static INLINE int
+OUT_RESRCl(struct nouveau_channel *chan, struct nv04_resource *res,
+ unsigned delta, unsigned flags)
+{
+ if (flags & NOUVEAU_BO_WR)
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags);
+}
+
+static INLINE void
+BIND_RING(struct nouveau_channel *chan, struct nouveau_grobj *gr, unsigned s)
+{
+ struct nouveau_subchannel *subc = &gr->channel->subc[s];
+
+ assert(s < 8);
+ if (subc->gr) {
+ assert(subc->gr->bound != NOUVEAU_GROBJ_BOUND_EXPLICIT);
+ subc->gr->bound = NOUVEAU_GROBJ_UNBOUND;
+ }
+ subc->gr = gr;
+ subc->gr->subc = s;
+ subc->gr->bound = NOUVEAU_GROBJ_BOUND_EXPLICIT;
+
+ BEGIN_RING(chan, RING_GR(gr, 0x0000), 1);
+ OUT_RING (chan, gr->grclass);
+}
+
+#endif
diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile
index 46bb082388..a3b76ac61b 100644
--- a/src/gallium/drivers/nvfx/Makefile
+++ b/src/gallium/drivers/nvfx/Makefile
@@ -27,6 +27,7 @@ C_SOURCES = \
nvfx_vertprog.c
LIBRARY_INCLUDES = \
+ $(LIBDRM_CFLAGS) \
-I$(TOP)/src/gallium/drivers/nouveau/include
include ../../Makefile.template
diff --git a/src/gallium/drivers/nvfx/nv04_2d.c b/src/gallium/drivers/nvfx/nv04_2d.c
index e0e65e7a87..e2fadd33e1 100644
--- a/src/gallium/drivers/nvfx/nv04_2d.c
+++ b/src/gallium/drivers/nvfx/nv04_2d.c
@@ -34,11 +34,11 @@
#include <stdio.h>
#include <stdint.h>
#include <nouveau/nouveau_device.h>
-#include <nouveau/nouveau_pushbuf.h>
#include <nouveau/nouveau_channel.h>
#include <nouveau/nouveau_bo.h>
#include <nouveau/nouveau_notifier.h>
#include <nouveau/nouveau_grobj.h>
+#include <nouveau/nv04_pushbuf.h>
#include "nv04_2d.h"
#include "nouveau/nv_object.xml.h"
diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c
index 951fb202ed..b609891d31 100644
--- a/src/gallium/drivers/nvfx/nv30_fragtex.c
+++ b/src/gallium/drivers/nvfx/nv30_fragtex.c
@@ -71,6 +71,7 @@ nv30_fragtex_set(struct nvfx_context *nvfx, int unit)
struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit];
struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo;
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
unsigned txf;
unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
unsigned use_rect;
@@ -102,7 +103,7 @@ nv30_fragtex_set(struct nvfx_context *nvfx, int unit)
txf = sv->u.nv30.fmt[ps->compare + (use_rect ? 2 : 0)];
MARK_RING(chan, 9, 2);
- OUT_RING(chan, RING_3D(NV30_3D_TEX_OFFSET(unit), 8));
+ BEGIN_RING(chan, eng3d, NV30_3D_TEX_OFFSET(unit), 8);
OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0);
OUT_RELOC(chan, bo, txf,
tex_flags | NOUVEAU_BO_OR,
diff --git a/src/gallium/drivers/nvfx/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c
index e8ab403f72..563183d9d0 100644
--- a/src/gallium/drivers/nvfx/nv40_fragtex.c
+++ b/src/gallium/drivers/nvfx/nv40_fragtex.c
@@ -76,6 +76,7 @@ void
nv40_fragtex_set(struct nvfx_context *nvfx, int unit)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit];
struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit];
struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo;
@@ -87,7 +88,7 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit)
txf = sv->u.nv40.fmt[ps->compare] | ps->fmt;
MARK_RING(chan, 11, 2);
- OUT_RING(chan, RING_3D(NV30_3D_TEX_OFFSET(unit), 8));
+ BEGIN_RING(chan, eng3d, NV30_3D_TEX_OFFSET(unit), 8);
OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0);
OUT_RELOC(chan, bo, txf, tex_flags | NOUVEAU_BO_OR,
NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1);
@@ -97,7 +98,7 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit)
OUT_RING(chan, ps->filt | sv->filt);
OUT_RING(chan, sv->npot_size);
OUT_RING(chan, ps->bcol);
- OUT_RING(chan, RING_3D(NV40_3D_TEX_SIZE1(unit), 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_TEX_SIZE1(unit), 1);
OUT_RING(chan, sv->u.nv40.npot_size2);
nvfx->hw_txf[unit] = txf;
diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c
index 95834d2327..2bcb93d93e 100644
--- a/src/gallium/drivers/nvfx/nvfx_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_context.c
@@ -7,21 +7,22 @@
#include "nvfx_resource.h"
static void
-nvfx_flush(struct pipe_context *pipe, unsigned flags,
+nvfx_flush(struct pipe_context *pipe,
struct pipe_fence_handle **fence)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
struct nvfx_screen *screen = nvfx->screen;
struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
/* XXX: we need to actually be intelligent here */
- if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
- WAIT_RING(chan, 4);
- OUT_RING(chan, RING_3D(0x1fd8, 1));
+ /* XXX This flag wasn't set by the state tracker anyway. */
+ /*if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
+ BEGIN_RING(chan, eng3d, 0x1fd8, 1);
OUT_RING(chan, 2);
- OUT_RING(chan, RING_3D(0x1fd8, 1));
+ BEGIN_RING(chan, eng3d, 0x1fd8, 1);
OUT_RING(chan, 1);
- }
+ }*/
FIRE_RING(chan);
if (fence)
diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h
index 6ef2a6945d..dad912b2ae 100644
--- a/src/gallium/drivers/nvfx/nvfx_context.h
+++ b/src/gallium/drivers/nvfx/nvfx_context.h
@@ -18,6 +18,7 @@
#include "nouveau/nouveau_winsys.h"
#include "nouveau/nouveau_gldefs.h"
+#include "nouveau/nouveau_resource.h"
#include "nv30-40_3d.xml.h"
#include "nvfx_state.h"
@@ -339,30 +340,31 @@ extern void nvfx_init_vertprog_functions(struct nvfx_context *nvfx);
/* nvfx_push.c */
extern void nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info);
-/* must WAIT_RING(chan, ncomp + 1) or equivalent beforehand! */
-static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan, unsigned attrib, const float* v, unsigned ncomp)
+static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan,
+ struct nouveau_grobj *eng3d, unsigned attrib, const float* v,
+ unsigned ncomp)
{
switch (ncomp) {
case 4:
- OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_4F_X(attrib), 4));
+ BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_4F_X(attrib), 4);
OUT_RING(chan, fui(v[0]));
OUT_RING(chan, fui(v[1]));
OUT_RING(chan, fui(v[2]));
OUT_RING(chan, fui(v[3]));
break;
case 3:
- OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_3F_X(attrib), 3));
+ BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_3F_X(attrib), 3);
OUT_RING(chan, fui(v[0]));
OUT_RING(chan, fui(v[1]));
OUT_RING(chan, fui(v[2]));
break;
case 2:
- OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_2F_X(attrib), 2));
+ BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_2F_X(attrib), 2);
OUT_RING(chan, fui(v[0]));
OUT_RING(chan, fui(v[1]));
break;
case 1:
- OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_1F(attrib), 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_1F(attrib), 1);
OUT_RING(chan, fui(v[0]));
break;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c
index 61f888a8ea..81f1ec485d 100644
--- a/src/gallium/drivers/nvfx/nvfx_draw.c
+++ b/src/gallium/drivers/nvfx/nvfx_draw.c
@@ -28,10 +28,10 @@ nvfx_render_flush(struct draw_stage *stage, unsigned flags)
struct nvfx_render_stage *rs = nvfx_render_stage(stage);
struct nvfx_context *nvfx = rs->nvfx;
struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) {
- assert(AVAIL_RING(chan) >= 2);
- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1);
OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP);
rs->prim = NV30_3D_VERTEX_BEGIN_END_STOP;
}
@@ -46,6 +46,7 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,
struct nvfx_screen *screen = nvfx->screen;
struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
boolean no_elements = nvfx->vertprog->draw_no_elements;
unsigned num_attribs = nvfx->vertprog->draw_elements;
@@ -63,7 +64,7 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,
/* Switch primitive modes if necessary */
if (rs->prim != mode) {
if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) {
- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1);
OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP);
}
@@ -74,23 +75,24 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,
int i;
for(i = 0; i < 32; ++i)
{
- OUT_RING(chan, RING_3D(0x1dac, 1));
+ BEGIN_RING(chan, eng3d, 0x1dac, 1);
OUT_RING(chan, 0);
}
}
- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1);
OUT_RING (chan, mode);
rs->prim = mode;
}
- OUT_RING(chan, RING_3D_NI(NV30_3D_VERTEX_DATA, num_attribs * 4 * count));
if(no_elements) {
+ BEGIN_RING_NI(chan, eng3d, NV30_3D_VERTEX_DATA, 4);
OUT_RING(chan, 0);
OUT_RING(chan, 0);
OUT_RING(chan, 0);
OUT_RING(chan, 0);
} else {
+ BEGIN_RING_NI(chan, eng3d, NV30_3D_VERTEX_DATA, num_attribs * 4 * count);
for (unsigned i = 0; i < count; ++i)
{
struct vertex_header* v = prim->v[i];
diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c
index 13e8beed47..dbd7c77346 100644
--- a/src/gallium/drivers/nvfx/nvfx_fragprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c
@@ -1189,12 +1189,12 @@ out_err:
static inline void
nvfx_fp_memcpy(void* dst, const void* src, size_t len)
{
-#ifndef WORDS_BIGENDIAN
+#ifndef PIPE_ARCH_BIG_ENDIAN
memcpy(dst, src, len);
#else
size_t i;
for(i = 0; i < len; i += 4) {
- uint32_t v = (uint32_t*)((char*)src + i);
+ uint32_t v = *(uint32_t*)((char*)src + i);
*(uint32_t*)((char*)dst + i) = (v >> 16) | (v << 16);
}
#endif
@@ -1233,6 +1233,7 @@ void
nvfx_fragprog_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct nvfx_pipe_fragment_program *pfp = nvfx->fragprog;
struct nvfx_vertex_program* vp;
@@ -1499,17 +1500,17 @@ update:
nvfx->hw_fragprog = fp;
MARK_RING(chan, 8, 1);
- OUT_RING(chan, RING_3D(NV30_3D_FP_ACTIVE_PROGRAM, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_FP_ACTIVE_PROGRAM, 1);
OUT_RELOC(chan, fp->fpbo->bo, offset, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
NOUVEAU_BO_OR, NV30_3D_FP_ACTIVE_PROGRAM_DMA0,
NV30_3D_FP_ACTIVE_PROGRAM_DMA1);
- OUT_RING(chan, RING_3D(NV30_3D_FP_CONTROL, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_FP_CONTROL, 1);
OUT_RING(chan, fp->fp_control);
if(!nvfx->is_nv4x) {
- OUT_RING(chan, RING_3D(NV30_3D_FP_REG_CONTROL, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_FP_REG_CONTROL, 1);
OUT_RING(chan, (1<<16)|0x4);
- OUT_RING(chan, RING_3D(NV30_3D_TEX_UNITS_ENABLE, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_TEX_UNITS_ENABLE, 1);
OUT_RING(chan, fp->samplers);
}
}
@@ -1518,8 +1519,7 @@ update:
unsigned pointsprite_control = fp->point_sprite_control | nvfx->rasterizer->pipe.point_quad_rasterization;
if(pointsprite_control != nvfx->hw_pointsprite_control)
{
- WAIT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV30_3D_POINT_SPRITE, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_POINT_SPRITE, 1);
OUT_RING(chan, pointsprite_control);
nvfx->hw_pointsprite_control = pointsprite_control;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c
index fd0aff6a1a..1c4901df0e 100644
--- a/src/gallium/drivers/nvfx/nvfx_fragtex.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c
@@ -177,6 +177,7 @@ void
nvfx_fragtex_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
unsigned samplers, unit;
samplers = nvfx->dirty_samplers;
@@ -197,9 +198,8 @@ nvfx_fragtex_validate(struct nvfx_context *nvfx)
else
nv40_fragtex_set(nvfx, unit);
} else {
- WAIT_RING(chan, 2);
/* this is OK for nv40 too */
- OUT_RING(chan, RING_3D(NV30_3D_TEX_ENABLE(unit), 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_TEX_ENABLE(unit), 1);
OUT_RING(chan, 0);
nvfx->hw_samplers &= ~(1 << unit);
}
diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c
index 8c043b867b..1be84b90a9 100644
--- a/src/gallium/drivers/nvfx/nvfx_miptree.c
+++ b/src/gallium/drivers/nvfx/nvfx_miptree.c
@@ -138,11 +138,11 @@ nvfx_miptree_create_skeleton(struct pipe_screen *pscreen, const struct pipe_reso
// on our current driver (and the driver too), format support does not depend on geometry, so don't bother computing it
// TODO: may want to revisit this
- if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_RENDER_TARGET, 0))
+ if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_RENDER_TARGET))
mt->base.base.bind &=~ PIPE_BIND_RENDER_TARGET;
- if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_SAMPLER_VIEW, 0))
+ if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_SAMPLER_VIEW))
mt->base.base.bind &=~ PIPE_BIND_SAMPLER_VIEW;
- if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_DEPTH_STENCIL, 0))
+ if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_DEPTH_STENCIL))
mt->base.base.bind &=~ PIPE_BIND_DEPTH_STENCIL;
return mt;
diff --git a/src/gallium/drivers/nvfx/nvfx_push.c b/src/gallium/drivers/nvfx/nvfx_push.c
index ebf47e6ed3..6391741a2e 100644
--- a/src/gallium/drivers/nvfx/nvfx_push.c
+++ b/src/gallium/drivers/nvfx/nvfx_push.c
@@ -10,6 +10,7 @@
struct push_context {
struct nouveau_channel* chan;
+ struct nouveau_grobj *eng3d;
void *idxbuf;
int32_t idxbias;
@@ -27,9 +28,10 @@ static void
emit_edgeflag(void *priv, boolean enabled)
{
struct push_context* ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
struct nouveau_channel *chan = ctx->chan;
- OUT_RING(chan, RING_3D(NV30_3D_EDGEFLAG, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_EDGEFLAG, 1);
OUT_RING(chan, enabled ? 1 : 0);
}
@@ -37,6 +39,7 @@ static void
emit_vertices_lookup8(void *priv, unsigned start, unsigned count)
{
struct push_context *ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
uint8_t* elts = (uint8_t*)ctx->idxbuf + start;
while(count)
@@ -44,7 +47,7 @@ emit_vertices_lookup8(void *priv, unsigned start, unsigned count)
unsigned push = MIN2(count, ctx->max_vertices_per_packet);
unsigned length = push * ctx->vertex_length;
- OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
+ BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur);
ctx->chan->cur += length;
@@ -57,6 +60,7 @@ static void
emit_vertices_lookup16(void *priv, unsigned start, unsigned count)
{
struct push_context *ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
uint16_t* elts = (uint16_t*)ctx->idxbuf + start;
while(count)
@@ -64,7 +68,7 @@ emit_vertices_lookup16(void *priv, unsigned start, unsigned count)
unsigned push = MIN2(count, ctx->max_vertices_per_packet);
unsigned length = push * ctx->vertex_length;
- OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
+ BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur);
ctx->chan->cur += length;
@@ -77,6 +81,7 @@ static void
emit_vertices_lookup32(void *priv, unsigned start, unsigned count)
{
struct push_context *ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
uint32_t* elts = (uint32_t*)ctx->idxbuf + start;
while(count)
@@ -84,7 +89,7 @@ emit_vertices_lookup32(void *priv, unsigned start, unsigned count)
unsigned push = MIN2(count, ctx->max_vertices_per_packet);
unsigned length = push * ctx->vertex_length;
- OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
+ BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
ctx->translate->run_elts(ctx->translate, elts, push, 0, ctx->chan->cur);
ctx->chan->cur += length;
@@ -97,13 +102,14 @@ static void
emit_vertices(void *priv, unsigned start, unsigned count)
{
struct push_context *ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
while(count)
{
unsigned push = MIN2(count, ctx->max_vertices_per_packet);
unsigned length = push * ctx->vertex_length;
- OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
+ BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur);
ctx->chan->cur += length;
@@ -116,10 +122,11 @@ static void
emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg)
{
struct push_context* ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
struct nouveau_channel *chan = ctx->chan;
unsigned nr = (vc & 0xff);
if (nr) {
- OUT_RING(chan, RING_3D(reg, 1));
+ BEGIN_RING(chan, eng3d, reg, 1);
OUT_RING (chan, ((nr - 1) << 24) | start);
start += nr;
}
@@ -130,7 +137,7 @@ emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg)
nr -= push;
- OUT_RING(chan, RING_3D_NI(reg, push));
+ BEGIN_RING_NI(chan, eng3d, reg, push);
while (push--) {
OUT_RING(chan, ((0x100 - 1) << 24) | start);
start += 0x100;
@@ -154,12 +161,13 @@ static INLINE void
emit_elt8(void* priv, unsigned start, unsigned vc)
{
struct push_context* ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
struct nouveau_channel *chan = ctx->chan;
uint8_t *elts = (uint8_t *)ctx->idxbuf + start;
int idxbias = ctx->idxbias;
if (vc & 1) {
- OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1);
OUT_RING (chan, elts[0]);
elts++; vc--;
}
@@ -168,7 +176,7 @@ emit_elt8(void* priv, unsigned start, unsigned vc)
unsigned i;
unsigned push = MIN2(vc, 2047 * 2);
- OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1));
+ BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
@@ -181,12 +189,13 @@ static INLINE void
emit_elt16(void* priv, unsigned start, unsigned vc)
{
struct push_context* ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
struct nouveau_channel *chan = ctx->chan;
uint16_t *elts = (uint16_t *)ctx->idxbuf + start;
int idxbias = ctx->idxbias;
if (vc & 1) {
- OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1);
OUT_RING (chan, elts[0]);
elts++; vc--;
}
@@ -195,7 +204,7 @@ emit_elt16(void* priv, unsigned start, unsigned vc)
unsigned i;
unsigned push = MIN2(vc, 2047 * 2);
- OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1));
+ BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
@@ -208,6 +217,7 @@ static INLINE void
emit_elt32(void* priv, unsigned start, unsigned vc)
{
struct push_context* ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
struct nouveau_channel *chan = ctx->chan;
uint32_t *elts = (uint32_t *)ctx->idxbuf + start;
int idxbias = ctx->idxbias;
@@ -215,8 +225,7 @@ emit_elt32(void* priv, unsigned start, unsigned vc)
while (vc) {
unsigned push = MIN2(vc, 2047);
- OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U32, push));
- assert(AVAIL_RING(chan) >= push);
+ BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U32, push);
if(idxbias)
{
for(unsigned i = 0; i < push; ++i)
@@ -235,6 +244,7 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct push_context ctx;
struct util_split_prim s;
unsigned instances_left = info->instance_count;
@@ -251,6 +261,7 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+ 4; /* potential edgeflag enable/disable */
ctx.chan = nvfx->screen->base.channel;
+ ctx.eng3d = nvfx->screen->eng3d;
ctx.translate = nvfx->vtxelt->translate;
ctx.idxbuf = NULL;
ctx.vertex_length = nvfx->vtxelt->vertex_length;
@@ -333,8 +344,9 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
- WAIT_RING(chan, 5);
- nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp);
+ nvfx_emit_vtx_attr(chan, eng3d,
+ nvfx->vtxelt->per_instance[i].base.idx, v,
+ nvfx->vtxelt->per_instance[i].base.ncomp);
}
/* per-instance loop */
@@ -374,15 +386,18 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
int i;
for(i = 0; i < 32; ++i)
{
- OUT_RING(chan, RING_3D(0x1dac, 1));
+ BEGIN_RING(chan, eng3d,
+ 0x1dac, 1);
OUT_RING(chan, 0);
}
}
- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
+ BEGIN_RING(chan, eng3d,
+ NV30_3D_VERTEX_BEGIN_END, 1);
OUT_RING(chan, hw_mode);
done = util_split_prim_next(&s, max_verts);
- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
+ BEGIN_RING(chan, eng3d,
+ NV30_3D_VERTEX_BEGIN_END, 1);
OUT_RING(chan, 0);
if(done)
@@ -406,8 +421,10 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
per_instance[i].step = 0;
nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
- WAIT_RING(chan, 5);
- nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp);
+ nvfx_emit_vtx_attr(chan, eng3d,
+ nvfx->vtxelt->per_instance[i].base.idx,
+ v,
+ nvfx->vtxelt->per_instance[i].base.ncomp);
}
}
}
diff --git a/src/gallium/drivers/nvfx/nvfx_query.c b/src/gallium/drivers/nvfx/nvfx_query.c
index 3935ffd7f9..3cd6bf1e47 100644
--- a/src/gallium/drivers/nvfx/nvfx_query.c
+++ b/src/gallium/drivers/nvfx/nvfx_query.c
@@ -49,6 +49,7 @@ nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
struct nvfx_query *q = nvfx_query(pq);
struct nvfx_screen *screen = nvfx->screen;
struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
uint64_t tmp;
assert(!nvfx->query);
@@ -72,10 +73,9 @@ nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
nouveau_notifier_reset(nvfx->screen->query, q->object->start);
- WAIT_RING(chan, 4);
- OUT_RING(chan, RING_3D(NV30_3D_QUERY_RESET, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_RESET, 1);
OUT_RING(chan, 1);
- OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
OUT_RING(chan, 1);
q->ready = FALSE;
@@ -88,15 +88,15 @@ nvfx_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct nvfx_query *q = nvfx_query(pq);
assert(nvfx->query == pq);
- WAIT_RING(chan, 4);
- OUT_RING(chan, RING_3D(NV30_3D_QUERY_GET, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_GET, 1);
OUT_RING (chan, (0x01 << NV30_3D_QUERY_GET_UNK24__SHIFT) |
((q->object->start * 32) << NV30_3D_QUERY_GET_OFFSET__SHIFT));
- OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
OUT_RING(chan, 0);
FIRE_RING(chan);
diff --git a/src/gallium/drivers/nvfx/nvfx_resource.c b/src/gallium/drivers/nvfx/nvfx_resource.c
index c60a7bb8b9..42e77c5362 100644
--- a/src/gallium/drivers/nvfx/nvfx_resource.c
+++ b/src/gallium/drivers/nvfx/nvfx_resource.c
@@ -4,13 +4,6 @@
#include "nvfx_resource.h"
#include "nouveau/nouveau_screen.h"
-static unsigned int
-nvfx_resource_is_referenced(struct pipe_context *pipe,
- struct pipe_resource *pr,
- unsigned level, int layer)
-{
- return !!nouveau_reference_flags(nvfx_resource(pr)->bo);
-}
static struct pipe_resource *
nvfx_resource_create(struct pipe_screen *screen,
@@ -58,8 +51,6 @@ nvfx_resource_get_handle(struct pipe_screen *pscreen,
void
nvfx_init_resource_functions(struct pipe_context *pipe)
{
- pipe->is_resource_referenced = nvfx_resource_is_referenced;
-
pipe->create_surface = nvfx_miptree_surface_new;
pipe->surface_destroy = nvfx_miptree_surface_del;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
index 92e1d33090..9e6b9d6ef8 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -202,7 +202,7 @@ nvfx_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned bind, unsigned geom_flags)
+ unsigned bind)
{
struct nvfx_screen *screen = nvfx_screen(pscreen);
@@ -301,98 +301,100 @@ nvfx_screen_destroy(struct pipe_screen *pscreen)
static void nv30_screen_init(struct nvfx_screen *screen)
{
struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
int i;
/* TODO: perhaps we should do some of this on nv40 too? */
for (i=1; i<8; i++) {
- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_HORIZ(i), 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_HORIZ(i), 1);
OUT_RING(chan, 0);
- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_VERT(i), 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_VERT(i), 1);
OUT_RING(chan, 0);
}
- OUT_RING(chan, RING_3D(0x220, 1));
+ BEGIN_RING(chan, eng3d, 0x220, 1);
OUT_RING(chan, 1);
- OUT_RING(chan, RING_3D(0x03b0, 1));
+ BEGIN_RING(chan, eng3d, 0x03b0, 1);
OUT_RING(chan, 0x00100000);
- OUT_RING(chan, RING_3D(0x1454, 1));
+ BEGIN_RING(chan, eng3d, 0x1454, 1);
OUT_RING(chan, 0);
- OUT_RING(chan, RING_3D(0x1d80, 1));
+ BEGIN_RING(chan, eng3d, 0x1d80, 1);
OUT_RING(chan, 3);
- OUT_RING(chan, RING_3D(0x1450, 1));
+ BEGIN_RING(chan, eng3d, 0x1450, 1);
OUT_RING(chan, 0x00030004);
/* NEW */
- OUT_RING(chan, RING_3D(0x1e98, 1));
+ BEGIN_RING(chan, eng3d, 0x1e98, 1);
OUT_RING(chan, 0);
- OUT_RING(chan, RING_3D(0x17e0, 3));
+ BEGIN_RING(chan, eng3d, 0x17e0, 3);
OUT_RING(chan, fui(0.0));
OUT_RING(chan, fui(0.0));
OUT_RING(chan, fui(1.0));
- OUT_RING(chan, RING_3D(0x1f80, 16));
+ BEGIN_RING(chan, eng3d, 0x1f80, 16);
for (i=0; i<16; i++) {
OUT_RING(chan, (i==8) ? 0x0000ffff : 0);
}
- OUT_RING(chan, RING_3D(0x120, 3));
+ BEGIN_RING(chan, eng3d, 0x120, 3);
OUT_RING(chan, 0);
OUT_RING(chan, 1);
OUT_RING(chan, 2);
- OUT_RING(chan, RING_3D(0x1d88, 1));
+ BEGIN_RING(chan, eng3d, 0x1d88, 1);
OUT_RING(chan, 0x00001200);
- OUT_RING(chan, RING_3D(NV30_3D_RC_ENABLE, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_RC_ENABLE, 1);
OUT_RING(chan, 0);
- OUT_RING(chan, RING_3D(NV30_3D_DEPTH_RANGE_NEAR, 2));
+ BEGIN_RING(chan, eng3d, NV30_3D_DEPTH_RANGE_NEAR, 2);
OUT_RING(chan, fui(0.0));
OUT_RING(chan, fui(1.0));
- OUT_RING(chan, RING_3D(NV30_3D_MULTISAMPLE_CONTROL, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_MULTISAMPLE_CONTROL, 1);
OUT_RING(chan, 0xffff0000);
/* enables use of vp rather than fixed-function somehow */
- OUT_RING(chan, RING_3D(0x1e94, 1));
+ BEGIN_RING(chan, eng3d, 0x1e94, 1);
OUT_RING(chan, 0x13);
}
static void nv40_screen_init(struct nvfx_screen *screen)
{
struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
- OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR2, 2));
+ BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR2, 2);
OUT_RING(chan, screen->base.channel->vram->handle);
OUT_RING(chan, screen->base.channel->vram->handle);
- OUT_RING(chan, RING_3D(0x1450, 1));
+ BEGIN_RING(chan, eng3d, 0x1450, 1);
OUT_RING(chan, 0x00000004);
- OUT_RING(chan, RING_3D(0x1ea4, 3));
+ BEGIN_RING(chan, eng3d, 0x1ea4, 3);
OUT_RING(chan, 0x00000010);
OUT_RING(chan, 0x01000100);
OUT_RING(chan, 0xff800006);
/* vtxprog output routing */
- OUT_RING(chan, RING_3D(0x1fc4, 1));
+ BEGIN_RING(chan, eng3d, 0x1fc4, 1);
OUT_RING(chan, 0x06144321);
- OUT_RING(chan, RING_3D(0x1fc8, 2));
+ BEGIN_RING(chan, eng3d, 0x1fc8, 2);
OUT_RING(chan, 0xedcba987);
OUT_RING(chan, 0x0000006f);
- OUT_RING(chan, RING_3D(0x1fd0, 1));
+ BEGIN_RING(chan, eng3d, 0x1fd0, 1);
OUT_RING(chan, 0x00171615);
- OUT_RING(chan, RING_3D(0x1fd4, 1));
+ BEGIN_RING(chan, eng3d, 0x1fd4, 1);
OUT_RING(chan, 0x001b1a19);
- OUT_RING(chan, RING_3D(0x1ef8, 1));
+ BEGIN_RING(chan, eng3d, 0x1ef8, 1);
OUT_RING(chan, 0x0020ffff);
- OUT_RING(chan, RING_3D(0x1d64, 1));
+ BEGIN_RING(chan, eng3d, 0x1d64, 1);
OUT_RING(chan, 0x01d300d4);
- OUT_RING(chan, RING_3D(0x1e94, 1));
+ BEGIN_RING(chan, eng3d, 0x1e94, 1);
OUT_RING(chan, 0x00000001);
- OUT_RING(chan, RING_3D(NV40_3D_MIPMAP_ROUNDING, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_MIPMAP_ROUNDING, 1);
OUT_RING(chan, NV40_3D_MIPMAP_ROUNDING_MODE_DOWN);
}
@@ -571,25 +573,25 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
/* Static eng3d initialisation */
/* note that we just started using the channel, so we must have space in the pushbuffer */
- OUT_RING(chan, RING_3D(NV30_3D_DMA_NOTIFY, 1));
+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_NOTIFY, 1);
OUT_RING(chan, screen->sync->handle);
- OUT_RING(chan, RING_3D(NV30_3D_DMA_TEXTURE0, 2));
+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_TEXTURE0, 2);
OUT_RING(chan, chan->vram->handle);
OUT_RING(chan, chan->gart->handle);
- OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1));
+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_COLOR1, 1);
OUT_RING(chan, chan->vram->handle);
- OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 2));
+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_COLOR0, 2);
OUT_RING(chan, chan->vram->handle);
OUT_RING(chan, chan->vram->handle);
- OUT_RING(chan, RING_3D(NV30_3D_DMA_VTXBUF0, 2));
+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_VTXBUF0, 2);
OUT_RING(chan, chan->vram->handle);
OUT_RING(chan, chan->gart->handle);
- OUT_RING(chan, RING_3D(NV30_3D_DMA_FENCE, 2));
+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_FENCE, 2);
OUT_RING(chan, 0);
OUT_RING(chan, screen->query->handle);
- OUT_RING(chan, RING_3D(NV30_3D_DMA_UNK1AC, 2));
+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_UNK1AC, 2);
OUT_RING(chan, chan->vram->handle);
OUT_RING(chan, chan->vram->handle);
diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c
index 54619037d8..f3dcb205c6 100644
--- a/src/gallium/drivers/nvfx/nvfx_state.c
+++ b/src/gallium/drivers/nvfx/nvfx_state.c
@@ -304,7 +304,7 @@ nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
{
struct nvfx_context *nvfx = nvfx_context(pipe);
- nvfx->constbuf[shader] = buf;
+ pipe_resource_reference(&nvfx->constbuf[shader], buf);
nvfx->constbuf_nr[shader] = buf ? (buf->width0 / (4 * sizeof(float))) : 0;
if (shader == PIPE_SHADER_VERTEX) {
diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c
index 501fdd4430..ae9c31418c 100644
--- a/src/gallium/drivers/nvfx/nvfx_state_emit.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c
@@ -7,11 +7,11 @@ void
nvfx_state_viewport_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct pipe_viewport_state *vpt = &nvfx->viewport;
- WAIT_RING(chan, 11);
if(nvfx->render_mode == HW) {
- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8));
+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TRANSLATE_X, 8);
OUT_RINGf(chan, vpt->translate[0]);
OUT_RINGf(chan, vpt->translate[1]);
OUT_RINGf(chan, vpt->translate[2]);
@@ -20,10 +20,10 @@ nvfx_state_viewport_validate(struct nvfx_context *nvfx)
OUT_RINGf(chan, vpt->scale[1]);
OUT_RINGf(chan, vpt->scale[2]);
OUT_RINGf(chan, vpt->scale[3]);
- OUT_RING(chan, RING_3D(0x1d78, 1));
+ BEGIN_RING(chan, eng3d, 0x1d78, 1);
OUT_RING(chan, 1);
} else {
- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8));
+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TRANSLATE_X, 8);
OUT_RINGf(chan, 0.0f);
OUT_RINGf(chan, 0.0f);
OUT_RINGf(chan, 0.0f);
@@ -32,7 +32,7 @@ nvfx_state_viewport_validate(struct nvfx_context *nvfx)
OUT_RINGf(chan, 1.0f);
OUT_RINGf(chan, 1.0f);
OUT_RINGf(chan, 1.0f);
- OUT_RING(chan, RING_3D(0x1d78, 1));
+ BEGIN_RING(chan, eng3d, 0x1d78, 1);
OUT_RING(chan, nvfx->is_nv4x ? 0x110 : 1);
}
}
@@ -41,6 +41,7 @@ void
nvfx_state_scissor_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe;
struct pipe_scissor_state *s = &nvfx->scissor;
@@ -48,8 +49,7 @@ nvfx_state_scissor_validate(struct nvfx_context *nvfx)
return;
nvfx->state.scissor_enabled = rast->scissor;
- WAIT_RING(chan, 3);
- OUT_RING(chan, RING_3D(NV30_3D_SCISSOR_HORIZ, 2));
+ BEGIN_RING(chan, eng3d, NV30_3D_SCISSOR_HORIZ, 2);
if (nvfx->state.scissor_enabled) {
OUT_RING(chan, ((s->maxx - s->minx) << 16) | s->minx);
OUT_RING(chan, ((s->maxy - s->miny) << 16) | s->miny);
@@ -63,12 +63,12 @@ void
nvfx_state_sr_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct pipe_stencil_ref *sr = &nvfx->stencil_ref;
- WAIT_RING(chan, 4);
- OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(0), 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_STENCIL_FUNC_REF(0), 1);
OUT_RING(chan, sr->ref_value[0]);
- OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(1), 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_STENCIL_FUNC_REF(1), 1);
OUT_RING(chan, sr->ref_value[1]);
}
@@ -76,10 +76,10 @@ void
nvfx_state_blend_colour_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct pipe_blend_color *bcol = &nvfx->blend_colour;
- WAIT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV30_3D_BLEND_COLOR, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_BLEND_COLOR, 1);
OUT_RING(chan, ((float_to_ubyte(bcol->color[3]) << 24) |
(float_to_ubyte(bcol->color[0]) << 16) |
(float_to_ubyte(bcol->color[1]) << 8) |
@@ -90,9 +90,9 @@ void
nvfx_state_stipple_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
- WAIT_RING(chan, 33);
- OUT_RING(chan, RING_3D(NV30_3D_POLYGON_STIPPLE_PATTERN(0), 32));
+ BEGIN_RING(chan, eng3d, NV30_3D_POLYGON_STIPPLE_PATTERN(0), 32);
OUT_RINGp(chan, nvfx->stipple, 32);
}
@@ -100,12 +100,12 @@ static void
nvfx_coord_conventions_validate(struct nvfx_context* nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
unsigned value = nvfx->hw_fragprog->coord_conventions;
if(value & NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED)
value |= nvfx->framebuffer.height << NV30_3D_COORD_CONVENTIONS_HEIGHT__SHIFT;
- WAIT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV30_3D_COORD_CONVENTIONS, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_COORD_CONVENTIONS, 1);
OUT_RING(chan, value);
}
@@ -113,6 +113,7 @@ static void
nvfx_ucp_validate(struct nvfx_context* nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
unsigned enables[7] =
{
0,
@@ -126,17 +127,15 @@ nvfx_ucp_validate(struct nvfx_context* nvfx)
if(!nvfx->use_vp_clipping)
{
- WAIT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANES_ENABLE, 1);
OUT_RING(chan, 0);
- WAIT_RING(chan, 6 * 4 + 1);
- OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANE(0, 0), nvfx->clip.nr * 4));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANE(0, 0),
+ nvfx->clip.nr * 4);
OUT_RINGp(chan, &nvfx->clip.ucp[0][0], nvfx->clip.nr * 4);
}
- WAIT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANES_ENABLE, 1);
OUT_RING(chan, enables[nvfx->clip.nr]);
}
@@ -144,38 +143,37 @@ static void
nvfx_vertprog_ucp_validate(struct nvfx_context* nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
unsigned i;
struct nvfx_vertex_program* vp = nvfx->hw_vertprog;
if(nvfx->clip.nr != vp->clip_nr)
{
unsigned idx;
- WAIT_RING(chan, 14);
/* remove last instruction bit */
if(vp->clip_nr >= 0)
{
idx = vp->nr_insns - 7 + vp->clip_nr;
- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1);
OUT_RING(chan, vp->exec->start + idx);
- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4);
OUT_RINGp (chan, vp->insns[idx].data, 4);
}
/* set last instruction bit */
idx = vp->nr_insns - 7 + nvfx->clip.nr;
- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1);
OUT_RING(chan, vp->exec->start + idx);
- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4);
OUT_RINGp(chan, vp->insns[idx].data, 3);
OUT_RING(chan, vp->insns[idx].data[3] | 1);
vp->clip_nr = nvfx->clip.nr;
}
// TODO: only do this for the ones changed
- WAIT_RING(chan, 6 * 6);
for(i = 0; i < nvfx->clip.nr; ++i)
{
- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_CONST_ID, 5);
OUT_RING(chan, vp->data->start + i);
OUT_RINGp (chan, nvfx->clip.ucp[i], 4);
}
@@ -185,6 +183,7 @@ static boolean
nvfx_state_validate_common(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
unsigned dirty;
unsigned still_dirty = 0;
int new_fb_mode = -1; /* 1 = all swizzled, 0 = make all linear */
@@ -287,8 +286,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
if(vp_output != nvfx->hw_vp_output)
{
- WAIT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV40_3D_VP_RESULT_EN, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_VP_RESULT_EN, 1);
OUT_RING(chan, vp_output);
nvfx->hw_vp_output = vp_output;
}
@@ -320,8 +318,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
if(dirty & NVFX_NEW_ZSA || (new_fb_mode >= 0))
{
- WAIT_RING(chan, 3);
- OUT_RING(chan, RING_3D(NV30_3D_DEPTH_WRITE_ENABLE, 2));
+ BEGIN_RING(chan, eng3d, NV30_3D_DEPTH_WRITE_ENABLE, 2);
OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.writemask);
OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.enabled);
}
@@ -334,10 +331,9 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
// TODO: what about nv30?
if(nvfx->is_nv4x)
{
- WAIT_RING(chan, 4);
- OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1);
OUT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1);
OUT_RING(chan, 1);
}
}
@@ -430,7 +426,7 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx)
NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl);
warned = TRUE;
}
- nvfx->pipe.flush(&nvfx->pipe, 0, NULL);
+ nvfx->pipe.flush(&nvfx->pipe, NULL);
nvfx->dirty |= (NVFX_NEW_VIEWPORT |
NVFX_NEW_VERTPROG |
NVFX_NEW_ARRAYS);
diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c
index 816bb89f2c..f9fed94044 100644
--- a/src/gallium/drivers/nvfx/nvfx_state_fb.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c
@@ -96,6 +96,7 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
{
struct pipe_framebuffer_state *fb = &nvfx->framebuffer;
struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
uint32_t rt_enable, rt_format;
int i;
unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
@@ -204,11 +205,11 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
//printf("rendering to bo %p [%i] at offset %i with pitch %i\n", rt0->bo, rt0->bo->handle, rt0->offset, pitch);
- OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_DMA_COLOR0, 1);
OUT_RELOC(chan, rt0->bo, 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
- OUT_RING(chan, RING_3D(NV30_3D_COLOR0_PITCH, 2));
+ BEGIN_RING(chan, eng3d, NV30_3D_COLOR0_PITCH, 2);
OUT_RING(chan, pitch);
OUT_RELOC(chan, rt0->bo,
rt0->offset, rt_flags | NOUVEAU_BO_LOW,
@@ -216,11 +217,11 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
}
if (rt_enable & NV30_3D_RT_ENABLE_COLOR1) {
- OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_DMA_COLOR1, 1);
OUT_RELOC(chan, nvfx->hw_rt[1].bo, 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
- OUT_RING(chan, RING_3D(NV30_3D_COLOR1_OFFSET, 2));
+ BEGIN_RING(chan, eng3d, NV30_3D_COLOR1_OFFSET, 2);
OUT_RELOC(chan, nvfx->hw_rt[1].bo,
nvfx->hw_rt[1].offset, rt_flags | NOUVEAU_BO_LOW,
0, 0);
@@ -230,68 +231,68 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
if(nvfx->is_nv4x)
{
if (rt_enable & NV40_3D_RT_ENABLE_COLOR2) {
- OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR2, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR2, 1);
OUT_RELOC(chan, nvfx->hw_rt[2].bo, 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
- OUT_RING(chan, RING_3D(NV40_3D_COLOR2_OFFSET, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_COLOR2_OFFSET, 1);
OUT_RELOC(chan, nvfx->hw_rt[2].bo,
nvfx->hw_rt[2].offset, rt_flags | NOUVEAU_BO_LOW,
0, 0);
- OUT_RING(chan, RING_3D(NV40_3D_COLOR2_PITCH, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_COLOR2_PITCH, 1);
OUT_RING(chan, nvfx->hw_rt[2].pitch);
}
if (rt_enable & NV40_3D_RT_ENABLE_COLOR3) {
- OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR3, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR3, 1);
OUT_RELOC(chan, nvfx->hw_rt[3].bo, 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
- OUT_RING(chan, RING_3D(NV40_3D_COLOR3_OFFSET, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_COLOR3_OFFSET, 1);
OUT_RELOC(chan, nvfx->hw_rt[3].bo,
nvfx->hw_rt[3].offset, rt_flags | NOUVEAU_BO_LOW,
0, 0);
- OUT_RING(chan, RING_3D(NV40_3D_COLOR3_PITCH, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_COLOR3_PITCH, 1);
OUT_RING(chan, nvfx->hw_rt[3].pitch);
}
}
if (fb->zsbuf) {
- OUT_RING(chan, RING_3D(NV30_3D_DMA_ZETA, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_DMA_ZETA, 1);
OUT_RELOC(chan, nvfx->hw_zeta.bo, 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
- OUT_RING(chan, RING_3D(NV30_3D_ZETA_OFFSET, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_ZETA_OFFSET, 1);
/* TODO: reverse engineer LMA */
OUT_RELOC(chan, nvfx->hw_zeta.bo,
nvfx->hw_zeta.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0);
if(nvfx->is_nv4x) {
- OUT_RING(chan, RING_3D(NV40_3D_ZETA_PITCH, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_ZETA_PITCH, 1);
OUT_RING(chan, nvfx->hw_zeta.pitch);
}
}
else if(nvfx->is_nv4x) {
- OUT_RING(chan, RING_3D(NV40_3D_ZETA_PITCH, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_ZETA_PITCH, 1);
OUT_RING(chan, 64);
}
- OUT_RING(chan, RING_3D(NV30_3D_RT_ENABLE, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_RT_ENABLE, 1);
OUT_RING(chan, rt_enable);
- OUT_RING(chan, RING_3D(NV30_3D_RT_HORIZ, 3));
+ BEGIN_RING(chan, eng3d, NV30_3D_RT_HORIZ, 3);
OUT_RING(chan, (w << 16) | 0);
OUT_RING(chan, (h << 16) | 0);
OUT_RING(chan, rt_format);
- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_HORIZ, 2));
+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_HORIZ, 2);
OUT_RING(chan, (w << 16) | 0);
OUT_RING(chan, (h << 16) | 0);
- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_HORIZ(0), 2));
+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_HORIZ(0), 2);
OUT_RING(chan, ((w - 1) << 16) | 0);
OUT_RING(chan, ((h - 1) << 16) | 0);
if(!nvfx->is_nv4x) {
/* Wonder why this is needed, context should all be set to zero on init */
/* TODO: we can most likely remove this, after putting it in context init */
- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TX_ORIGIN, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TX_ORIGIN, 1);
OUT_RING(chan, 0);
}
nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAMEBUFFER;
diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c
index 6fd6c47081..be31853d71 100644
--- a/src/gallium/drivers/nvfx/nvfx_surface.c
+++ b/src/gallium/drivers/nvfx/nvfx_surface.c
@@ -168,8 +168,8 @@ nvfx_get_blitter(struct pipe_context* pipe, int copy)
if(nvfx->query && !nvfx->blitters_in_use)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
- WAIT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
OUT_RING(chan, 0);
}
@@ -209,8 +209,8 @@ nvfx_put_blitter(struct pipe_context* pipe, struct blitter_context* blitter)
if(nvfx->query && !nvfx->blitters_in_use)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
- WAIT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
OUT_RING(chan, 1);
}
}
diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c
index 597664e771..b72379d653 100644
--- a/src/gallium/drivers/nvfx/nvfx_vbo.c
+++ b/src/gallium/drivers/nvfx/nvfx_vbo.c
@@ -2,6 +2,7 @@
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
+#include "util/u_transfer.h"
#include "translate/translate.h"
#include "nvfx_context.h"
@@ -9,8 +10,7 @@
#include "nvfx_resource.h"
#include "nouveau/nouveau_channel.h"
-
-#include "nouveau/nouveau_pushbuf.h"
+#include "nouveau/nv04_pushbuf.h"
static inline unsigned
util_guess_unique_indices_count(unsigned mode, unsigned indices)
@@ -247,6 +247,7 @@ boolean
nvfx_vbo_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
int i;
int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;
@@ -262,11 +263,11 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
float v[4];
ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0);
- nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp);
+ nvfx_emit_vtx_attr(chan, eng3d, ve->idx, v, ve->ncomp);
}
- OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
+ BEGIN_RING(chan, eng3d, NV30_3D_VTXFMT(0), elements);
if(nvfx->use_vertex_buffers)
{
unsigned idx = 0;
@@ -297,12 +298,12 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
unsigned i;
/* seems to be some kind of cache flushing */
for(i = 0; i < 3; ++i) {
- OUT_RING(chan, RING_3D(0x1718, 1));
+ BEGIN_RING(chan, eng3d, 0x1718, 1);
OUT_RING(chan, 0);
}
}
- OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
+ BEGIN_RING(chan, eng3d, NV30_3D_VTXBUF(0), elements);
if(nvfx->use_vertex_buffers)
{
unsigned idx = 0;
@@ -330,7 +331,7 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
OUT_RING(chan, 0);
}
- OUT_RING(chan, RING_3D(0x1710, 1));
+ BEGIN_RING(chan, eng3d, 0x1710, 1);
OUT_RING(chan, 0);
nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
@@ -342,15 +343,14 @@ void
nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
unsigned num_outputs = nvfx->vertprog->draw_elements;
int elements = MAX2(num_outputs, nvfx->hw_vtxelt_nr);
if (!elements)
return;
- WAIT_RING(chan, (1 + 6 + 1 + 2) + elements * 2);
-
- OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
+ BEGIN_RING(chan, eng3d, NV30_3D_VTXFMT(0), elements);
for(unsigned i = 0; i < num_outputs; ++i)
OUT_RING(chan, (4 << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT);
for(unsigned i = num_outputs; i < elements; ++i)
@@ -360,16 +360,16 @@ nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx)
unsigned i;
/* seems to be some kind of cache flushing */
for(i = 0; i < 3; ++i) {
- OUT_RING(chan, RING_3D(0x1718, 1));
+ BEGIN_RING(chan, eng3d, 0x1718, 1);
OUT_RING(chan, 0);
}
}
- OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
+ BEGIN_RING(chan, eng3d, NV30_3D_VTXBUF(0), elements);
for (unsigned i = 0; i < elements; i++)
OUT_RING(chan, 0);
- OUT_RING(chan, RING_3D(0x1710, 1));
+ BEGIN_RING(chan, eng3d, 0x1710, 1);
OUT_RING(chan, 0);
nvfx->hw_vtxelt_nr = num_outputs;
@@ -592,18 +592,10 @@ nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
{
struct nvfx_context *nvfx = nvfx_context(pipe);
- for(unsigned i = 0; i < count; ++i)
- {
- pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
- nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
- nvfx->vtxbuf[i].max_index = vb[i].max_index;
- nvfx->vtxbuf[i].stride = vb[i].stride;
- }
-
- for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
- pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);
+ util_copy_vertex_buffers(nvfx->vtxbuf,
+ &nvfx->vtxbuf_nr,
+ vb, count);
- nvfx->vtxbuf_nr = count;
nvfx->use_vertex_buffers = -1;
nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}
@@ -640,4 +632,6 @@ nvfx_init_vbo_functions(struct nvfx_context *nvfx)
nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create;
nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;
+
+ nvfx->pipe.redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c
index e543fda50e..a11941f3d5 100644
--- a/src/gallium/drivers/nvfx/nvfx_vertprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c
@@ -1182,6 +1182,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
{
struct nvfx_screen *screen = nvfx->screen;
struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
struct nvfx_pipe_vertex_program *pvp = nvfx->vertprog;
struct nvfx_vertex_program* vp;
struct pipe_resource *constbuf;
@@ -1341,7 +1342,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
}
*/
- WAIT_RING(chan, 6 * vp->nr_consts);
for (i = nvfx->use_vp_clipping ? 6 : 0; i < vp->nr_consts; i++) {
struct nvfx_vertex_program_data *vpd = &vp->consts[i];
@@ -1356,7 +1356,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
//printf("upload into %i + %i: %f %f %f %f\n", vp->data->start, i, vpd->value[0], vpd->value[1], vpd->value[2], vpd->value[3]);
- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_CONST_ID, 5);
OUT_RING(chan, i + vp->data->start);
OUT_RINGp(chan, (uint32_t *)vpd->value, 4);
}
@@ -1364,11 +1364,10 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
/* Upload vtxprog */
if (upload_code) {
- WAIT_RING(chan, 2 + 5 * vp->nr_insns);
- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1);
OUT_RING(chan, vp->exec->start);
for (i = 0; i < vp->nr_insns; i++) {
- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4);
//printf("%08x %08x %08x %08x\n", vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]);
OUT_RINGp(chan, vp->insns[i].data, 4);
}
@@ -1377,11 +1376,10 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
if(nvfx->dirty & (NVFX_NEW_VERTPROG))
{
- WAIT_RING(chan, 6);
- OUT_RING(chan, RING_3D(NV30_3D_VP_START_FROM_ID, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_START_FROM_ID, 1);
OUT_RING(chan, vp->exec->start);
if(nvfx->is_nv4x) {
- OUT_RING(chan, RING_3D(NV40_3D_VP_ATTRIB_EN, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_VP_ATTRIB_EN, 1);
OUT_RING(chan, vp->ir);
}
}
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index 6e886433bc..37b635fd12 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -22,7 +22,6 @@
#include "r300_context.h"
#include "r300_emit.h"
-#include "r300_hyperz.h"
#include "r300_texture.h"
#include "r300_winsys.h"
@@ -55,11 +54,12 @@ static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op o
util_blitter_save_viewport(r300->blitter, &r300->viewport);
util_blitter_save_clip(r300->blitter, (struct pipe_clip_state*)r300->clip_state.state);
util_blitter_save_vertex_elements(r300->blitter, r300->velems);
- util_blitter_save_vertex_buffers(r300->blitter, r300->vertex_buffer_count,
- r300->vertex_buffer);
+ util_blitter_save_vertex_buffers(r300->blitter, r300->vbuf_mgr->nr_vertex_buffers,
+ r300->vbuf_mgr->vertex_buffer);
- if (op & (R300_CLEAR_SURFACE | R300_COPY))
+ if (op & (R300_CLEAR_SURFACE | R300_COPY)) {
util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state);
+ }
if (op & R300_COPY) {
struct r300_textures_state* state =
@@ -108,6 +108,22 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300,
return r300_surface(fb->cbufs[0])->cbzb_allowed;
}
+static boolean r300_fast_zclear_allowed(struct r300_context *r300)
+{
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+
+ return r300_resource(fb->zsbuf->texture)->tex.zmask_dwords[fb->zsbuf->u.tex.level];
+}
+
+static boolean r300_hiz_clear_allowed(struct r300_context *r300)
+{
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+
+ return r300_resource(fb->zsbuf->texture)->tex.hiz_dwords[fb->zsbuf->u.tex.level];
+}
+
static uint32_t r300_depth_clear_value(enum pipe_format format,
double depth, unsigned stencil)
{
@@ -125,6 +141,13 @@ static uint32_t r300_depth_clear_value(enum pipe_format format,
}
}
+static uint32_t r300_hiz_clear_value(double depth)
+{
+ uint32_t r = (uint32_t)(CLAMP(depth, 0, 1) * 255.5);
+ assert(r <= 255);
+ return r | (r << 8) | (r << 16) | (r << 24);
+}
+
/* Clear currently bound buffers. */
static void r300_clear(struct pipe_context* pipe,
unsigned buffers,
@@ -132,37 +155,46 @@ static void r300_clear(struct pipe_context* pipe,
double depth,
unsigned stencil)
{
- /* My notes about fastfill:
+ /* My notes about Zbuffer compression:
*
- * 1) Only the zbuffer is cleared.
+ * 1) The zbuffer must be micro-tiled and whole microtiles must be
+ * written if compression is enabled. If microtiling is disabled,
+ * it locks up.
*
- * 2) The zbuffer must be micro-tiled and whole microtiles must be
- * written. If microtiling is disabled, it locks up.
+ * 2) There is ZMASK RAM which contains a compressed zbuffer.
+ * Each dword of the Z Mask contains compression information
+ * for 16 4x4 pixel tiles, that is 2 bits for each tile.
+ * On chips with 2 Z pipes, every other dword maps to a different
+ * pipe. On newer chipsets, there is a new compression mode
+ * with 8x8 pixel tiles per 2 bits.
*
- * 3) There is Z Mask RAM which contains a compressed zbuffer and
- * it interacts with fastfill. We should figure out how to use it
- * to get more performance.
- * This is what we know about the Z Mask:
+ * 3) The FASTFILL bit has nothing to do with filling. It only tells hw
+ * it should look in the ZMASK RAM first before fetching from a real
+ * zbuffer.
*
- * Each dword of the Z Mask contains compression information
- * for 16 4x4 pixel blocks, that is 2 bits for each block.
- * On chips with 2 Z pipes, every other dword maps to a different
- * pipe.
+ * 4) If a pixel is in a cleared state, ZB_DEPTHCLEARVALUE is returned
+ * during zbuffer reads instead of the value that is actually stored
+ * in the zbuffer memory. A pixel is in a cleared state when its ZMASK
+ * is equal to 0. Therefore, if you clear ZMASK with zeros, you may
+ * leave the zbuffer memory uninitialized, but then you must enable
+ * compression, so that the ZMASK RAM is actually used.
*
- * 4) ZB_DEPTHCLEARVALUE is used to clear the zbuffer and the Z Mask must
- * be equal to 0. (clear the Z Mask RAM with zeros)
+ * 5) Each 4x4 (or 8x8) tile is automatically decompressed and recompressed
+ * during zbuffer updates. A special decompressing operation should be
+ * used to fully decompress a zbuffer, which basically just stores all
+ * compressed tiles in ZMASK to the zbuffer memory.
*
- * 5) For 16-bit zbuffer, compression causes a hung with one or
+ * 6) For a 16-bit zbuffer, compression causes a hung with one or
* two samples and should not be used.
*
- * 6) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears
+ * 7) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears
* to avoid needless decompression.
*
- * 7) Fastfill must not be used if reading of compressed Z data is disabled
+ * 8) Fastfill must not be used if reading of compressed Z data is disabled
* and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE),
* i.e. it cannot be used to compress the zbuffer.
*
- * 8) ZB_CB_CLEAR does not interact with fastfill in any way.
+ * 9) ZB_CB_CLEAR does not interact with zbuffer compression in any way.
*
* - Marek
*/
@@ -172,8 +204,6 @@ static void r300_clear(struct pipe_context* pipe,
(struct pipe_framebuffer_state*)r300->fb_state.state;
struct r300_hyperz_state *hyperz =
(struct r300_hyperz_state*)r300->hyperz_state.state;
- struct r300_texture *zstex =
- fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL;
uint32_t width = fb->width;
uint32_t height = fb->height;
boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
@@ -182,16 +212,18 @@ static void r300_clear(struct pipe_context* pipe,
/* Enable fast Z clear.
* The zbuffer must be in micro-tiled mode, otherwise it locks up. */
if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) {
- hyperz_dcv = hyperz->zb_depthclearvalue =
- r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
+ if (r300_fast_zclear_allowed(r300)) {
+ hyperz_dcv = hyperz->zb_depthclearvalue =
+ r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
- r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG);
- if (zstex->zmask_mem[fb->zsbuf->u.tex.level]) {
r300_mark_atom_dirty(r300, &r300->zmask_clear);
buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
}
- if (zstex->hiz_mem[fb->zsbuf->u.tex.level])
+
+ if (r300_hiz_clear_allowed(r300)) {
+ r300->hiz_clear_value = r300_hiz_clear_value(depth);
r300_mark_atom_dirty(r300, &r300->hiz_clear);
+ }
}
/* Enable CBZB clear. */
@@ -205,7 +237,7 @@ static void r300_clear(struct pipe_context* pipe,
height = surf->cbzb_height;
r300->cbzb_clear = TRUE;
- r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
}
/* Clear. */
@@ -218,26 +250,28 @@ static void r300_clear(struct pipe_context* pipe,
fb->nr_cbufs,
buffers, rgba, depth, stencil);
r300_blitter_end(r300);
- } else if (r300->zmask_clear.dirty) {
- /* Just clear zmask and hiz now, this does not use a standard draw
+ } else if (r300->zmask_clear.dirty || r300->hiz_clear.dirty) {
+ /* Just clear zmask and hiz now, this does not use the standard draw
* procedure. */
unsigned dwords;
/* Calculate zmask_clear and hiz_clear atom sizes. */
r300_update_hyperz_state(r300);
- dwords = r300->zmask_clear.size +
+ dwords = (r300->zmask_clear.dirty ? r300->zmask_clear.size : 0) +
(r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) +
r300_get_num_cs_end_dwords(r300);
/* Reserve CS space. */
if (dwords > (R300_MAX_CMDBUF_DWORDS - r300->cs->cdw)) {
- r300->context.flush(&r300->context, 0, NULL);
+ r300_flush(&r300->context, R300_FLUSH_ASYNC, NULL);
}
/* Emit clear packets. */
- r300_emit_zmask_clear(r300, r300->zmask_clear.size,
- r300->zmask_clear.state);
- r300->zmask_clear.dirty = FALSE;
+ if (r300->zmask_clear.dirty) {
+ r300_emit_zmask_clear(r300, r300->zmask_clear.size,
+ r300->zmask_clear.state);
+ r300->zmask_clear.dirty = FALSE;
+ }
if (r300->hiz_clear.dirty) {
r300_emit_hiz_clear(r300, r300->hiz_clear.size,
r300->hiz_clear.state);
@@ -251,16 +285,14 @@ static void r300_clear(struct pipe_context* pipe,
if (r300->cbzb_clear) {
r300->cbzb_clear = FALSE;
hyperz->zb_depthclearvalue = hyperz_dcv;
- r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
}
/* Enable fastfill and/or hiz.
*
* If we cleared zmask/hiz, it's in use now. The Hyper-Z state update
- * looks if zmask/hiz is in use and enables fastfill accordingly. */
- if (zstex &&
- (zstex->zmask_in_use[fb->zsbuf->u.tex.level] ||
- zstex->hiz_in_use[fb->zsbuf->u.tex.level])) {
+ * looks if zmask/hiz is in use and programs hardware accordingly. */
+ if (r300->zmask_in_use || r300->hiz_in_use) {
r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
}
@@ -274,10 +306,16 @@ static void r300_clear_render_target(struct pipe_context *pipe,
{
struct r300_context *r300 = r300_context(pipe);
+ r300->hyperz_locked = TRUE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
+
r300_blitter_begin(r300, R300_CLEAR_SURFACE);
util_blitter_clear_render_target(r300->blitter, dst, rgba,
dstx, dsty, width, height);
r300_blitter_end(r300);
+
+ r300->hyperz_locked = FALSE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
/* Clear a region of a depth stencil surface. */
@@ -290,42 +328,70 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe,
unsigned width, unsigned height)
{
struct r300_context *r300 = r300_context(pipe);
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+
+ if (r300->zmask_in_use && !r300->hyperz_locked) {
+ if (fb->zsbuf->texture == dst->texture) {
+ r300_decompress_zmask(r300);
+ } else {
+ r300->hyperz_locked = TRUE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
+ }
+ }
r300_blitter_begin(r300, R300_CLEAR_SURFACE);
util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil,
dstx, dsty, width, height);
r300_blitter_end(r300);
+
+ if (r300->hyperz_locked) {
+ r300->hyperz_locked = FALSE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
+ }
}
-/* Flush a depth stencil buffer. */
-void r300_flush_depth_stencil(struct pipe_context *pipe,
- struct pipe_resource *dst,
- unsigned level,
- unsigned layer)
+void r300_decompress_zmask(struct r300_context *r300)
{
- struct r300_context *r300 = r300_context(pipe);
- struct pipe_surface *dstsurf, surf_tmpl;
- struct r300_texture *tex = r300_texture(dst);
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
- if (!tex->zmask_mem[level])
- return;
- if (!tex->zmask_in_use[level])
+ if (!r300->zmask_in_use || r300->hyperz_locked)
return;
- surf_tmpl.format = dst->format;
- surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL;
- surf_tmpl.u.tex.level = level;
- surf_tmpl.u.tex.first_layer = layer;
- surf_tmpl.u.tex.last_layer = layer;
- dstsurf = pipe->create_surface(pipe, dst, &surf_tmpl);
+ r300->zmask_decompress = TRUE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
- r300->z_decomp_rd = TRUE;
- r300_blitter_begin(r300, R300_CLEAR_SURFACE);
- util_blitter_flush_depth_stencil(r300->blitter, dstsurf);
+ r300_blitter_begin(r300, R300_CLEAR);
+ util_blitter_clear_depth_custom(r300->blitter, fb->width, fb->height, 0,
+ r300->dsa_decompress_zmask);
r300_blitter_end(r300);
- r300->z_decomp_rd = FALSE;
- tex->zmask_in_use[level] = FALSE;
+ r300->zmask_decompress = FALSE;
+ r300->zmask_in_use = FALSE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
+}
+
+void r300_decompress_zmask_locked_unsafe(struct r300_context *r300)
+{
+ struct pipe_framebuffer_state fb = {0};
+ fb.width = r300->locked_zbuffer->width;
+ fb.height = r300->locked_zbuffer->height;
+ fb.nr_cbufs = 0;
+ fb.zsbuf = r300->locked_zbuffer;
+
+ r300->context.set_framebuffer_state(&r300->context, &fb);
+ r300_decompress_zmask(r300);
+}
+
+void r300_decompress_zmask_locked(struct r300_context *r300)
+{
+ struct pipe_framebuffer_state saved_fb = {0};
+
+ util_copy_framebuffer_state(&saved_fb, r300->fb_state.state);
+ r300_decompress_zmask_locked_unsafe(r300);
+ r300->context.set_framebuffer_state(&r300->context, &saved_fb);
+ util_unreference_framebuffer_state(&saved_fb);
}
/* Copy a block of pixels from one surface to another using HW. */
@@ -340,8 +406,6 @@ static void r300_hw_copy_region(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
r300_blitter_begin(r300, R300_COPY);
-
- /* Do a copy */
util_blitter_copy_region(r300->blitter, dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box, TRUE);
r300_blitter_end(r300);
@@ -356,54 +420,103 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
unsigned src_level,
const struct pipe_box *src_box)
{
- enum pipe_format old_format = dst->format;
- enum pipe_format new_format = old_format;
- boolean is_depth;
- if (!pipe->screen->is_format_supported(pipe->screen,
- old_format, src->target,
- src->nr_samples,
- PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_SAMPLER_VIEW, 0) &&
- util_format_is_plain(old_format)) {
- switch (util_format_get_blocksize(old_format)) {
+ struct r300_context *r300 = r300_context(pipe);
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct pipe_resource old_src = *src;
+ struct pipe_resource old_dst = *dst;
+ struct pipe_resource new_src = old_src;
+ struct pipe_resource new_dst = old_dst;
+ const struct util_format_description *desc =
+ util_format_description(dst->format);
+ struct pipe_box box;
+
+ if (r300->zmask_in_use && !r300->hyperz_locked) {
+ if (fb->zsbuf->texture == src ||
+ fb->zsbuf->texture == dst) {
+ r300_decompress_zmask(r300);
+ } else {
+ r300->hyperz_locked = TRUE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
+ }
+ }
+
+ /* Handle non-renderable plain formats. */
+ if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
+ (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
+ !pipe->screen->is_format_supported(pipe->screen,
+ src->format, src->target,
+ src->nr_samples,
+ PIPE_BIND_SAMPLER_VIEW) ||
+ !pipe->screen->is_format_supported(pipe->screen,
+ dst->format, dst->target,
+ dst->nr_samples,
+ PIPE_BIND_RENDER_TARGET))) {
+ switch (util_format_get_blocksize(old_dst.format)) {
case 1:
- new_format = PIPE_FORMAT_I8_UNORM;
+ new_dst.format = PIPE_FORMAT_I8_UNORM;
break;
case 2:
- new_format = PIPE_FORMAT_B4G4R4A4_UNORM;
+ new_dst.format = PIPE_FORMAT_B4G4R4A4_UNORM;
break;
case 4:
- new_format = PIPE_FORMAT_B8G8R8A8_UNORM;
+ new_dst.format = PIPE_FORMAT_B8G8R8A8_UNORM;
break;
case 8:
- new_format = PIPE_FORMAT_R16G16B16A16_UNORM;
+ new_dst.format = PIPE_FORMAT_R16G16B16A16_UNORM;
break;
default:
debug_printf("r300: surface_copy: Unhandled format: %s. Falling back to software.\n"
"r300: surface_copy: Software fallback doesn't work for tiled textures.\n",
- util_format_short_name(old_format));
+ util_format_short_name(dst->format));
}
+ new_src.format = new_dst.format;
}
- is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
- if (is_depth) {
- r300_flush_depth_stencil(pipe, src, src_level, src_box->z);
- }
- if (old_format != new_format) {
- r300_texture_reinterpret_format(pipe->screen,
- dst, new_format);
- r300_texture_reinterpret_format(pipe->screen,
- src, new_format);
+ /* Handle compressed formats. */
+ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC ||
+ desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+ switch (util_format_get_blocksize(old_dst.format)) {
+ case 8:
+ /* 1 pixel = 4 bits,
+ * we set 1 pixel = 2 bytes ===> 4 times larger pixels. */
+ new_dst.format = PIPE_FORMAT_B4G4R4A4_UNORM;
+ break;
+ case 16:
+ /* 1 pixel = 8 bits,
+ * we set 1 pixel = 4 bytes ===> 4 times larger pixels. */
+ new_dst.format = PIPE_FORMAT_B8G8R8A8_UNORM;
+ break;
+ }
+
+ /* Since the pixels are 4 times larger, we must decrease
+ * the image size and the coordinates 4 times. */
+ new_src.format = new_dst.format;
+ new_dst.height0 = (new_dst.height0 + 3) / 4;
+ new_src.height0 = (new_src.height0 + 3) / 4;
+ dsty /= 4;
+ box = *src_box;
+ box.y /= 4;
+ box.height = (box.height + 3) / 4;
+ src_box = &box;
}
+ if (old_src.format != new_src.format)
+ r300_resource_set_properties(pipe->screen, src, 0, &new_src);
+ if (old_dst.format != new_dst.format)
+ r300_resource_set_properties(pipe->screen, dst, 0, &new_dst);
+
r300_hw_copy_region(pipe, dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box);
- if (old_format != new_format) {
- r300_texture_reinterpret_format(pipe->screen,
- dst, old_format);
- r300_texture_reinterpret_format(pipe->screen,
- src, old_format);
+ if (old_src.format != new_src.format)
+ r300_resource_set_properties(pipe->screen, src, 0, &old_src);
+ if (old_dst.format != new_dst.format)
+ r300_resource_set_properties(pipe->screen, dst, 0, &old_dst);
+
+ if (r300->hyperz_locked) {
+ r300->hyperz_locked = FALSE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
}
diff --git a/src/gallium/drivers/r300/r300_cb.h b/src/gallium/drivers/r300/r300_cb.h
index 9d3d4fc1b1..b373937a1f 100644
--- a/src/gallium/drivers/r300/r300_cb.h
+++ b/src/gallium/drivers/r300/r300_cb.h
@@ -61,40 +61,52 @@
* that they neatly hide away, and don't have the cost of function setup, so
* we're going to use them. */
-#ifdef DEBUG
-#define CB_DEBUG(x) x
-#else
-#define CB_DEBUG(x)
-#endif
-
-
/**
* Command buffer setup.
*/
+#ifdef DEBUG
+
#define CB_LOCALS \
- CB_DEBUG(int cs_count = 0;) \
+ int cs_count = 0; \
uint32_t *cs_ptr = NULL; \
- CB_DEBUG((void) cs_count;) (void) cs_ptr;
+ (void) cs_count; (void) cs_ptr
-#define NEW_CB(ptr, size) do { \
- assert(sizeof(*ptr) == sizeof(uint32_t)); \
- cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)); \
- CB_DEBUG(cs_count = size;) \
+#define BEGIN_CB(ptr, size) do { \
+ assert(sizeof(*(ptr)) == sizeof(uint32_t)); \
+ cs_count = (size); \
+ cs_ptr = (ptr); \
} while (0)
-#define BEGIN_CB(ptr, size) do { \
- assert(sizeof(*ptr) == sizeof(uint32_t)); \
- cs_ptr = ptr; \
- CB_DEBUG(cs_count = size;) \
+#define NEW_CB(ptr, size) \
+ do { \
+ assert(sizeof(*(ptr)) == sizeof(uint32_t)); \
+ cs_count = (size); \
+ cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)); \
} while (0)
#define END_CB do { \
- CB_DEBUG(if (cs_count != 0) \
+ if (cs_count != 0) \
debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \
- cs_count, __FUNCTION__, __FILE__, __LINE__);) \
+ cs_count, __FUNCTION__, __FILE__, __LINE__); \
} while (0)
+#define CB_USED_DW(x) cs_count -= x
+
+#else
+
+#define CB_LOCALS \
+ uint32_t *cs_ptr = NULL; (void) cs_ptr
+
+#define NEW_CB(ptr, size) \
+ cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t))
+
+#define BEGIN_CB(ptr, size) cs_ptr = (ptr)
+#define END_CB
+#define CB_USED_DW(x)
+
+#endif
+
/**
* Storing pure DWORDs.
@@ -103,13 +115,13 @@
#define OUT_CB(value) do { \
*cs_ptr = (value); \
cs_ptr++; \
- CB_DEBUG(cs_count--;) \
+ CB_USED_DW(1); \
} while (0)
#define OUT_CB_TABLE(values, count) do { \
memcpy(cs_ptr, values, count * sizeof(uint32_t)); \
cs_ptr += count; \
- CB_DEBUG(cs_count -= count;) \
+ CB_USED_DW(count); \
} while (0)
#define OUT_CB_32F(value) \
diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index 583e981a4d..990acea9f4 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -241,7 +241,6 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x5A42:
caps->family = CHIP_FAMILY_RS400;
caps->has_tcl = FALSE;
- caps->zmask_ram = RV3xx_ZMASK_SIZE;
break;
case 0x5A61:
@@ -256,8 +255,6 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_RS690;
caps->has_tcl = FALSE;
caps->is_r400 = TRUE;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x793F:
@@ -266,8 +263,6 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_RS600;
caps->has_tcl = FALSE;
caps->is_r400 = TRUE;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x796C:
@@ -277,8 +272,6 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_RS740;
caps->has_tcl = FALSE;
caps->is_r400 = TRUE;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x7100:
@@ -366,7 +359,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_RV530;
caps->num_vert_fpus = 5;
caps->is_r500 = TRUE;
- /*caps->hiz_ram = RV530_HIZ_LIMIT;*/
+ caps->hiz_ram = RV530_HIZ_LIMIT;
caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
@@ -424,5 +417,6 @@ void r300_parse_chipset(struct r300_capabilities* caps)
}
caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350;
+ caps->z_compress = caps->is_rv350 ? R300_ZCOMP_8X8 : R300_ZCOMP_4X4;
caps->dxtc_swizzle = caps->is_r400 || caps->is_r500;
}
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index f2035d2009..68943d561b 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -33,6 +33,13 @@
#define PIPE_ZMASK_SIZE 4096
#define RV3xx_ZMASK_SIZE 5120
+/* The size of a compressed tile. Each compressed tile takes 2 bits
+ * in the ZMASK RAM, so there is always 16 tiles per one dword. */
+enum r300_zmask_compression {
+ R300_ZCOMP_4X4 = 4,
+ R300_ZCOMP_8X8 = 8
+};
+
/* Structure containing all the possible information about a specific Radeon
* in the R3xx, R4xx, and R5xx families. */
struct r300_capabilities {
@@ -50,10 +57,12 @@ struct r300_capabilities {
unsigned num_tex_units;
/* Whether or not TCL is physically present */
boolean has_tcl;
- /* Some chipsets do not have HiZ RAM - other have varying amounts . */
+ /* Some chipsets do not have HiZ RAM - other have varying amounts. */
int hiz_ram;
- /* some chipsets have zmask ram per pipe some don't */
+ /* Some chipsets have zmask ram per pipe some don't. */
int zmask_ram;
+ /* Compression mode for ZMASK. */
+ enum r300_zmask_compression z_compress;
/* Whether or not this is RV350 or newer, including all r400 and r500
* chipsets. The differences compared to the oldest r300 chips are:
* - Blend LTE/GTE thresholds
@@ -81,8 +90,6 @@ struct r300_capabilities {
boolean high_second_pipe;
/* DXTC texture swizzling. */
boolean dxtc_swizzle;
- /* Index bias (AKA index offset). */
- boolean index_bias_supported;
};
/* Enumerations for legibility and telling which card we're running on. */
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 67b011a145..720d666d98 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -30,29 +30,28 @@
#include "r300_cb.h"
#include "r300_context.h"
#include "r300_emit.h"
-#include "r300_hyperz.h"
#include "r300_screen.h"
#include "r300_screen_buffer.h"
#include "r300_winsys.h"
-#include <inttypes.h>
-
static void r300_update_num_contexts(struct r300_screen *r300screen,
int diff)
{
+ pipe_mutex_lock(r300screen->num_contexts_mutex);
if (diff > 0) {
- p_atomic_inc(&r300screen->num_contexts);
+ r300screen->num_contexts++;
if (r300screen->num_contexts > 1)
util_slab_set_thread_safety(&r300screen->pool_buffers,
UTIL_SLAB_MULTITHREADED);
} else {
- p_atomic_dec(&r300screen->num_contexts);
+ r300screen->num_contexts--;
if (r300screen->num_contexts <= 1)
util_slab_set_thread_safety(&r300screen->pool_buffers,
UTIL_SLAB_SINGLETHREADED);
}
+ pipe_mutex_unlock(r300screen->num_contexts_mutex);
}
static void r300_release_referenced_objects(struct r300_context *r300)
@@ -79,22 +78,18 @@ static void r300_release_referenced_objects(struct r300_context *r300)
NULL);
}
- /* The dummy VBO. */
+ /* Manually-created vertex buffers. */
pipe_resource_reference(&r300->dummy_vb, NULL);
-
- /* The SWTCL VBO. */
pipe_resource_reference(&r300->vbo, NULL);
- /* Vertex buffers. */
- for (i = 0; i < r300->vertex_buffer_count; i++) {
- pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL);
- }
-
/* If there are any queries pending or not destroyed, remove them now. */
foreach_s(query, temp, &r300->query_list) {
remove_from_list(query);
FREE(query);
}
+
+ r300->context.delete_depth_stencil_alpha_state(&r300->context,
+ r300->dsa_decompress_zmask);
}
static void r300_destroy_context(struct pipe_context* context)
@@ -106,20 +101,12 @@ static void r300_destroy_context(struct pipe_context* context)
if (r300->draw)
draw_destroy(r300->draw);
- if (r300->upload_vb)
- u_upload_destroy(r300->upload_vb);
- if (r300->upload_ib)
- u_upload_destroy(r300->upload_ib);
-
- if (r300->tran.translate_cache)
- translate_cache_destroy(r300->tran.translate_cache);
+ if (r300->vbuf_mgr)
+ u_vbuf_mgr_destroy(r300->vbuf_mgr);
/* XXX: This function assumes r300->query_list was initialized */
r300_release_referenced_objects(r300);
- if (r300->zmask_mm)
- r300_hyperz_destroy_mm(r300);
-
if (r300->cs)
r300->rws->cs_destroy(r300->cs);
@@ -152,11 +139,11 @@ static void r300_destroy_context(struct pipe_context* context)
FREE(r300);
}
-void r300_flush_cb(void *data)
+static void r300_flush_callback(void *data, unsigned flags)
{
struct r300_context* const cs_context_copy = data;
- cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL);
+ r300_flush(&cs_context_copy->context, flags, NULL);
}
#define R300_INIT_ATOM(atomname, atomsize) \
@@ -180,7 +167,6 @@ static boolean r300_setup_atoms(struct r300_context* r300)
boolean is_rv350 = r300->screen->caps.is_rv350;
boolean is_r500 = r300->screen->caps.is_r500;
boolean has_tcl = r300->screen->caps.has_tcl;
- boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
boolean drm_2_6_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0);
boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0;
@@ -206,18 +192,18 @@ static boolean r300_setup_atoms(struct r300_context* r300)
/* ZB (unpipelined), SC. */
R300_INIT_ATOM(ztop_state, 2);
/* ZB, FG. */
- R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6);
+ R300_INIT_ATOM(dsa_state, is_r500 ? (drm_2_6_0 ? 10 : 8) : 6);
/* RB3D. */
R300_INIT_ATOM(blend_state, 8);
R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2);
/* SC. */
R300_INIT_ATOM(scissor_state, 3);
/* GB, FG, GA, SU, SC, RB3D. */
- R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0));
+ R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0) + (is_r500 ? 4 : 0));
/* VAP. */
R300_INIT_ATOM(viewport_state, 9);
R300_INIT_ATOM(pvs_flush, 2);
- R300_INIT_ATOM(vap_invariant_state, 9);
+ R300_INIT_ATOM(vap_invariant_state, is_r500 ? 11 : 9);
R300_INIT_ATOM(vertex_stream_state, 0);
R300_INIT_ATOM(vs_state, 0);
R300_INIT_ATOM(vs_constants, 0);
@@ -226,7 +212,7 @@ static boolean r300_setup_atoms(struct r300_context* r300)
R300_INIT_ATOM(rs_block_state, 0);
R300_INIT_ATOM(rs_state, 0);
/* SC, US. */
- R300_INIT_ATOM(fb_state_pipelined, 5 + (drm_2_3_0 ? 3 : 0));
+ R300_INIT_ATOM(fb_state_pipelined, 8);
/* US. */
R300_INIT_ATOM(fs, 0);
R300_INIT_ATOM(fs_rc_constant_state, 0);
@@ -237,9 +223,9 @@ static boolean r300_setup_atoms(struct r300_context* r300)
if (can_hyperz) {
/* HiZ Clear */
if (has_hiz_ram)
- R300_INIT_ATOM(hiz_clear, 0);
+ R300_INIT_ATOM(hiz_clear, 4);
/* zmask clear */
- R300_INIT_ATOM(zmask_clear, 0);
+ R300_INIT_ATOM(zmask_clear, 4);
}
/* ZB (unpipelined), SU. */
R300_INIT_ATOM(query_start, 4);
@@ -341,7 +327,7 @@ static void r300_init_states(struct pipe_context *pipe)
/* Initialize the VAP invariant state. */
{
- BEGIN_CB(vap_invariant->cb, 9);
+ BEGIN_CB(vap_invariant->cb, r300->vap_invariant_state.size);
OUT_CB_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff);
OUT_CB_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4);
OUT_CB_32F(1.0);
@@ -349,6 +335,10 @@ static void r300_init_states(struct pipe_context *pipe)
OUT_CB_32F(1.0);
OUT_CB_32F(1.0);
OUT_CB_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO);
+
+ if (r300->screen->caps.is_r500) {
+ OUT_CB_REG(R500_VAP_TEX_TO_COLOR_CNTL, 0);
+ }
END_CB;
}
@@ -357,17 +347,22 @@ static void r300_init_states(struct pipe_context *pipe)
BEGIN_CB(invariant->cb, r300->invariant_state.size);
OUT_CB_REG(R300_GB_SELECT, 0);
OUT_CB_REG(R300_FG_FOG_BLEND, 0);
- OUT_CB_REG(R300_GA_ROUND_MODE, 1);
OUT_CB_REG(R300_GA_OFFSET, 0);
OUT_CB_REG(R300_SU_TEX_WRAP, 0);
OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF);
OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0);
OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525);
+ OUT_CB_REG(R300_SC_SCREENDOOR, 0xffffff);
if (r300->screen->caps.is_rv350) {
OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101);
OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE);
}
+
+ if (r300->screen->caps.is_r500) {
+ OUT_CB_REG(R500_GA_COLOR_CONTROL_PS3, 0);
+ OUT_CB_REG(R500_SU_TEX_WRAP_PS3, 0);
+ }
END_CB;
}
@@ -443,39 +438,23 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300_init_state_functions(r300);
r300_init_resource_functions(r300);
+ r300->vbuf_mgr = u_vbuf_mgr_create(&r300->context, 1024 * 1024, 16,
+ PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_INDEX_BUFFER,
+ U_VERTEX_FETCH_DWORD_ALIGNED);
+ if (!r300->vbuf_mgr)
+ goto fail;
+
r300->blitter = util_blitter_create(&r300->context);
if (r300->blitter == NULL)
goto fail;
/* Render functions must be initialized after blitter. */
r300_init_render_functions(r300);
-
- rws->cs_set_flush(r300->cs, r300_flush_cb, r300);
-
- /* setup hyper-z mm */
- if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
- if (!r300_hyperz_init_mm(r300))
- goto fail;
-
- r300->upload_ib = u_upload_create(&r300->context,
- 32 * 1024, 16,
- PIPE_BIND_INDEX_BUFFER);
-
- if (r300->upload_ib == NULL)
- goto fail;
-
- r300->upload_vb = u_upload_create(&r300->context,
- 128 * 1024, 16,
- PIPE_BIND_VERTEX_BUFFER);
- if (r300->upload_vb == NULL)
- goto fail;
-
- r300->tran.translate_cache = translate_cache_create();
- if (r300->tran.translate_cache == NULL)
- goto fail;
-
r300_init_states(&r300->context);
+ rws->cs_set_flush(r300->cs, r300_flush_callback, r300);
+
/* The KIL opcode needs the first texture unit to be enabled
* on r3xx-r4xx. In order to calm down the CS checker, we bind this
* dummy texture there. */
@@ -502,7 +481,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
}
{
- struct pipe_resource vb = {};
+ struct pipe_resource vb;
+ memset(&vb, 0, sizeof(vb));
vb.target = PIPE_BUFFER;
vb.format = PIPE_FORMAT_R8_UNORM;
vb.bind = PIPE_BIND_VERTEX_BUFFER;
@@ -514,36 +494,45 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300->dummy_vb = screen->resource_create(screen, &vb);
}
+ {
+ struct pipe_depth_stencil_alpha_state dsa;
+ memset(&dsa, 0, sizeof(dsa));
+ dsa.depth.writemask = 1;
+
+ r300->dsa_decompress_zmask =
+ r300->context.create_depth_stencil_alpha_state(&r300->context,
+ &dsa);
+ }
+
+ /* Print driver info. */
+#ifdef DEBUG
+ {
+#else
+ if (DBG_ON(r300, DBG_INFO)) {
+#endif
+ fprintf(stderr,
+ "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n"
+ "r300: GART size: %d MB, VRAM size: %d MB\n"
+ "r300: AA compression: %s, Z compression: %s, HiZ: %s\n",
+ rws->get_value(rws, R300_VID_DRM_MAJOR),
+ rws->get_value(rws, R300_VID_DRM_MINOR),
+ rws->get_value(rws, R300_VID_DRM_PATCHLEVEL),
+ screen->get_name(screen),
+ rws->get_value(rws, R300_VID_PCI_ID),
+ rws->get_value(rws, R300_VID_GB_PIPES),
+ rws->get_value(rws, R300_VID_Z_PIPES),
+ rws->get_value(rws, R300_VID_GART_SIZE) >> 20,
+ rws->get_value(rws, R300_VID_VRAM_SIZE) >> 20,
+ rws->get_value(rws, R300_CAN_AACOMPRESS) ? "YES" : "NO",
+ rws->get_value(rws, R300_CAN_HYPERZ) &&
+ r300->screen->caps.zmask_ram ? "YES" : "NO",
+ rws->get_value(rws, R300_CAN_HYPERZ) &&
+ r300->screen->caps.hiz_ram ? "YES" : "NO");
+ }
+
return &r300->context;
- fail:
+fail:
r300_destroy_context(&r300->context);
return NULL;
}
-
-void r300_finish(struct r300_context *r300)
-{
- struct pipe_framebuffer_state *fb;
- unsigned i;
-
- /* This is a preliminary implementation of glFinish.
- *
- * The ideal implementation should use something like EmitIrqLocked and
- * WaitIrq, or better, real fences.
- */
- if (r300->fb_state.state) {
- fb = r300->fb_state.state;
-
- for (i = 0; i < fb->nr_cbufs; i++) {
- if (fb->cbufs[i]->texture) {
- r300->rws->buffer_wait(r300->rws,
- r300_texture(fb->cbufs[i]->texture)->buffer);
- return;
- }
- }
- if (fb->zsbuf && fb->zsbuf->texture) {
- r300->rws->buffer_wait(r300->rws,
- r300_texture(fb->zsbuf->texture)->buffer);
- }
- }
-}
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 39dcde0610..e395f41290 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -30,11 +30,11 @@
#include "pipe/p_context.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
-
-#include "translate/translate_cache.h"
+#include "util/u_vbuf_mgr.h"
#include "r300_defines.h"
#include "r300_screen.h"
+#include "r300_winsys.h"
struct u_upload_mgr;
struct r300_context;
@@ -65,11 +65,15 @@ struct r300_aa_state {
};
struct r300_blend_state {
- uint32_t cb[8];
+ struct pipe_blend_state state;
+
+ uint32_t cb_clamp[8];
+ uint32_t cb_noclamp[8];
uint32_t cb_no_readwrite[8];
};
struct r300_blend_color_state {
+ struct pipe_blend_color state;
uint32_t cb[3];
};
@@ -91,9 +95,24 @@ struct r300_dsa_state {
uint32_t stencil_ref_mask; /* R300_ZB_STENCILREFMASK: 0x4f08 */
uint32_t cb_reg;
uint32_t stencil_ref_bf; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */
+ uint32_t cb_reg1;
+ uint32_t alpha_value; /* R500_FG_ALPHA_VALUE: 0x4be0 */
+
+ /* The same, but for FP16 alpha test. */
+ uint32_t cb_begin_fp16;
+ uint32_t alpha_function_fp16; /* R300_FG_ALPHA_FUNC: 0x4bd4 */
+ uint32_t cb_reg_seq_fp16;
+ uint32_t z_buffer_control_fp16; /* R300_ZB_CNTL: 0x4f00 */
+ uint32_t z_stencil_control_fp16; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */
+ uint32_t stencil_ref_mask_fp16; /* R300_ZB_STENCILREFMASK: 0x4f08 */
+ uint32_t cb_reg_fp16;
+ uint32_t stencil_ref_bf_fp16; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */
+ uint32_t cb_reg1_fp16;
+ uint32_t alpha_value_fp16; /* R500_FG_ALPHA_VALUE: 0x4be0 */
/* The second command buffer disables zbuffer reads and writes. */
- uint32_t cb_no_readwrite[8];
+ uint32_t cb_zb_no_readwrite[10];
+ uint32_t cb_fp16_zb_no_readwrite[10];
/* Whether a two-sided stencil is enabled. */
boolean two_sided;
@@ -102,7 +121,6 @@ struct r300_dsa_state {
};
struct r300_hyperz_state {
- int current_func; /* -1 after a clear before first op */
int flush;
/* This is actually a command buffer with named dwords. */
uint32_t cb_flush_begin;
@@ -121,7 +139,7 @@ struct r300_gpu_flush {
uint32_t cb_flush_clean[6];
};
-#define RS_STATE_MAIN_SIZE 23
+#define RS_STATE_MAIN_SIZE 25
struct r300_rs_state {
/* Original rasterizer state. */
@@ -189,11 +207,6 @@ struct r300_sampler_view {
uint32_t texcache_region;
};
-struct r300_texture_fb_state {
- uint32_t pitch[R300_MAX_TEXTURE_LEVELS]; /* COLORPITCH or DEPTHPITCH. */
- uint32_t format; /* US_OUT_FMT or R300_ZB_FORMAT */
-};
-
struct r300_texture_sampler_state {
struct r300_texture_format_state format;
uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */
@@ -225,11 +238,11 @@ struct r300_vertex_stream_state {
};
struct r300_invariant_state {
- uint32_t cb[20];
+ uint32_t cb[24];
};
struct r300_vap_invariant_state {
- uint32_t cb[9];
+ uint32_t cb[11];
};
struct r300_viewport_state {
@@ -273,14 +286,12 @@ struct r300_query {
/* How many results have been written, in dwords. It's incremented
* after end_query and flush. */
unsigned num_results;
- /* if we've flushed the query */
- boolean flushed;
/* if begin has been emitted */
boolean begin_emitted;
/* The buffer where query results are stored. */
- struct r300_winsys_buffer *buffer;
- struct r300_winsys_cs_buffer *cs_buffer;
+ struct r300_winsys_bo *buf;
+ struct r300_winsys_cs_handle *cs_buf;
/* The size of the buffer. */
unsigned buffer_size;
/* The domain of the buffer. */
@@ -291,33 +302,19 @@ struct r300_query {
struct r300_query* next;
};
-/* Fence object.
- *
- * This is a fake fence. Instead of syncing with the fence, we sync
- * with the context, which is inefficient but compliant.
- *
- * This is not a subclass of pipe_fence_handle because pipe_fence_handle is
- * never actually fully defined. So, rather than have it as a member, and do
- * subclass-style casting, we treat pipe_fence_handle as an opaque, and just
- * trust that our state tracker does not ever mess up fence objects.
- */
-struct r300_fence {
- struct pipe_reference reference;
- struct r300_context *ctx;
- boolean signalled;
-};
-
struct r300_surface {
struct pipe_surface base;
/* Winsys buffer backing the texture. */
- struct r300_winsys_buffer *buffer;
- struct r300_winsys_cs_buffer *cs_buffer;
+ struct r300_winsys_bo *buf;
+ struct r300_winsys_cs_handle *cs_buf;
enum r300_buffer_domain domain;
uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */
uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */
+ uint32_t pitch_zmask; /* ZMASK_PITCH */
+ uint32_t pitch_hiz; /* HIZ_PITCH */
uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */
/* Parameters dedicated to the CBZB clear. */
@@ -329,13 +326,9 @@ struct r300_surface {
/* Whether the CBZB clear is allowed on the surface. */
boolean cbzb_allowed;
-
};
struct r300_texture_desc {
- /* Parent class. */
- struct u_resource b;
-
/* Width, height, and depth.
* Most of the time, these are equal to pipe_texture::width0, height0,
* and depth0. However, NPOT 3D textures must have dimensions aligned
@@ -387,28 +380,39 @@ struct r300_texture_desc {
/* Whether CBZB fast color clear is allowed on the miplevel. */
boolean cbzb_allowed[R300_MAX_TEXTURE_LEVELS];
+
+ /* Zbuffer compression info for each miplevel. */
+ boolean zcomp8x8[R300_MAX_TEXTURE_LEVELS];
+ /* If zero, then disable Z compression/HiZ. */
+ unsigned zmask_dwords[R300_MAX_TEXTURE_LEVELS];
+ unsigned hiz_dwords[R300_MAX_TEXTURE_LEVELS];
+ /* Zmask/HiZ strides for each miplevel. */
+ unsigned zmask_stride_in_pixels[R300_MAX_TEXTURE_LEVELS];
+ unsigned hiz_stride_in_pixels[R300_MAX_TEXTURE_LEVELS];
};
-struct r300_texture {
- struct r300_texture_desc desc;
+struct r300_resource
+{
+ struct u_vbuf_resource b;
+ /* Winsys buffer backing this resource. */
+ struct r300_winsys_bo *buf;
+ struct r300_winsys_cs_handle *cs_buf;
enum r300_buffer_domain domain;
+ unsigned buf_size;
+
+ /* Constant buffers are in user memory. */
+ uint8_t *constant_buffer;
- /* Pipe buffer backing this texture. */
- struct r300_winsys_buffer *buffer;
- struct r300_winsys_cs_buffer *cs_buffer;
+ /* Texture description (addressing, layout, special features). */
+ struct r300_texture_desc tex;
/* Registers carrying texture format data. */
/* Only format-independent bits should be filled in. */
struct r300_texture_format_state tx_format;
- /* All bits should be filled in. */
- struct r300_texture_fb_state fb_state;
- /* hyper-z memory allocs */
- struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS];
- struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS];
- boolean zmask_in_use[R300_MAX_TEXTURE_LEVELS];
- boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS];
+ /* Where the texture starts in the buffer. */
+ unsigned tex_offset;
/* This is the level tiling flags were last time set for.
* It's used to prevent redundant tiling-flags changes from happening.*/
@@ -418,33 +422,29 @@ struct r300_texture {
struct r300_vertex_element_state {
unsigned count;
struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+ unsigned format_size[PIPE_MAX_ATTRIBS];
- /* If (velem[i].src_format != hw_format[i]), the vertex buffer
- * referenced by this vertex element cannot be used for rendering and
- * its vertex data must be translated to hw_format[i]. */
- enum pipe_format hw_format[PIPE_MAX_ATTRIBS];
- unsigned hw_format_size[PIPE_MAX_ATTRIBS];
+ struct u_vbuf_mgr_elements *vmgr_elements;
/* The size of the vertex, in dwords. */
unsigned vertex_size_dwords;
- /* This might mean two things:
- * - src_format != hw_format, as discussed above.
- * - src_offset % 4 != 0. */
- boolean incompatible_layout;
-
struct r300_vertex_stream_state vertex_stream;
};
-struct r300_translate_context {
- /* Translate cache for incompatible vertex offset/stride/format fallback. */
- struct translate_cache *translate_cache;
+enum r300_hiz_func {
+ HIZ_FUNC_NONE,
+
+ /* The function, when determined, is set in stone
+ * until the next HiZ clear. */
- /* The vertex buffer slot containing the translated buffer. */
- unsigned vb_slot;
+ /* MAX is written to the HiZ buffer.
+ * Used for LESS, LEQUAL. */
+ HIZ_FUNC_MAX,
- /* Saved and new vertex element state. */
- void *saved_velems, *new_velems;
+ /* MIN is written to the HiZ buffer.
+ * Used for GREATER, GEQUAL. */
+ HIZ_FUNC_MIN,
};
struct r300_context {
@@ -473,8 +473,6 @@ struct r300_context {
struct blitter_context* blitter;
/* Stencil two-sided reference value fallback. */
struct r300_stencilref_context *stencilref_fallback;
- /* For translating vertex buffers having incompatible vertex layout. */
- struct r300_translate_context tran;
/* The KIL opcode needs the first texture unit to be enabled
* on r3xx-r4xx. In order to calm down the CS checker, we bind this
@@ -556,13 +554,8 @@ struct r300_context {
/* The pointers to the first and the last atom. */
struct r300_atom *first_dirty, *last_dirty;
- /* Vertex buffers for Gallium. */
- struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
- int vertex_buffer_count;
- int vertex_buffer_max_index;
/* Vertex elements for Gallium. */
struct r300_vertex_element_state *velems;
- bool any_user_vbs;
struct pipe_index_buffer index_buffer;
@@ -587,21 +580,26 @@ struct r300_context {
int sprite_coord_enable;
/* Whether two-sided color selection is enabled (AKA light_twoside). */
boolean two_sided_color;
- /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */
- boolean incompatible_vb_layout;
-#define R300_Z_COMPRESS_44 1
-#define RV350_Z_COMPRESS_88 2
- int z_compression;
+ /* Whether fast color clear is enabled. */
boolean cbzb_clear;
- boolean z_decomp_rd;
-
- /* two mem block managers for hiz/zmask ram space */
- struct mem_block *hiz_mm;
- struct mem_block *zmask_mm;
-
- /* upload managers */
- struct u_upload_mgr *upload_vb;
- struct u_upload_mgr *upload_ib;
+ /* Whether ZMASK is enabled. */
+ boolean zmask_in_use;
+ /* Whether ZMASK is being decompressed. */
+ boolean zmask_decompress;
+ /* Whether ZMASK/HIZ is locked, i.e. should be disabled and cannot be taken over. */
+ boolean hyperz_locked;
+ /* The zbuffer the ZMASK of which is locked. */
+ struct pipe_surface *locked_zbuffer;
+ /* Whether HIZ is enabled. */
+ boolean hiz_in_use;
+ /* HiZ function. Can be either MIN or MAX. */
+ enum r300_hiz_func hiz_func;
+ /* HiZ clear value. */
+ uint32_t hiz_clear_value;
+
+ void *dsa_decompress_zmask;
+
+ struct u_vbuf_mgr *vbuf_mgr;
struct util_slab_mempool pool_transfers;
@@ -611,13 +609,12 @@ struct r300_context {
/* const tracking for VS */
int vs_const_base;
- /* AOS (PACKET3_3D_LOAD_VBPNTR) command buffer for the case offset=0. */
- uint32_t aos_cb[(16 * 3 + 1) / 2];
- boolean aos_dirty;
-
- /* Whether any buffer (FB, textures, VBOs) has been set, but buffers
- * haven't been validated yet. */
- boolean validate_buffers;
+ /* Vertex array state info */
+ boolean vertex_arrays_dirty;
+ boolean vertex_arrays_indexed;
+ int vertex_arrays_offset;
+ int vertex_arrays_instance_id;
+ boolean instancing_enabled;
};
#define foreach_atom(r300, atom) \
@@ -637,9 +634,9 @@ static INLINE struct r300_surface* r300_surface(struct pipe_surface* surf)
return (struct r300_surface*)surf;
}
-static INLINE struct r300_texture* r300_texture(struct pipe_resource* tex)
+static INLINE struct r300_resource* r300_resource(struct pipe_resource* tex)
{
- return (struct r300_texture*)tex;
+ return (struct r300_resource*)tex;
}
static INLINE struct r300_context* r300_context(struct pipe_context* context)
@@ -652,12 +649,25 @@ static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300)
return (struct r300_fragment_shader*)r300->fs.state;
}
+static INLINE void r300_mark_atom_dirty(struct r300_context *r300,
+ struct r300_atom *atom)
+{
+ atom->dirty = TRUE;
+
+ if (!r300->first_dirty) {
+ r300->first_dirty = atom;
+ r300->last_dirty = atom+1;
+ } else {
+ if (atom < r300->first_dirty)
+ r300->first_dirty = atom;
+ else if (atom+1 > r300->last_dirty)
+ r300->last_dirty = atom+1;
+ }
+}
+
struct pipe_context* r300_create_context(struct pipe_screen* screen,
void *priv);
-void r300_finish(struct r300_context *r300);
-void r300_flush_cb(void *data);
-
/* Context initialization. */
struct draw_stage* r300_draw_stage(struct r300_context* r300);
void r300_init_blit_functions(struct r300_context *r300);
@@ -668,10 +678,17 @@ void r300_init_state_functions(struct r300_context* r300);
void r300_init_resource_functions(struct r300_context* r300);
/* r300_blit.c */
-void r300_flush_depth_stencil(struct pipe_context *pipe,
- struct pipe_resource *dst,
- unsigned level,
- unsigned layer);
+void r300_decompress_zmask(struct r300_context *r300);
+void r300_decompress_zmask_locked_unsafe(struct r300_context *r300);
+void r300_decompress_zmask_locked(struct r300_context *r300);
+
+/* r300_flush.c */
+void r300_flush(struct pipe_context *pipe,
+ unsigned flags,
+ struct pipe_fence_handle **fence);
+
+/* r300_hyperz.c */
+void r300_update_hyperz_state(struct r300_context* r300);
/* r300_query.c */
void r300_resume_query(struct r300_context *r300,
@@ -679,8 +696,6 @@ void r300_resume_query(struct r300_context *r300,
void r300_stop_query(struct r300_context *r300);
/* r300_render_translate.c */
-void r300_begin_vertex_translate(struct r300_context *r300);
-void r300_end_vertex_translate(struct r300_context *r300);
void r300_translate_index_buffer(struct r300_context *r300,
struct pipe_resource **index_buffer,
unsigned *index_size, unsigned index_offset,
@@ -696,29 +711,16 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias);
/* r300_state.c */
enum r300_fb_state_change {
R300_CHANGED_FB_STATE = 0,
- R300_CHANGED_CBZB_FLAG,
- R300_CHANGED_ZCLEAR_FLAG
+ R300_CHANGED_HYPERZ_FLAG,
+ R300_CHANGED_MULTIWRITE
};
void r300_mark_fb_state_dirty(struct r300_context *r300,
enum r300_fb_state_change change);
void r300_mark_fs_code_dirty(struct r300_context *r300);
-static INLINE void r300_mark_atom_dirty(struct r300_context *r300,
- struct r300_atom *atom)
-{
- atom->dirty = TRUE;
-
- if (!r300->first_dirty) {
- r300->first_dirty = atom;
- r300->last_dirty = atom+1;
- } else {
- if (atom < r300->first_dirty)
- r300->first_dirty = atom;
- if (atom+1 > r300->last_dirty)
- r300->last_dirty = atom+1;
- }
-}
+/* r300_state_derived.c */
+void r300_update_derived_state(struct r300_context* r300);
/* r300_debug.c */
void r500_dump_rs_block(struct r300_rs_block *rs);
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index 67fb0096a8..2e52dfa43c 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -35,12 +35,6 @@
* that they neatly hide away, and don't have the cost of function setup,so
* we're going to use them. */
-#ifdef DEBUG
-#define CS_DEBUG(x) x
-#else
-#define CS_DEBUG(x)
-#endif
-
/**
* Command submission setup.
*/
@@ -50,22 +44,29 @@
struct r300_winsys_screen *cs_winsys = (context)->rws; \
int cs_count = 0; (void) cs_count; (void) cs_winsys;
+#ifdef DEBUG
+
#define BEGIN_CS(size) do { \
assert(size <= (R300_MAX_CMDBUF_DWORDS - cs_copy->cdw)); \
- CS_DEBUG(cs_count = size;) \
+ cs_count = size; \
} while (0)
-#ifdef DEBUG
#define END_CS do { \
if (cs_count != 0) \
debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \
cs_count, __FUNCTION__, __FILE__, __LINE__); \
cs_count = 0; \
} while (0)
+
+#define CS_USED_DW(x) cs_count -= (x)
+
#else
+
+#define BEGIN_CS(size)
#define END_CS
-#endif
+#define CS_USED_DW(x)
+#endif
/**
* Writing pure DWORDs.
@@ -73,7 +74,7 @@
#define OUT_CS(value) do { \
cs_copy->buf[cs_copy->cdw++] = (value); \
- CS_DEBUG(cs_count--;) \
+ CS_USED_DW(1); \
} while (0)
#define OUT_CS_32F(value) \
@@ -98,7 +99,7 @@
#define OUT_CS_TABLE(values, count) do { \
memcpy(cs_copy->buf + cs_copy->cdw, values, count * 4); \
cs_copy->cdw += count; \
- CS_DEBUG(cs_count -= count;) \
+ CS_USED_DW(count); \
} while (0)
@@ -106,27 +107,11 @@
* Writing relocations.
*/
-#define OUT_CS_RELOC(bo, offset, rd, wd) do { \
- assert(bo); \
- OUT_CS(offset); \
- cs_winsys->cs_write_reloc(cs_copy, bo, rd, wd); \
- CS_DEBUG(cs_count -= 2;) \
-} while (0)
-
-#define OUT_CS_BUF_RELOC(bo, offset, rd, wd) do { \
- assert(bo); \
- OUT_CS_RELOC(r300_buffer(bo)->cs_buf, offset, rd, wd); \
-} while (0)
-
-#define OUT_CS_TEX_RELOC(tex, offset, rd, wd) do { \
- assert(tex); \
- OUT_CS_RELOC(tex->cs_buffer, offset, rd, wd); \
-} while (0)
-
-#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd) do { \
- assert(bo); \
- cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->cs_buf, rd, wd); \
- CS_DEBUG(cs_count -= 2;) \
+#define OUT_CS_RELOC(r) do { \
+ assert((r)); \
+ assert((r)->cs_buf); \
+ cs_winsys->cs_write_reloc(cs_copy, (r)->cs_buf); \
+ CS_USED_DW(2); \
} while (0)
@@ -135,7 +120,7 @@
*/
#define WRITE_CS_TABLE(values, count) do { \
- CS_DEBUG(assert(cs_count == 0);) \
+ assert(cs_count == 0); \
memcpy(cs_copy->buf + cs_copy->cdw, (values), (count) * 4); \
cs_copy->cdw += (count); \
} while (0)
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index 52031dd97b..b60cfd1f24 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -27,6 +27,7 @@
#include <stdio.h>
static const struct debug_named_value debug_options[] = {
+ { "info", DBG_INFO, "Print hardware info"},
{ "fp", DBG_FP, "Log fragment program compilation" },
{ "vp", DBG_VP, "Log vertex program compilation" },
{ "pstat", DBG_P_STAT, "Log vertex/fragment program stats" },
@@ -41,6 +42,7 @@ static const struct debug_named_value debug_options[] = {
{ "fb", DBG_FB, "Log framebuffer" },
{ "cbzb", DBG_CBZB, "Log fast color clear info" },
{ "hyperz", DBG_HYPERZ, "Log HyperZ info" },
+ { "upload", DBG_UPLOAD, "Log user buffer upload info" },
{ "scissor", DBG_SCISSOR, "Log scissor info" },
{ "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries" },
{ "anisohq", DBG_ANISOHQ, "Use high quality anisotropic filtering" },
@@ -48,6 +50,8 @@ static const struct debug_named_value debug_options[] = {
{ "noimmd", DBG_NO_IMMD, "Disable immediate mode" },
{ "noopt", DBG_NO_OPT, "Disable shader optimizations" },
{ "nocbzb", DBG_NO_CBZB, "Disable fast color clear" },
+ { "nozmask", DBG_NO_ZMASK, "Disable zbuffer compression" },
+ { "nohiz", DBG_NO_HIZ, "Disable hierarchical zbuffer" },
/* must be last */
DEBUG_NAMED_VALUE_END
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 04a5bd92d1..e17a907e77 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -26,7 +26,6 @@
#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_mm.h"
-#include "util/u_simple_list.h"
#include "r300_context.h"
#include "r300_cb.h"
@@ -46,7 +45,10 @@ void r300_emit_blend_state(struct r300_context* r300,
CS_LOCALS(r300);
if (fb->nr_cbufs) {
- WRITE_CS_TABLE(blend->cb, size);
+ if (fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT)
+ WRITE_CS_TABLE(blend->cb_noclamp, size);
+ else
+ WRITE_CS_TABLE(blend->cb_clamp, size);
} else {
WRITE_CS_TABLE(blend->cb_no_readwrite, size);
}
@@ -78,9 +80,15 @@ void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state)
CS_LOCALS(r300);
if (fb->zsbuf) {
- WRITE_CS_TABLE(&dsa->cb_begin, size);
+ if (fb->nr_cbufs && fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT)
+ WRITE_CS_TABLE(&dsa->cb_begin_fp16, size);
+ else
+ WRITE_CS_TABLE(&dsa->cb_begin, size);
} else {
- WRITE_CS_TABLE(dsa->cb_no_readwrite, size);
+ if (fb->nr_cbufs && fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT)
+ WRITE_CS_TABLE(dsa->cb_fp16_zb_no_readwrite, size);
+ else
+ WRITE_CS_TABLE(dsa->cb_zb_no_readwrite, size);
}
}
@@ -90,7 +98,7 @@ static void get_rc_constant_state(
struct rc_constant * constant)
{
struct r300_textures_state* texstate = r300->textures_state.state;
- struct r300_texture *tex;
+ struct r300_resource *tex;
assert(constant->Type == RC_CONSTANT_STATE);
@@ -102,19 +110,19 @@ static void get_rc_constant_state(
/* Factor for converting rectangle coords to
* normalized coords. Should only show up on non-r500. */
case RC_STATE_R300_TEXRECT_FACTOR:
- tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture);
- vec[0] = 1.0 / tex->desc.width0;
- vec[1] = 1.0 / tex->desc.height0;
+ tex = r300_resource(texstate->sampler_views[constant->u.State[1]]->base.texture);
+ vec[0] = 1.0 / tex->tex.width0;
+ vec[1] = 1.0 / tex->tex.height0;
vec[2] = 0;
vec[3] = 1;
break;
case RC_STATE_R300_TEXSCALE_FACTOR:
- tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture);
+ tex = r300_resource(texstate->sampler_views[constant->u.State[1]]->base.texture);
/* Add a small number to the texture size to work around rounding errors in hw. */
- vec[0] = tex->desc.b.b.width0 / (tex->desc.width0 + 0.001f);
- vec[1] = tex->desc.b.b.height0 / (tex->desc.height0 + 0.001f);
- vec[2] = tex->desc.b.b.depth0 / (tex->desc.depth0 + 0.001f);
+ vec[0] = tex->b.b.b.width0 / (tex->tex.width0 + 0.001f);
+ vec[1] = tex->b.b.b.height0 / (tex->tex.height0 + 0.001f);
+ vec[2] = tex->b.b.b.depth0 / (tex->tex.depth0 + 0.001f);
vec[3] = 1;
break;
@@ -353,11 +361,9 @@ void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state)
OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config);
if (aa->dest) {
- OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1);
- OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->offset, 0, aa->dest->domain);
-
- OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1);
- OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->pitch, 0, aa->dest->domain);
+ OUT_CS_REG(R300_RB3D_AARESOLVE_OFFSET, aa->dest->offset);
+ OUT_CS_RELOC(aa->dest);
+ OUT_CS_REG(R300_RB3D_AARESOLVE_PITCH, aa->dest->pitch);
}
OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl);
@@ -370,6 +376,8 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
struct r300_surface* surf;
unsigned i;
boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
+ uint32_t rb3d_cctl = 0;
+
CS_LOCALS(r300);
BEGIN_CS(size);
@@ -377,21 +385,24 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
/* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers, which is not
* what we usually want. */
if (r300->screen->caps.is_r500) {
- OUT_CS_REG(R300_RB3D_CCTL,
- R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE);
- } else {
- OUT_CS_REG(R300_RB3D_CCTL, 0);
+ rb3d_cctl = R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE;
}
+ if (fb->nr_cbufs &&
+ r300_fragment_shader_writes_all(r300_fs(r300))) {
+ rb3d_cctl |= R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs);
+ }
+
+ OUT_CS_REG(R300_RB3D_CCTL, rb3d_cctl);
/* Set up colorbuffers. */
for (i = 0; i < fb->nr_cbufs; i++) {
surf = r300_surface(fb->cbufs[i]);
- OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1);
- OUT_CS_RELOC(surf->cs_buffer, surf->offset, 0, surf->domain);
+ OUT_CS_REG(R300_RB3D_COLOROFFSET0 + (4 * i), surf->offset);
+ OUT_CS_RELOC(surf);
- OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1);
- OUT_CS_RELOC(surf->cs_buffer, surf->pitch, 0, surf->domain);
+ OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), surf->pitch);
+ OUT_CS_RELOC(surf);
}
/* Set up the ZB part of the CBZB clear. */
@@ -400,11 +411,11 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format);
- OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
- OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_midpoint_offset, 0, surf->domain);
+ OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->cbzb_midpoint_offset);
+ OUT_CS_RELOC(surf);
- OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
- OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_pitch, 0, surf->domain);
+ OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->cbzb_pitch);
+ OUT_CS_RELOC(surf);
DBG(r300, DBG_CBZB,
"CBZB clearing cbuf %08x %08x\n", surf->cbzb_format,
@@ -416,37 +427,19 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_REG(R300_ZB_FORMAT, surf->format);
- OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
- OUT_CS_RELOC(surf->cs_buffer, surf->offset, 0, surf->domain);
+ OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->offset);
+ OUT_CS_RELOC(surf);
- OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
- OUT_CS_RELOC(surf->cs_buffer, surf->pitch, 0, surf->domain);
+ OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->pitch);
+ OUT_CS_RELOC(surf);
if (can_hyperz) {
- uint32_t surf_pitch;
- struct r300_texture *tex;
- int level = surf->base.u.tex.level;
- tex = r300_texture(surf->base.texture);
-
- surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK;
/* HiZ RAM. */
- if (r300->screen->caps.hiz_ram) {
- if (tex->hiz_mem[level]) {
- OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs << 2);
- OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch);
- } else {
- OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0);
- OUT_CS_REG(R300_ZB_HIZ_PITCH, 0);
- }
- }
+ OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0);
+ OUT_CS_REG(R300_ZB_HIZ_PITCH, surf->pitch_hiz);
/* Z Mask RAM. (compressed zbuffer) */
- if (tex->zmask_mem[level]) {
- OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2);
- OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch);
- } else {
- OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0);
- OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0);
- }
+ OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0);
+ OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf->pitch_zmask);
}
}
@@ -458,6 +451,7 @@ void r300_emit_hyperz_state(struct r300_context *r300,
{
struct r300_hyperz_state *z = state;
CS_LOCALS(r300);
+
if (z->flush)
WRITE_CS_TABLE(&z->cb_flush_begin, size);
else
@@ -483,15 +477,22 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300,
{
struct pipe_framebuffer_state* fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
- unsigned i;
+ unsigned i, num_cbufs = fb->nr_cbufs;
+ unsigned mspos0, mspos1;
CS_LOCALS(r300);
+ /* If we use the multiwrite feature, the colorbuffers 2,3,4 must be
+ * marked as UNUSED in the US block. */
+ if (r300_fragment_shader_writes_all(r300_fs(r300))) {
+ num_cbufs = MIN2(num_cbufs, 1);
+ }
+
BEGIN_CS(size);
/* Colorbuffer format in the US block.
* (must be written after unpipelined regs) */
OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4);
- for (i = 0; i < fb->nr_cbufs; i++) {
+ for (i = 0; i < num_cbufs; i++) {
OUT_CS(r300_surface(fb->cbufs[i])->format);
}
for (; i < 4; i++) {
@@ -501,38 +502,36 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300,
/* Multisampling. Depends on framebuffer sample count.
* These are pipelined regs and as such cannot be moved
* to the AA state. */
- if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) {
- unsigned mspos0 = 0x66666666;
- unsigned mspos1 = 0x6666666;
-
- if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) {
- /* Subsample placement. These may not be optimal. */
- switch (fb->cbufs[0]->texture->nr_samples) {
- case 2:
- mspos0 = 0x33996633;
- mspos1 = 0x6666663;
- break;
- case 3:
- mspos0 = 0x33936933;
- mspos1 = 0x6666663;
- break;
- case 4:
- mspos0 = 0x33939933;
- mspos1 = 0x3966663;
- break;
- case 6:
- mspos0 = 0x22a2aa22;
- mspos1 = 0x2a65672;
- break;
- default:
- debug_printf("r300: Bad number of multisamples!\n");
- }
- }
+ mspos0 = 0x66666666;
+ mspos1 = 0x6666666;
- OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);
- OUT_CS(mspos0);
- OUT_CS(mspos1);
+ if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) {
+ /* Subsample placement. These may not be optimal. */
+ switch (fb->cbufs[0]->texture->nr_samples) {
+ case 2:
+ mspos0 = 0x33996633;
+ mspos1 = 0x6666663;
+ break;
+ case 3:
+ mspos0 = 0x33936933;
+ mspos1 = 0x6666663;
+ break;
+ case 4:
+ mspos0 = 0x33939933;
+ mspos1 = 0x3966663;
+ break;
+ case 6:
+ mspos0 = 0x22a2aa22;
+ mspos1 = 0x2a65672;
+ break;
+ default:
+ debug_printf("r300: Bad number of multisamples!\n");
+ }
}
+
+ OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);
+ OUT_CS(mspos0);
+ OUT_CS(mspos1);
END_CS;
}
@@ -553,14 +552,12 @@ void r300_emit_query_start(struct r300_context *r300, unsigned size, void*state)
OUT_CS_REG(R300_ZB_ZPASS_DATA, 0);
END_CS;
query->begin_emitted = TRUE;
- query->flushed = FALSE;
}
static void r300_emit_query_end_frag_pipes(struct r300_context *r300,
struct r300_query *query)
{
struct r300_capabilities* caps = &r300->screen->caps;
- struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer;
CS_LOCALS(r300);
assert(caps->num_frag_pipes);
@@ -578,29 +575,25 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300,
case 4:
/* pipe 3 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 3);
- OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 3) * 4,
- 0, query->domain);
+ OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 3) * 4);
+ OUT_CS_RELOC(r300->query_current);
case 3:
/* pipe 2 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 2);
- OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 2) * 4,
- 0, query->domain);
+ OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 2) * 4);
+ OUT_CS_RELOC(r300->query_current);
case 2:
/* pipe 1 only */
/* As mentioned above, accomodate RV380 and older. */
OUT_CS_REG(R300_SU_REG_DEST,
1 << (caps->high_second_pipe ? 3 : 1));
- OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 1) * 4,
- 0, query->domain);
+ OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4);
+ OUT_CS_RELOC(r300->query_current);
case 1:
/* pipe 0 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 0);
- OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 0) * 4,
- 0, query->domain);
+ OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4);
+ OUT_CS_RELOC(r300->query_current);
break;
default:
fprintf(stderr, "r300: Implementation error: Chipset reports %d"
@@ -616,13 +609,12 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300,
static void rv530_emit_query_end_single_z(struct r300_context *r300,
struct r300_query *query)
{
- struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer;
CS_LOCALS(r300);
BEGIN_CS(8);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
- OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain);
+ OUT_CS_REG(R300_ZB_ZPASS_ADDR, query->num_results * 4);
+ OUT_CS_RELOC(r300->query_current);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
END_CS;
}
@@ -630,16 +622,15 @@ static void rv530_emit_query_end_single_z(struct r300_context *r300,
static void rv530_emit_query_end_double_z(struct r300_context *r300,
struct r300_query *query)
{
- struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer;
CS_LOCALS(r300);
BEGIN_CS(14);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
- OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain);
+ OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4);
+ OUT_CS_RELOC(r300->query_current);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1);
- OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain);
+ OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4);
+ OUT_CS_RELOC(r300->query_current);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
END_CS;
}
@@ -778,7 +769,7 @@ void r300_emit_textures_state(struct r300_context *r300,
{
struct r300_textures_state *allstate = (struct r300_textures_state*)state;
struct r300_texture_sampler_state *texstate;
- struct r300_texture *tex;
+ struct r300_resource *tex;
unsigned i;
CS_LOCALS(r300);
@@ -788,7 +779,7 @@ void r300_emit_textures_state(struct r300_context *r300,
for (i = 0; i < allstate->count; i++) {
if ((1 << i) & allstate->tx_enable) {
texstate = &allstate->regs[i];
- tex = r300_texture(allstate->sampler_views[i]->base.texture);
+ tex = r300_resource(allstate->sampler_views[i]->base.texture);
OUT_CS_REG(R300_TX_FILTER0_0 + (i * 4), texstate->filter0);
OUT_CS_REG(R300_TX_FILTER1_0 + (i * 4), texstate->filter1);
@@ -799,73 +790,35 @@ void r300_emit_textures_state(struct r300_context *r300,
OUT_CS_REG(R300_TX_FORMAT1_0 + (i * 4), texstate->format.format1);
OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format.format2);
- OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1);
- OUT_CS_TEX_RELOC(tex, texstate->format.tile_config, tex->domain,
- 0);
+ OUT_CS_REG(R300_TX_OFFSET_0 + (i * 4), texstate->format.tile_config);
+ OUT_CS_RELOC(tex);
}
}
END_CS;
}
-static void r300_update_aos_cb(struct r300_context *r300, unsigned packet_size)
+void r300_emit_vertex_arrays(struct r300_context* r300, int offset,
+ boolean indexed, int instance_id)
{
- struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer;
+ struct pipe_vertex_buffer *vbuf = r300->vbuf_mgr->vertex_buffer;
+ struct pipe_resource **valid_vbuf = r300->vbuf_mgr->real_vertex_buffer;
struct pipe_vertex_element *velem = r300->velems->velem;
- unsigned *hw_format_size = r300->velems->hw_format_size;
- unsigned size1, size2, aos_count = r300->velems->count;
+ struct r300_resource *buf;
int i;
- CB_LOCALS;
-
- BEGIN_CB(r300->aos_cb, packet_size);
- for (i = 0; i < aos_count - 1; i += 2) {
- vb1 = &vbuf[velem[i].vertex_buffer_index];
- vb2 = &vbuf[velem[i+1].vertex_buffer_index];
- size1 = hw_format_size[i];
- size2 = hw_format_size[i+1];
-
- OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) |
- R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride));
- OUT_CB(vb1->buffer_offset + velem[i].src_offset);
- OUT_CB(vb2->buffer_offset + velem[i+1].src_offset);
- }
-
- if (aos_count & 1) {
- vb1 = &vbuf[velem[i].vertex_buffer_index];
- size1 = hw_format_size[i];
-
- OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));
- OUT_CB(vb1->buffer_offset + velem[i].src_offset);
- }
- END_CB;
-
- r300->aos_dirty = FALSE;
-}
-
-void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed)
-{
- struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
- struct pipe_vertex_element *velem = r300->velems->velem;
- struct r300_buffer *buf;
- int i;
- unsigned aos_count = r300->velems->count;
- unsigned packet_size = (aos_count * 3 + 1) / 2;
+ unsigned vertex_array_count = r300->velems->count;
+ unsigned packet_size = (vertex_array_count * 3 + 1) / 2;
+ struct pipe_vertex_buffer *vb1, *vb2;
+ unsigned *hw_format_size = r300->velems->format_size;
+ unsigned size1, size2, offset1, offset2, stride1, stride2;
CS_LOCALS(r300);
- BEGIN_CS(2 + packet_size + aos_count * 2);
+ BEGIN_CS(2 + packet_size + vertex_array_count * 2);
OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
- OUT_CS(aos_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
-
- if (!offset) {
- if (r300->aos_dirty) {
- r300_update_aos_cb(r300, packet_size);
- }
- OUT_CS_TABLE(r300->aos_cb, packet_size);
- } else {
- struct pipe_vertex_buffer *vb1, *vb2;
- unsigned *hw_format_size = r300->velems->hw_format_size;
- unsigned size1, size2;
+ OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
- for (i = 0; i < aos_count - 1; i += 2) {
+ if (instance_id == -1) {
+ /* Non-instanced arrays. This ignores instance_divisor and instance_id. */
+ for (i = 0; i < vertex_array_count - 1; i += 2) {
vb1 = &vbuf[velem[i].vertex_buffer_index];
vb2 = &vbuf[velem[i+1].vertex_buffer_index];
size1 = hw_format_size[i];
@@ -877,23 +830,75 @@ void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed)
OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride);
}
- if (aos_count & 1) {
+ if (vertex_array_count & 1) {
vb1 = &vbuf[velem[i].vertex_buffer_index];
size1 = hw_format_size[i];
OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));
OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
}
- }
- for (i = 0; i < aos_count; i++) {
- buf = r300_buffer(vbuf[velem[i].vertex_buffer_index].buffer);
- OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0);
+ for (i = 0; i < vertex_array_count; i++) {
+ buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]);
+ OUT_CS_RELOC(buf);
+ }
+ } else {
+ /* Instanced arrays. */
+ for (i = 0; i < vertex_array_count - 1; i += 2) {
+ vb1 = &vbuf[velem[i].vertex_buffer_index];
+ vb2 = &vbuf[velem[i+1].vertex_buffer_index];
+ size1 = hw_format_size[i];
+ size2 = hw_format_size[i+1];
+
+ if (velem[i].instance_divisor) {
+ stride1 = 0;
+ offset1 = vb1->buffer_offset + velem[i].src_offset +
+ (instance_id / velem[i].instance_divisor) * vb1->stride;
+ } else {
+ stride1 = vb1->stride;
+ offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride;
+ }
+ if (velem[i+1].instance_divisor) {
+ stride2 = 0;
+ offset2 = vb2->buffer_offset + velem[i+1].src_offset +
+ (instance_id / velem[i+1].instance_divisor) * vb2->stride;
+ } else {
+ stride2 = vb2->stride;
+ offset2 = vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride;
+ }
+
+ OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1) |
+ R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(stride2));
+ OUT_CS(offset1);
+ OUT_CS(offset2);
+ }
+
+ if (vertex_array_count & 1) {
+ vb1 = &vbuf[velem[i].vertex_buffer_index];
+ size1 = hw_format_size[i];
+
+ if (velem[i].instance_divisor) {
+ stride1 = 0;
+ offset1 = vb1->buffer_offset + velem[i].src_offset +
+ (instance_id / velem[i].instance_divisor) * vb1->stride;
+ } else {
+ stride1 = vb1->stride;
+ offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride;
+ }
+
+ OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1));
+ OUT_CS(offset1);
+ }
+
+ for (i = 0; i < vertex_array_count; i++) {
+ buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]);
+ OUT_CS_RELOC(buf);
+ }
}
END_CS;
}
-void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed)
+void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed)
{
CS_LOCALS(r300);
@@ -913,7 +918,8 @@ void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed)
OUT_CS(r300->vertex_info.size |
(r300->vertex_info.size << 8));
OUT_CS(r300->draw_vbo_offset);
- OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 0);
+ OUT_CS(0);
+ OUT_CS_RELOC(r300_resource(r300->vbo));
END_CS;
}
@@ -1080,109 +1086,47 @@ void r300_emit_viewport_state(struct r300_context* r300,
END_CS;
}
-static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val)
-{
- CS_LOCALS(r300);
- BEGIN_CS(4);
- OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2);
- OUT_CS(start);
- OUT_CS(count);
- OUT_CS(val);
- END_CS;
-}
-
-static void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val)
-{
- CS_LOCALS(r300);
- BEGIN_CS(4);
- OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2);
- OUT_CS(start);
- OUT_CS(count);
- OUT_CS(val);
- END_CS;
-}
-
-#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
-
void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state)
{
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
- struct r300_hyperz_state *z =
- (struct r300_hyperz_state*)r300->hyperz_state.state;
- struct r300_screen* r300screen = r300->screen;
- uint32_t stride, offset = 0, height, offset_shift;
- struct r300_texture* tex;
- int i;
-
- tex = r300_texture(fb->zsbuf->texture);
-
- offset = tex->hiz_mem[fb->zsbuf->u.tex.level]->ofs;
- stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level];
-
- /* convert from pixels to 4x4 blocks */
- stride = ALIGN_DIVUP(stride, 4);
-
- stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes);
- /* there are 4 blocks per dwords */
- stride = ALIGN_DIVUP(stride, 4);
-
- height = ALIGN_DIVUP(fb->zsbuf->height, 4);
+ struct r300_resource* tex;
+ CS_LOCALS(r300);
- offset_shift = 2;
- offset_shift += (r300screen->caps.num_frag_pipes / 2);
+ tex = r300_resource(fb->zsbuf->texture);
- for (i = 0; i < height; i++) {
- offset = i * stride;
- offset <<= offset_shift;
- r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff);
- }
- z->current_func = -1;
+ BEGIN_CS(size);
+ OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2);
+ OUT_CS(0);
+ OUT_CS(tex->tex.hiz_dwords[fb->zsbuf->u.tex.level]);
+ OUT_CS(r300->hiz_clear_value);
+ END_CS;
/* Mark the current zbuffer's hiz ram as in use. */
- tex->hiz_in_use[fb->zsbuf->u.tex.level] = TRUE;
+ r300->hiz_in_use = TRUE;
+ r300->hiz_func = HIZ_FUNC_NONE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state)
{
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
- struct r300_screen* r300screen = r300->screen;
- uint32_t stride, offset = 0;
- struct r300_texture* tex;
- uint32_t i, height;
- int mult, offset_shift;
-
- tex = r300_texture(fb->zsbuf->texture);
- stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level];
-
- offset = tex->zmask_mem[fb->zsbuf->u.tex.level]->ofs;
-
- if (r300->z_compression == RV350_Z_COMPRESS_88)
- mult = 8;
- else
- mult = 4;
-
- height = ALIGN_DIVUP(fb->zsbuf->height, mult);
-
- offset_shift = 4;
- offset_shift += (r300screen->caps.num_frag_pipes / 2);
- stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes);
+ struct r300_resource *tex;
+ CS_LOCALS(r300);
- /* okay have width in pixels - divide by block width */
- stride = ALIGN_DIVUP(stride, mult);
- /* have width in blocks - divide by number of fragment pipes screen width */
- /* 16 blocks per dword */
- stride = ALIGN_DIVUP(stride, 16);
+ tex = r300_resource(fb->zsbuf->texture);
- for (i = 0; i < height; i++) {
- offset = i * stride;
- offset <<= offset_shift;
- r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff);
- }
+ BEGIN_CS(size);
+ OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2);
+ OUT_CS(0);
+ OUT_CS(tex->tex.zmask_dwords[fb->zsbuf->u.tex.level]);
+ OUT_CS(0);
+ END_CS;
/* Mark the current zbuffer's zmask as in use. */
- tex->zmask_in_use[fb->zsbuf->u.tex.level] = TRUE;
+ r300->zmask_in_use = TRUE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
void r300_emit_ztop_state(struct r300_context* r300,
@@ -1209,68 +1153,77 @@ boolean r300_emit_buffer_validate(struct r300_context *r300,
boolean do_validate_vertex_buffers,
struct pipe_resource *index_buffer)
{
- struct pipe_framebuffer_state* fb =
+ struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
struct r300_textures_state *texstate =
(struct r300_textures_state*)r300->textures_state.state;
- struct r300_texture* tex;
- struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
- struct pipe_vertex_element *velem = r300->velems->velem;
- struct pipe_resource *pbuf;
+ struct r300_resource *tex;
unsigned i;
-
- /* Clean out BOs. */
- r300->rws->cs_reset_buffers(r300->cs);
-
- /* Color buffers... */
- for (i = 0; i < fb->nr_cbufs; i++) {
- tex = r300_texture(fb->cbufs[i]->texture);
- assert(tex && tex->buffer && "cbuf is marked, but NULL!");
- r300->rws->cs_add_buffer(r300->cs, tex->cs_buffer, 0,
- r300_surface(fb->cbufs[i])->domain);
- }
- /* ...depth buffer... */
- if (fb->zsbuf) {
- tex = r300_texture(fb->zsbuf->texture);
- assert(tex && tex->buffer && "zsbuf is marked, but NULL!");
- r300->rws->cs_add_buffer(r300->cs, tex->cs_buffer, 0,
- r300_surface(fb->zsbuf)->domain);
- }
- /* ...textures... */
- for (i = 0; i < texstate->count; i++) {
- if (!(texstate->tx_enable & (1 << i))) {
- continue;
+ boolean flushed = FALSE;
+
+validate:
+ if (r300->fb_state.dirty) {
+ /* Color buffers... */
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ tex = r300_resource(fb->cbufs[i]->texture);
+ assert(tex && tex->buf && "cbuf is marked, but NULL!");
+ r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, 0,
+ r300_surface(fb->cbufs[i])->domain);
+ }
+ /* ...depth buffer... */
+ if (fb->zsbuf) {
+ tex = r300_resource(fb->zsbuf->texture);
+ assert(tex && tex->buf && "zsbuf is marked, but NULL!");
+ r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, 0,
+ r300_surface(fb->zsbuf)->domain);
}
+ }
+ if (r300->textures_state.dirty) {
+ /* ...textures... */
+ for (i = 0; i < texstate->count; i++) {
+ if (!(texstate->tx_enable & (1 << i))) {
+ continue;
+ }
- tex = r300_texture(texstate->sampler_views[i]->base.texture);
- r300->rws->cs_add_buffer(r300->cs, tex->cs_buffer, tex->domain, 0);
+ tex = r300_resource(texstate->sampler_views[i]->base.texture);
+ r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, tex->domain, 0);
+ }
}
/* ...occlusion query buffer... */
if (r300->query_current)
- r300->rws->cs_add_buffer(r300->cs, r300->query_current->cs_buffer,
- 0, r300->query_current->domain);
+ r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf,
+ 0, r300->query_current->domain);
/* ...vertex buffer for SWTCL path... */
if (r300->vbo)
- r300->rws->cs_add_buffer(r300->cs, r300_buffer(r300->vbo)->cs_buf,
- r300_buffer(r300->vbo)->domain, 0);
+ r300->rws->cs_add_reloc(r300->cs, r300_resource(r300->vbo)->cs_buf,
+ r300_resource(r300->vbo)->domain, 0);
/* ...vertex buffers for HWTCL path... */
- if (do_validate_vertex_buffers) {
- for (i = 0; i < r300->velems->count; i++) {
- pbuf = vbuf[velem[i].vertex_buffer_index].buffer;
- if (!pbuf)
+ if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) {
+ struct pipe_resource **buf = r300->vbuf_mgr->real_vertex_buffer;
+ struct pipe_resource **last = r300->vbuf_mgr->real_vertex_buffer +
+ r300->vbuf_mgr->nr_real_vertex_buffers;
+ for (; buf != last; buf++) {
+ if (!*buf)
continue;
- r300->rws->cs_add_buffer(r300->cs, r300_buffer(pbuf)->cs_buf,
- r300_buffer(pbuf)->domain, 0);
+ r300->rws->cs_add_reloc(r300->cs, r300_resource(*buf)->cs_buf,
+ r300_resource(*buf)->domain, 0);
}
}
/* ...and index buffer for HWTCL path. */
if (index_buffer)
- r300->rws->cs_add_buffer(r300->cs, r300_buffer(index_buffer)->cs_buf,
- r300_buffer(index_buffer)->domain, 0);
+ r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf,
+ r300_resource(index_buffer)->domain, 0);
+ /* Now do the validation. */
if (!r300->rws->cs_validate(r300->cs)) {
- return FALSE;
+ /* Ooops, an infinite loop, give up. */
+ if (flushed)
+ return FALSE;
+
+ r300_flush(&r300->context, R300_FLUSH_ASYNC, NULL);
+ flushed = TRUE;
+ goto validate;
}
return TRUE;
@@ -1300,7 +1253,7 @@ unsigned r300_get_num_cs_end_dwords(struct r300_context *r300)
/* Emitted in flush. */
dwords += 26; /* emit_query_end */
dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */
- if (r300->screen->caps.index_bias_supported)
+ if (r300->screen->caps.is_r500)
dwords += 2;
return dwords;
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 278dbcb4c7..6c1c9d2fb1 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -31,7 +31,8 @@ struct r300_vertex_program_code;
uint32_t pack_float24(float f);
-void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed);
+void r300_emit_vertex_arrays(struct r300_context* r300, int offset,
+ boolean indexed, int instance_id);
void r300_emit_blend_state(struct r300_context* r300,
unsigned size, void* state);
@@ -86,7 +87,7 @@ void r300_emit_scissor_state(struct r300_context* r300,
void r300_emit_textures_state(struct r300_context *r300,
unsigned size, void *state);
-void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed);
+void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed);
void r300_emit_vap_invariant_state(struct r300_context *r300,
unsigned size, void *state);
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 451fe525b4..b3d0d344ec 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -31,29 +31,38 @@
#include "r300_cs.h"
#include "r300_emit.h"
-static void r300_flush(struct pipe_context* pipe,
- unsigned flags,
- struct pipe_fence_handle** fence)
+
+void r300_flush(struct pipe_context *pipe,
+ unsigned flags,
+ struct pipe_fence_handle **fence)
{
struct r300_context *r300 = r300_context(pipe);
- struct r300_query *query;
struct r300_atom *atom;
- struct r300_fence **rfence = (struct r300_fence**)fence;
-
- u_upload_flush(r300->upload_vb);
- u_upload_flush(r300->upload_ib);
+ struct r300_winsys_bo **rfence = (struct r300_winsys_bo**)fence;
if (r300->draw && !r300->draw_vbo_locked)
r300_draw_flush_vbuf(r300);
+ if (rfence) {
+ /* Create a fence, which is a dummy BO. */
+ *rfence = r300->rws->buffer_create(r300->rws, 1, 1,
+ PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STATIC,
+ R300_DOMAIN_GTT);
+ /* Add the fence as a dummy relocation. */
+ r300->rws->cs_add_reloc(r300->cs,
+ r300->rws->buffer_get_cs_handle(*rfence),
+ R300_DOMAIN_GTT, R300_DOMAIN_GTT);
+ }
+
if (r300->dirty_hw) {
r300_emit_hyperz_end(r300);
r300_emit_query_end(r300);
- if (r300->screen->caps.index_bias_supported)
+ if (r300->screen->caps.is_r500)
r500_emit_index_bias(r300, 0);
r300->flush_counter++;
- r300->rws->cs_flush(r300->cs);
+ r300->rws->cs_flush(r300->cs, flags);
r300->dirty_hw = 0;
/* New kitchen sink, baby. */
@@ -62,30 +71,35 @@ static void r300_flush(struct pipe_context* pipe,
r300_mark_atom_dirty(r300, atom);
}
}
+ r300->vertex_arrays_dirty = TRUE;
/* Unmark HWTCL state for SWTCL. */
if (!r300->screen->caps.has_tcl) {
r300->vs_state.dirty = FALSE;
r300->vs_constants.dirty = FALSE;
}
-
- r300->validate_buffers = TRUE;
- }
-
- /* reset flushed query */
- foreach(query, &r300->query_list) {
- query->flushed = TRUE;
+ } else {
+ if (rfence) {
+ /* We have to create a fence object, but the command stream is empty
+ * and we cannot emit an empty CS. We must write some regs then. */
+ CS_LOCALS(r300);
+ OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0);
+ r300->rws->cs_flush(r300->cs, flags);
+ } else {
+ /* Even if hw is not dirty, we should at least reset the CS in case
+ * the space checking failed for the first draw operation. */
+ r300->rws->cs_flush(r300->cs, flags);
+ }
}
+}
- /* Create a new fence. */
- if (rfence) {
- *rfence = CALLOC_STRUCT(r300_fence);
- pipe_reference_init(&(*rfence)->reference, 1);
- (*rfence)->ctx = r300;
- }
+static void r300_flush_wrapped(struct pipe_context *pipe,
+ struct pipe_fence_handle **fence)
+{
+ r300_flush(pipe, 0, fence);
}
void r300_init_flush_functions(struct r300_context* r300)
{
- r300->context.flush = r300_flush;
+ r300->context.flush = r300_flush_wrapped;
}
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 2936c3486e..4c502fefb3 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -149,16 +149,18 @@ static void get_external_state(
unsigned i;
unsigned char *swizzle;
+ state->frag_clamp = 0;
+
for (i = 0; i < texstate->sampler_state_count; i++) {
struct r300_sampler_state *s = texstate->sampler_states[i];
struct r300_sampler_view *v = texstate->sampler_views[i];
- struct r300_texture *t;
+ struct r300_resource *t;
if (!s || !v) {
continue;
}
- t = r300_texture(texstate->sampler_views[i]->base.texture);
+ t = r300_resource(texstate->sampler_views[i]->base.texture);
if (s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
state->unit[i].compare_mode_enabled = 1;
@@ -181,7 +183,7 @@ static void get_external_state(
state->unit[i].non_normalized_coords = !s->state.normalized_coords;
/* XXX this should probably take into account STR, not just S. */
- if (t->desc.is_npot) {
+ if (t->tex.is_npot) {
switch (s->state.wrap_s) {
case PIPE_TEX_WRAP_REPEAT:
state->unit[i].wrap_mode = RC_WRAP_REPEAT;
@@ -201,7 +203,7 @@ static void get_external_state(
state->unit[i].wrap_mode = RC_WRAP_NONE;
}
- if (t->desc.b.b.target == PIPE_TEXTURE_3D)
+ if (t->b.b.b.target == PIPE_TEXTURE_3D)
state->unit[i].clamp_and_scale_before_fetch = TRUE;
}
}
@@ -298,44 +300,98 @@ static void r300_emit_fs_code_to_buffer(
}
} else { /* r300 */
struct r300_fragment_program_code *code = &generic_code->code.r300;
-
- shader->cb_code_size = 19 +
- (r300->screen->caps.is_r400 ? 2 : 0) +
- code->alu.length * 4 +
- (code->tex.length ? (1 + code->tex.length) : 0) +
- imm_count * 5;
+ unsigned int alu_length = code->alu.length;
+ unsigned int alu_iterations = ((alu_length - 1) / 64) + 1;
+ unsigned int tex_length = code->tex.length;
+ unsigned int tex_iterations =
+ tex_length > 0 ? ((tex_length - 1) / 32) + 1 : 0;
+ unsigned int iterations =
+ alu_iterations > tex_iterations ? alu_iterations : tex_iterations;
+ unsigned int bank = 0;
+
+ shader->cb_code_size = 15 +
+ /* R400_US_CODE_BANK */
+ (r300->screen->caps.is_r400 ? 2 * (iterations + 1): 0) +
+ /* R400_US_CODE_EXT */
+ (r300->screen->caps.is_r400 ? 2 : 0) +
+ /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0, R400_US_ALU_EXT_ADDR_0 */
+ (code->r390_mode ? (5 * alu_iterations) : 4) +
+ /* R400_US_ALU_EXT_ADDR_[0-63] */
+ (code->r390_mode ? (code->alu.length) : 0) +
+ /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0 */
+ code->alu.length * 4 +
+ /* R300_US_TEX_INST_0, R300_US_TEX_INST_[0-31] */
+ (code->tex.length > 0 ? code->tex.length + tex_iterations : 0) +
+ imm_count * 5;
NEW_CB(shader->cb_code, shader->cb_code_size);
- if (r300->screen->caps.is_r400)
- OUT_CB_REG(R400_US_CODE_BANK, 0);
-
OUT_CB_REG(R300_US_CONFIG, code->config);
OUT_CB_REG(R300_US_PIXSIZE, code->pixsize);
OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset);
+ if (code->r390_mode) {
+ OUT_CB_REG(R400_US_CODE_EXT, code->r400_code_offset_ext);
+ } else if (r300->screen->caps.is_r400) {
+ /* This register appears to affect shaders even if r390_mode is
+ * disabled, so it needs to be set to 0 for shaders that
+ * don't use r390_mode. */
+ OUT_CB_REG(R400_US_CODE_EXT, 0);
+ }
+
OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0, 4);
OUT_CB_TABLE(code->code_addr, 4);
- OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length);
- for (i = 0; i < code->alu.length; i++)
- OUT_CB(code->alu.inst[i].rgb_inst);
+ do {
+ unsigned int bank_alu_length = (alu_length < 64 ? alu_length : 64);
+ unsigned int bank_alu_offset = bank * 64;
+ unsigned int bank_tex_length = (tex_length < 32 ? tex_length : 32);
+ unsigned int bank_tex_offset = bank * 32;
+
+ if (r300->screen->caps.is_r400) {
+ OUT_CB_REG(R400_US_CODE_BANK, code->r390_mode ?
+ (bank << R400_BANK_SHIFT) | R400_R390_MODE_ENABLE : 0);//2
+ }
+
+ if (bank_alu_length > 0) {
+ OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, bank_alu_length);
+ for (i = 0; i < bank_alu_length; i++)
+ OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_inst);
+
+ OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, bank_alu_length);
+ for (i = 0; i < bank_alu_length; i++)
+ OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_addr);
+
+ OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, bank_alu_length);
+ for (i = 0; i < bank_alu_length; i++)
+ OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_inst);
- OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length);
- for (i = 0; i < code->alu.length; i++)
- OUT_CB(code->alu.inst[i].rgb_addr);
+ OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, bank_alu_length);
+ for (i = 0; i < bank_alu_length; i++)
+ OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_addr);
- OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length);
- for (i = 0; i < code->alu.length; i++)
- OUT_CB(code->alu.inst[i].alpha_inst);
+ if (code->r390_mode) {
+ OUT_CB_REG_SEQ(R400_US_ALU_EXT_ADDR_0, bank_alu_length);
+ for (i = 0; i < bank_alu_length; i++)
+ OUT_CB(code->alu.inst[i + bank_alu_offset].r400_ext_addr);
+ }
+ }
+
+ if (bank_tex_length > 0) {
+ OUT_CB_REG_SEQ(R300_US_TEX_INST_0, bank_tex_length);
+ OUT_CB_TABLE(code->tex.inst + bank_tex_offset, bank_tex_length);
+ }
- OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
- for (i = 0; i < code->alu.length; i++)
- OUT_CB(code->alu.inst[i].alpha_addr);
+ alu_length -= bank_alu_length;
+ tex_length -= bank_tex_length;
+ bank++;
+ } while(code->r390_mode && (alu_length > 0 || tex_length > 0));
- if (code->tex.length) {
- OUT_CB_REG_SEQ(R300_US_TEX_INST_0, code->tex.length);
- OUT_CB_TABLE(code->tex.inst, code->tex.length);
+ /* R400_US_CODE_BANK needs to be reset to 0, otherwise some shaders
+ * will be rendered incorrectly. */
+ if (r300->screen->caps.is_r400) {
+ OUT_CB_REG(R400_US_CODE_BANK,
+ code->r390_mode ? R400_R390_MODE_ENABLE : 0);
}
/* Emit immediates. */
@@ -384,17 +440,29 @@ static void r300_translate_fragment_shader(
compiler.code = &shader->code;
compiler.state = shader->compare_state;
compiler.Base.is_r500 = r300->screen->caps.is_r500;
+ compiler.Base.is_r400 = r300->screen->caps.is_r400;
compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
compiler.Base.has_half_swizzles = TRUE;
compiler.Base.has_presub = TRUE;
- compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
+ compiler.Base.max_temp_regs =
+ compiler.Base.is_r500 ? 128 : (compiler.Base.is_r400 ? 64 : 32);
compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
- compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
+ compiler.Base.max_alu_insts =
+ (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 64;
+ compiler.Base.max_tex_insts =
+ (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 32;
compiler.AllocateHwInputs = &allocate_hardware_inputs;
compiler.UserData = &shader->inputs;
find_output_registers(&compiler, shader);
+ shader->write_all = FALSE;
+ for (i = 0; i < shader->info.num_properties; i++) {
+ if (shader->info.properties[i].name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
+ shader->write_all = TRUE;
+ }
+ }
+
if (compiler.Base.Debug & RC_DBG_LOG) {
DBG(r300, DBG_FP, "r300: Initial fragment program\n");
tgsi_dump(tokens, 0);
@@ -407,6 +475,13 @@ static void r300_translate_fragment_shader(
r300_tgsi_to_rc(&ttr, tokens);
+ if (ttr.error) {
+ fprintf(stderr, "r300 FP: Cannot translate a shader. "
+ "Using a dummy shader instead.\n");
+ r300_dummy_fragment_shader(r300, shader);
+ return;
+ }
+
if (!r300->screen->caps.is_r500 ||
compiler.Base.Program.Constants.Count > 200) {
compiler.Base.remove_unused_constants = TRUE;
diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h
index 51bfa88c5e..c86a90b85a 100644
--- a/src/gallium/drivers/r300/r300_fs.h
+++ b/src/gallium/drivers/r300/r300_fs.h
@@ -54,6 +54,9 @@ struct r300_fragment_shader_code {
uint32_t *cb_code;
struct r300_fragment_shader_code* next;
+
+ boolean write_all;
+
};
struct r300_fragment_shader {
@@ -81,4 +84,10 @@ static INLINE boolean r300_fragment_shader_writes_depth(struct r300_fragment_sha
return (fs->shader->code.writes_depth) ? TRUE : FALSE;
}
+static INLINE boolean r300_fragment_shader_writes_all(struct r300_fragment_shader *fs)
+{
+ if (!fs)
+ return FALSE;
+ return (fs->shader->write_all) ? TRUE : FALSE;
+}
#endif /* R300_FS_H */
diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c
index c22e307c67..ecaadf4af8 100644
--- a/src/gallium/drivers/r300/r300_hyperz.c
+++ b/src/gallium/drivers/r300/r300_hyperz.c
@@ -22,7 +22,6 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "r300_context.h"
-#include "r300_hyperz.h"
#include "r300_reg.h"
#include "r300_fs.h"
#include "r300_winsys.h"
@@ -41,58 +40,74 @@
/* The HyperZ setup */
/*****************************************************************************/
-static bool r300_get_sc_hz_max(struct r300_context *r300)
+static enum r300_hiz_func r300_get_hiz_func(struct r300_context *r300)
{
- struct r300_dsa_state *dsa_state = r300->dsa_state.state;
- int func = dsa_state->z_stencil_control & R300_ZS_MASK;
- int ret = R300_SC_HYPERZ_MIN;
+ struct r300_dsa_state *dsa = r300->dsa_state.state;
- if (func >= R300_ZS_GEQUAL && func <= R300_ZS_ALWAYS)
- ret = R300_SC_HYPERZ_MAX;
- return ret;
+ if (!dsa->dsa.depth.enabled || !dsa->dsa.depth.writemask)
+ return HIZ_FUNC_NONE;
+
+ switch (dsa->dsa.depth.func) {
+ case PIPE_FUNC_NEVER:
+ case PIPE_FUNC_EQUAL:
+ case PIPE_FUNC_NOTEQUAL:
+ case PIPE_FUNC_ALWAYS:
+ return HIZ_FUNC_NONE;
+
+ case PIPE_FUNC_LESS:
+ case PIPE_FUNC_LEQUAL:
+ return HIZ_FUNC_MAX;
+
+ case PIPE_FUNC_GREATER:
+ case PIPE_FUNC_GEQUAL:
+ return HIZ_FUNC_MIN;
+
+ default:
+ assert(0);
+ return HIZ_FUNC_NONE;
+ }
}
-static bool r300_zfunc_same_direction(int func1, int func2)
+/* Return what's used for the depth test (either minimum or maximum). */
+static unsigned r300_get_sc_hz_max(struct r300_context *r300)
{
- /* func1 is less/lessthan */
- if ((func1 == R300_ZS_LESS || func1 == R300_ZS_LEQUAL) &&
- (func2 == R300_ZS_EQUAL || func2 == R300_ZS_GEQUAL ||
- func2 == R300_ZS_GREATER))
- return FALSE;
-
- /* func1 is greater/greaterthan */
- if ((func1 == R300_ZS_GEQUAL || func1 == R300_ZS_GREATER) &&
- (func2 == R300_ZS_LESS || func2 == R300_ZS_LEQUAL))
- return FALSE;
+ struct r300_dsa_state *dsa = r300->dsa_state.state;
+ unsigned func = dsa->dsa.depth.func;
- return TRUE;
+ return func >= PIPE_FUNC_GREATER ? R300_SC_HYPERZ_MAX : R300_SC_HYPERZ_MIN;
}
-static int r300_get_hiz_min(struct r300_context *r300)
+static boolean r300_is_hiz_func_valid(struct r300_context *r300)
{
- struct r300_dsa_state *dsa_state = r300->dsa_state.state;
- int func = dsa_state->z_stencil_control & R300_ZS_MASK;
- int ret = R300_HIZ_MIN;
+ struct r300_dsa_state *dsa = r300->dsa_state.state;
+ unsigned func = dsa->dsa.depth.func;
- if (func == R300_ZS_LESS || func == R300_ZS_LEQUAL)
- ret = R300_HIZ_MAX;
- return ret;
+ if (r300->hiz_func == HIZ_FUNC_NONE)
+ return TRUE;
+
+ /* func1 is less/lessthan */
+ if (r300->hiz_func == HIZ_FUNC_MAX &&
+ (func == PIPE_FUNC_GEQUAL || func == PIPE_FUNC_GREATER))
+ return FALSE;
+
+ /* func1 is greater/greaterthan */
+ if (r300->hiz_func == HIZ_FUNC_MIN &&
+ (func == PIPE_FUNC_LESS || func == PIPE_FUNC_LEQUAL))
+ return FALSE;
+
+ return TRUE;
}
static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s)
{
- if (s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP ||
- s->zfail_op != PIPE_STENCIL_OP_KEEP))
- return TRUE;
- return FALSE;
+ return s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP ||
+ s->zfail_op != PIPE_STENCIL_OP_KEEP);
}
static boolean r300_can_hiz(struct r300_context *r300)
{
- struct r300_dsa_state *dsa_state = r300->dsa_state.state;
- struct pipe_depth_stencil_alpha_state *dsa = &dsa_state->dsa;
- struct r300_screen* r300screen = r300->screen;
- struct r300_hyperz_state *z = r300->hyperz_state.state;
+ struct r300_dsa_state *dsa = r300->dsa_state.state;
+ struct r300_screen *r300screen = r300->screen;
/* shader writes depth - no HiZ */
if (r300_fragment_shader_writes_depth(r300_fs(r300))) /* (5) */
@@ -100,34 +115,21 @@ static boolean r300_can_hiz(struct r300_context *r300)
if (r300->query_current)
return FALSE;
+
/* if stencil fail/zfail op is not KEEP */
- if (r300_dsa_stencil_op_not_keep(&dsa->stencil[0]) ||
- r300_dsa_stencil_op_not_keep(&dsa->stencil[1]))
+ if (r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[0]) ||
+ r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[1]))
return FALSE;
- if (dsa->depth.enabled) {
+ if (dsa->dsa.depth.enabled) {
/* if depth func is EQUAL pre-r500 */
- if (dsa->depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500)
+ if (dsa->dsa.depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500)
return FALSE;
+
/* if depth func is NOTEQUAL */
- if (dsa->depth.func == PIPE_FUNC_NOTEQUAL)
+ if (dsa->dsa.depth.func == PIPE_FUNC_NOTEQUAL)
return FALSE;
}
- /* depth comparison function - if just cleared save and return okay */
- if (z->current_func == -1) {
- int func = dsa_state->z_stencil_control & R300_ZS_MASK;
- if (func != 0 && func != 7)
- z->current_func = dsa_state->z_stencil_control & R300_ZS_MASK;
- } else {
- /* simple don't change */
- if (!r300_zfunc_same_direction(z->current_func,
- (dsa_state->z_stencil_control & R300_ZS_MASK))) {
- DBG(r300, DBG_HYPERZ,
- "z func changed direction - disabling hyper-z %d -> %d\n",
- z->current_func, dsa_state->z_stencil_control);
- return FALSE;
- }
- }
return TRUE;
}
@@ -137,10 +139,8 @@ static void r300_update_hyperz(struct r300_context* r300)
(struct r300_hyperz_state*)r300->hyperz_state.state;
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
- struct r300_texture *zstex =
- fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL;
- boolean zmask_in_use = FALSE;
- boolean hiz_in_use = FALSE;
+ struct r300_resource *zstex =
+ fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL;
z->gb_z_peq_config = 0;
z->zb_bw_cntl = 0;
@@ -152,48 +152,54 @@ static void r300_update_hyperz(struct r300_context* r300)
return;
}
- if (!zstex)
- return;
-
- if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
+ if (!zstex ||
+ !r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
return;
- zmask_in_use = zstex->zmask_in_use[fb->zsbuf->u.tex.level];
- hiz_in_use = zstex->hiz_in_use[fb->zsbuf->u.tex.level];
-
- /* Z fastfill. */
- if (zmask_in_use) {
- z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/
- }
-
/* Zbuffer compression. */
- if (zmask_in_use && r300->z_compression) {
- z->zb_bw_cntl |= R300_RD_COMP_ENABLE;
- if (r300->z_decomp_rd == false)
+ if (r300->zmask_in_use && !r300->hyperz_locked) {
+ z->zb_bw_cntl |= R300_FAST_FILL_ENABLE |
+ /*R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE |*/
+ R300_RD_COMP_ENABLE;
+
+ if (!r300->zmask_decompress) {
z->zb_bw_cntl |= R300_WR_COMP_ENABLE;
+ }
}
- /* RV350 and up optimizations. */
- /* The section 10.4.9 in the docs is a lie. */
- if (r300->z_compression == RV350_Z_COMPRESS_88)
+
+ if (zstex->tex.zcomp8x8[fb->zsbuf->u.tex.level]) {
z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8;
+ }
+
+ /* HiZ. */
+ if (r300->hiz_in_use && !r300->hyperz_locked) {
+ /* Set the HiZ function if needed. */
+ if (r300->hiz_func == HIZ_FUNC_NONE) {
+ r300->hiz_func = r300_get_hiz_func(r300);
+ }
- if (hiz_in_use) {
- bool can_hiz = r300_can_hiz(r300);
- if (can_hiz) {
- z->zb_bw_cntl |= R300_HIZ_ENABLE;
- z->sc_hyperz |= R300_SC_HYPERZ_ENABLE;
- z->sc_hyperz |= r300_get_sc_hz_max(r300);
- z->zb_bw_cntl |= r300_get_hiz_min(r300);
+ /* If the depth function is inverted, HiZ must be disabled. */
+ if (!r300_is_hiz_func_valid(r300)) {
+ r300->hiz_in_use = FALSE;
+ } else if (r300_can_hiz(r300)) {
+ /* Setup the HiZ bits. */
+ z->zb_bw_cntl |=
+ R300_HIZ_ENABLE |
+ (r300->hiz_func == HIZ_FUNC_MIN ? R300_HIZ_MIN : R300_HIZ_MAX);
+
+ z->sc_hyperz |= R300_SC_HYPERZ_ENABLE |
+ r300_get_sc_hz_max(r300);
+
+ if (r300->screen->caps.is_r500) {
+ z->zb_bw_cntl |= R500_HIZ_EQUAL_REJECT_ENABLE;
+ }
}
}
/* R500-specific features and optimizations. */
if (r300->screen->caps.is_r500) {
- z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3;
- z->zb_bw_cntl |=
- R500_HIZ_EQUAL_REJECT_ENABLE |
- R500_PEQ_PACKING_ENABLE |
- R500_COVERED_PTR_MASKING_ENABLE;
+ z->zb_bw_cntl |= R500_PEQ_PACKING_ENABLE |
+ R500_COVERED_PTR_MASKING_ENABLE;
}
}
@@ -285,135 +291,11 @@ static void r300_update_ztop(struct r300_context* r300)
r300_mark_atom_dirty(r300, &r300->ztop_state);
}
-#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
-
-static void r300_update_hiz_clear(struct r300_context *r300)
-{
- struct pipe_framebuffer_state *fb =
- (struct pipe_framebuffer_state*)r300->fb_state.state;
- uint32_t height;
-
- height = ALIGN_DIVUP(fb->zsbuf->height, 4);
- r300->hiz_clear.size = height * 4;
-}
-
-static void r300_update_zmask_clear(struct r300_context *r300)
-{
- struct pipe_framebuffer_state *fb =
- (struct pipe_framebuffer_state*)r300->fb_state.state;
- uint32_t height;
- int mult;
-
- if (r300->z_compression == RV350_Z_COMPRESS_88)
- mult = 8;
- else
- mult = 4;
-
- height = ALIGN_DIVUP(fb->zsbuf->height, mult);
-
- r300->zmask_clear.size = height * 4;
-}
-
void r300_update_hyperz_state(struct r300_context* r300)
{
r300_update_ztop(r300);
+
if (r300->hyperz_state.dirty) {
r300_update_hyperz(r300);
}
-
- if (r300->hiz_clear.dirty) {
- r300_update_hiz_clear(r300);
- }
- if (r300->zmask_clear.dirty) {
- r300_update_zmask_clear(r300);
- }
-}
-
-void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf)
-{
- struct r300_texture *tex;
- uint32_t zsize, ndw;
- int level = surf->base.u.tex.level;
-
- tex = r300_texture(surf->base.texture);
-
- if (tex->hiz_mem[level])
- return;
-
- zsize = tex->desc.layer_size_in_bytes[level];
- zsize /= util_format_get_blocksize(tex->desc.b.b.format);
- ndw = ALIGN_DIVUP(zsize, 64);
-
- tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0);
- return;
-}
-
-void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress)
-{
- int bsize = 256;
- uint32_t zsize, ndw;
- int level = surf->base.u.tex.level;
- struct r300_texture *tex;
-
- tex = r300_texture(surf->base.texture);
-
- /* We currently don't handle decompression for 3D textures and cubemaps
- * correctly. */
- if (tex->desc.b.b.target != PIPE_TEXTURE_1D &&
- tex->desc.b.b.target != PIPE_TEXTURE_2D &&
- tex->desc.b.b.target != PIPE_TEXTURE_RECT)
- return;
-
- /* Cannot flush zmask of 16-bit zbuffers. */
- if (util_format_get_blocksizebits(tex->desc.b.b.format) == 16)
- return;
-
- if (tex->zmask_mem[level])
- return;
-
- zsize = tex->desc.layer_size_in_bytes[level];
- zsize /= util_format_get_blocksize(tex->desc.b.b.format);
-
- /* each zmask dword represents 16 4x4 blocks - which is 256 pixels
- or 16 8x8 depending on the gb peq flag = 1024 pixels */
- if (compress == RV350_Z_COMPRESS_88)
- bsize = 1024;
-
- ndw = ALIGN_DIVUP(zsize, bsize);
- tex->zmask_mem[level] = u_mmAllocMem(r300->zmask_mm, ndw, 0, 0);
- return;
-}
-
-boolean r300_hyperz_init_mm(struct r300_context *r300)
-{
- struct r300_screen* r300screen = r300->screen;
- int frag_pipes = r300screen->caps.num_frag_pipes;
-
- r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes);
- if (!r300->zmask_mm)
- return FALSE;
-
- if (r300screen->caps.hiz_ram) {
- r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes);
- if (!r300->hiz_mm) {
- u_mmDestroy(r300->zmask_mm);
- r300->zmask_mm = NULL;
- return FALSE;
- }
- }
-
- return TRUE;
-}
-
-void r300_hyperz_destroy_mm(struct r300_context *r300)
-{
- struct r300_screen* r300screen = r300->screen;
-
- if (r300screen->caps.hiz_ram) {
- u_mmDestroy(r300->hiz_mm);
- r300->hiz_mm = NULL;
- }
-
- u_mmDestroy(r300->zmask_mm);
- r300->zmask_mm = NULL;
}
diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h
deleted file mode 100644
index 30a23ec649..0000000000
--- a/src/gallium/drivers/r300/r300_hyperz.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright 2010 Marek Olšák <maraeo@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_HYPERZ_H
-#define R300_HYPERZ_H
-
-struct r300_context;
-
-void r300_update_hyperz_state(struct r300_context* r300);
-
-void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf);
-void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress);
-
-boolean r300_hyperz_init_mm(struct r300_context *r300);
-void r300_hyperz_destroy_mm(struct r300_context *r300);
-#endif
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 6223e04321..717485f43c 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -57,10 +57,10 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe,
insert_at_tail(&r300->query_list, q);
/* Open up the occlusion query buffer. */
- q->buffer = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096,
+ q->buf = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096,
PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM,
q->domain);
- q->cs_buffer = r300->rws->buffer_get_cs_handle(r300->rws, q->buffer);
+ q->cs_buf = r300->rws->buffer_get_cs_handle(q->buf);
return (struct pipe_query*)q;
}
@@ -68,10 +68,9 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe,
static void r300_destroy_query(struct pipe_context* pipe,
struct pipe_query* query)
{
- struct r300_context *r300 = r300_context(pipe);
struct r300_query* q = r300_query(query);
- r300->rws->buffer_reference(r300->rws, &q->buffer, NULL);
+ r300_winsys_bo_reference(&q->buf, NULL);
remove_from_list(q);
FREE(query);
}
@@ -128,16 +127,12 @@ static boolean r300_get_query_result(struct pipe_context* pipe,
{
struct r300_context* r300 = r300_context(pipe);
struct r300_query *q = r300_query(query);
- unsigned flags, i;
+ unsigned i;
uint32_t temp, *map;
- uint64_t *result = (uint64_t*)vresult;
-
- if (!q->flushed)
- pipe->flush(pipe, 0, NULL);
-
- flags = PIPE_TRANSFER_READ | (!wait ? PIPE_TRANSFER_DONTBLOCK : 0);
- map = r300->rws->buffer_map(r300->rws, q->buffer, r300->cs, flags);
+ map = r300->rws->buffer_map(q->buf, r300->cs,
+ PIPE_TRANSFER_READ |
+ (!wait ? PIPE_TRANSFER_DONTBLOCK : 0));
if (!map)
return FALSE;
@@ -148,9 +143,9 @@ static boolean r300_get_query_result(struct pipe_context* pipe,
map++;
}
- r300->rws->buffer_unmap(r300->rws, q->buffer);
+ r300->rws->buffer_unmap(q->buf);
- *result = temp;
+ *((uint64_t*)vresult) = temp;
return TRUE;
}
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 613186e815..bb30b1ab0b 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -467,6 +467,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view
*/
+#define R500_VAP_TEX_TO_COLOR_CNTL 0x2218
+
#define R300_VAP_CLIP_CNTL 0x221C
# define R300_VAP_UCP_ENABLE_0 (1 << 0)
# define R300_VAP_UCP_ENABLE_1 (1 << 1)
@@ -857,6 +859,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R500_TX_DIRECTION_HORIZONTAL (0<<27)
# define R500_TX_DIRECTION_VERITCAL (1<<27)
+#define R500_SU_TEX_WRAP_PS3 0x4114
+
/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */
#define R300_GA_POINT_S0 0x4200
@@ -2162,14 +2166,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* R4xx extended fragment shader registers. */
#define R400_US_ALU_EXT_ADDR_0 0x4ac0 /* up to 63 (0x4bbc) */
-# define R400_ADDR0_EXT_RGB_MSB_BIT 0x01
-# define R400_ADDR1_EXT_RGB_MSB_BIT 0x02
-# define R400_ADDR2_EXT_RGB_MSB_BIT 0x04
+# define R400_ADDR_EXT_RGB_MSB_BIT(x) (1 << (x))
# define R400_ADDRD_EXT_RGB_MSB_BIT 0x08
-# define R400_ADDR0_EXT_A_MSB_BIT 0x10
-# define R400_ADDR1_EXT_A_MSB_BIT 0x20
-# define R400_ADDR2_EXT_A_MSB_BIT 0x40
+# define R400_ADDR_EXT_A_MSB_BIT(x) (1 << ((x) + 4))
# define R400_ADDRD_EXT_A_MSB_BIT 0x80
+
#define R400_US_CODE_BANK 0x46b8
# define R400_BANK_SHIFT 0
# define R400_BANK_MASK 0xf
@@ -2631,8 +2632,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_ZB_BW_CNTL 0x4f1c
# define R300_HIZ_DISABLE (0 << 0)
# define R300_HIZ_ENABLE (1 << 0)
-# define R300_HIZ_MIN (0 << 1)
-# define R300_HIZ_MAX (1 << 1)
+# define R300_HIZ_MAX (0 << 1)
+# define R300_HIZ_MIN (1 << 1)
# define R300_FAST_FILL_DISABLE (0 << 2)
# define R300_FAST_FILL_ENABLE (1 << 2)
# define R300_RD_COMP_DISABLE (0 << 3)
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index b4197e0352..26594dabe4 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -39,7 +39,6 @@
#include "r300_screen_buffer.h"
#include "r300_emit.h"
#include "r300_reg.h"
-#include "r300_state_derived.h"
#include <limits.h>
@@ -128,16 +127,30 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias)
END_CS;
}
+static void r300_emit_draw_init(struct r300_context *r300, unsigned mode,
+ unsigned min_index, unsigned max_index)
+{
+ CS_LOCALS(r300);
+
+ BEGIN_CS(5);
+ OUT_CS_REG(R300_GA_COLOR_CONTROL,
+ r300_provoking_vertex_fixes(r300, mode));
+ OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+ OUT_CS(max_index);
+ OUT_CS(min_index);
+ END_CS;
+}
+
/* This function splits the index bias value into two parts:
* - buffer_offset: the value that can be safely added to buffer offsets
- * in r300_emit_aos (it must yield a positive offset when added to
+ * in r300_emit_vertex_arrays (it must yield a positive offset when added to
* a vertex buffer offset)
* - index_offset: the value that must be manually subtracted from indices
* in an index buffer to achieve negative offsets. */
static void r300_split_index_bias(struct r300_context *r300, int index_bias,
int *buffer_offset, int *index_offset)
{
- struct pipe_vertex_buffer *vb, *vbufs = r300->vertex_buffer;
+ struct pipe_vertex_buffer *vb, *vbufs = r300->vbuf_mgr->vertex_buffer;
struct pipe_vertex_element *velem = r300->velems->velem;
unsigned i, size;
int max_neg_bias;
@@ -164,10 +177,10 @@ static void r300_split_index_bias(struct r300_context *r300, int index_bias,
}
enum r300_prepare_flags {
- PREP_FIRST_DRAW = (1 << 0), /* call emit_dirty_state and friends? */
+ PREP_EMIT_STATES = (1 << 0), /* call emit_dirty_state and friends? */
PREP_VALIDATE_VBOS = (1 << 1), /* validate VBOs? */
- PREP_EMIT_AOS = (1 << 2), /* call emit_aos? */
- PREP_EMIT_AOS_SWTCL = (1 << 3), /* call emit_aos_swtcl? */
+ PREP_EMIT_AOS = (1 << 2), /* call emit_vertex_arrays? */
+ PREP_EMIT_AOS_SWTCL = (1 << 3), /* call emit_vertex_arrays_swtcl? */
PREP_INDEXED = (1 << 4) /* is this draw_elements? */
};
@@ -180,33 +193,33 @@ enum r300_prepare_flags {
* \return TRUE if the CS was flushed
*/
static boolean r300_reserve_cs_dwords(struct r300_context *r300,
- enum r300_prepare_flags flags,
- unsigned cs_dwords)
+ enum r300_prepare_flags flags,
+ unsigned cs_dwords)
{
boolean flushed = FALSE;
- boolean first_draw = flags & PREP_FIRST_DRAW;
- boolean emit_aos = flags & PREP_EMIT_AOS;
- boolean emit_aos_swtcl = flags & PREP_EMIT_AOS_SWTCL;
+ boolean first_draw = flags & PREP_EMIT_STATES;
+ boolean emit_vertex_arrays = flags & PREP_EMIT_AOS;
+ boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL;
/* Add dirty state, index offset, and AOS. */
if (first_draw) {
cs_dwords += r300_get_num_dirty_dwords(r300);
- if (r300->screen->caps.index_bias_supported)
+ if (r300->screen->caps.is_r500)
cs_dwords += 2; /* emit_index_offset */
- if (emit_aos)
- cs_dwords += 55; /* emit_aos */
+ if (emit_vertex_arrays)
+ cs_dwords += 55; /* emit_vertex_arrays */
- if (emit_aos_swtcl)
- cs_dwords += 7; /* emit_aos_swtcl */
+ if (emit_vertex_arrays_swtcl)
+ cs_dwords += 7; /* emit_vertex_arrays_swtcl */
}
cs_dwords += r300_get_num_cs_end_dwords(r300);
/* Reserve requested CS space. */
if (cs_dwords > (R300_MAX_CMDBUF_DWORDS - r300->cs->cdw)) {
- r300->context.flush(&r300->context, 0, NULL);
+ r300_flush(&r300->context, R300_FLUSH_ASYNC, NULL);
flushed = TRUE;
}
@@ -218,57 +231,55 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300,
* \param r300 The context.
* \param flags See r300_prepare_flags.
* \param index_buffer The index buffer to validate. The parameter may be NULL.
- * \param aos_offset The offset passed to emit_aos.
+ * \param buffer_offset The offset passed to emit_vertex_arrays.
* \param index_bias The index bias to emit.
+ * \param instance_id Index of instance to render
* \return TRUE if rendering should be skipped
*/
static boolean r300_emit_states(struct r300_context *r300,
enum r300_prepare_flags flags,
struct pipe_resource *index_buffer,
- int aos_offset,
- int index_bias)
+ int buffer_offset,
+ int index_bias, int instance_id)
{
- boolean first_draw = flags & PREP_FIRST_DRAW;
- boolean emit_aos = flags & PREP_EMIT_AOS;
- boolean emit_aos_swtcl = flags & PREP_EMIT_AOS_SWTCL;
+ boolean first_draw = flags & PREP_EMIT_STATES;
+ boolean emit_vertex_arrays = flags & PREP_EMIT_AOS;
+ boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL;
boolean indexed = flags & PREP_INDEXED;
boolean validate_vbos = flags & PREP_VALIDATE_VBOS;
/* Validate buffers and emit dirty state if needed. */
if (first_draw) {
- /* upload buffers first */
- if (r300->screen->caps.has_tcl && r300->any_user_vbs) {
- r300_upload_user_buffers(r300);
- r300->any_user_vbs = false;
- }
-
- if (r300->validate_buffers) {
- if (!r300_emit_buffer_validate(r300, validate_vbos,
- index_buffer)) {
- fprintf(stderr, "r300: CS space validation failed. "
- "(not enough memory?) Skipping rendering.\n");
- return FALSE;
- }
-
- /* Consider the validation done only if everything was validated. */
- if (validate_vbos) {
- r300->validate_buffers = FALSE;
- }
+ if (!r300_emit_buffer_validate(r300, validate_vbos,
+ index_buffer)) {
+ fprintf(stderr, "r300: CS space validation failed. "
+ "(not enough memory?) Skipping rendering.\n");
+ return FALSE;
}
r300_emit_dirty_state(r300);
- if (r300->screen->caps.index_bias_supported) {
+ if (r300->screen->caps.is_r500) {
if (r300->screen->caps.has_tcl)
r500_emit_index_bias(r300, index_bias);
else
r500_emit_index_bias(r300, 0);
}
- if (emit_aos)
- r300_emit_aos(r300, aos_offset, indexed);
+ if (emit_vertex_arrays &&
+ (r300->vertex_arrays_dirty ||
+ r300->vertex_arrays_indexed != indexed ||
+ r300->vertex_arrays_offset != buffer_offset ||
+ r300->vertex_arrays_instance_id != instance_id)) {
+ r300_emit_vertex_arrays(r300, buffer_offset, indexed, instance_id);
+
+ r300->vertex_arrays_dirty = FALSE;
+ r300->vertex_arrays_indexed = indexed;
+ r300->vertex_arrays_offset = buffer_offset;
+ r300->vertex_arrays_instance_id = instance_id;
+ }
- if (emit_aos_swtcl)
- r300_emit_aos_swtcl(r300, indexed);
+ if (emit_vertex_arrays_swtcl)
+ r300_emit_vertex_arrays_swtcl(r300, indexed);
}
return TRUE;
@@ -281,28 +292,32 @@ static boolean r300_emit_states(struct r300_context *r300,
* \param flags See r300_prepare_flags.
* \param index_buffer The index buffer to validate. The parameter may be NULL.
* \param cs_dwords The number of dwords to reserve in CS.
- * \param aos_offset The offset passed to emit_aos.
+ * \param buffer_offset The offset passed to emit_vertex_arrays.
* \param index_bias The index bias to emit.
+ * \param instance_id The instance to render.
* \return TRUE if rendering should be skipped
*/
static boolean r300_prepare_for_rendering(struct r300_context *r300,
enum r300_prepare_flags flags,
struct pipe_resource *index_buffer,
unsigned cs_dwords,
- int aos_offset,
- int index_bias)
+ int buffer_offset,
+ int index_bias,
+ int instance_id)
{
+ /* Make sure there is enough space in the command stream and emit states. */
if (r300_reserve_cs_dwords(r300, flags, cs_dwords))
- flags |= PREP_FIRST_DRAW;
+ flags |= PREP_EMIT_STATES;
- return r300_emit_states(r300, flags, index_buffer, aos_offset, index_bias);
+ return r300_emit_states(r300, flags, index_buffer, buffer_offset,
+ index_bias, instance_id);
}
static boolean immd_is_good_idea(struct r300_context *r300,
unsigned count)
{
struct pipe_vertex_element* velem;
- struct pipe_vertex_buffer* vbuf;
+ struct pipe_resource *buf;
boolean checked[PIPE_MAX_ATTRIBS] = {0};
unsigned vertex_element_count = r300->velems->count;
unsigned i, vbi;
@@ -326,18 +341,12 @@ static boolean immd_is_good_idea(struct r300_context *r300,
vbi = velem->vertex_buffer_index;
if (!checked[vbi]) {
- vbuf = &r300->vertex_buffer[vbi];
+ buf = r300->vbuf_mgr->real_vertex_buffer[vbi];
- if (!(r300_buffer(vbuf->buffer)->domain & R300_DOMAIN_GTT)) {
+ if ((r300_resource(buf)->domain != R300_DOMAIN_GTT)) {
return FALSE;
}
- if (r300_buffer_is_referenced(&r300->context,
- vbuf->buffer,
- R300_REF_CS | R300_REF_HW)) {
- /* It's a very bad idea to map it... */
- return FALSE;
- }
checked[vbi] = TRUE;
}
}
@@ -348,10 +357,8 @@ static boolean immd_is_good_idea(struct r300_context *r300,
* The HWTCL draw functions. *
****************************************************************************/
-static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
- unsigned mode,
- unsigned start,
- unsigned count)
+static void r300_draw_arrays_immediate(struct r300_context *r300,
+ const struct pipe_draw_info *info)
{
struct pipe_vertex_element* velem;
struct pipe_vertex_buffer* vbuf;
@@ -362,7 +369,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
unsigned vertex_size = r300->velems->vertex_size_dwords;
/* The number of dwords for this draw operation. */
- unsigned dwords = 9 + count * vertex_size;
+ unsigned dwords = 4 + info->count * vertex_size;
/* Size of the vertex element, in dwords. */
unsigned size[PIPE_MAX_ATTRIBS];
@@ -372,47 +379,42 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
unsigned stride[PIPE_MAX_ATTRIBS];
/* Mapped vertex buffers. */
- uint32_t* map[PIPE_MAX_ATTRIBS];
+ uint32_t* map[PIPE_MAX_ATTRIBS] = {0};
uint32_t* mapelem[PIPE_MAX_ATTRIBS];
- struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {0};
CS_LOCALS(r300);
- if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0))
+ if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1))
return;
/* Calculate the vertex size, offsets, strides etc. and map the buffers. */
for (i = 0; i < vertex_element_count; i++) {
velem = &r300->velems->velem[i];
- size[i] = r300->velems->hw_format_size[i] / 4;
+ size[i] = r300->velems->format_size[i] / 4;
vbi = velem->vertex_buffer_index;
- vbuf = &r300->vertex_buffer[vbi];
+ vbuf = &r300->vbuf_mgr->vertex_buffer[vbi];
stride[i] = vbuf->stride / 4;
/* Map the buffer. */
- if (!transfer[vbi]) {
- map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context,
- vbuf->buffer,
- PIPE_TRANSFER_READ,
- &transfer[vbi]);
- map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start;
+ if (!map[vbi]) {
+ map[vbi] = (uint32_t*)r300->rws->buffer_map(
+ r300_resource(r300->vbuf_mgr->real_vertex_buffer[vbi])->buf,
+ r300->cs, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED);
+ map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * info->start;
}
mapelem[i] = map[vbi] + (velem->src_offset / 4);
}
+ r300_emit_draw_init(r300, info->mode, 0, info->count-1);
+
BEGIN_CS(dwords);
- OUT_CS_REG(R300_GA_COLOR_CONTROL,
- r300_provoking_vertex_fixes(r300, mode));
OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
- OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
- OUT_CS(count - 1);
- OUT_CS(0);
- OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size);
- OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) |
- r300_translate_primitive(mode));
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, info->count * vertex_size);
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (info->count << 16) |
+ r300_translate_primitive(info->mode));
/* Emit vertices. */
- for (v = 0; v < count; v++) {
+ for (v = 0; v < info->count; v++) {
for (i = 0; i < vertex_element_count; i++) {
OUT_CS_TABLE(&mapelem[i][stride[i] * v], size[i]);
}
@@ -423,10 +425,9 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
for (i = 0; i < vertex_element_count; i++) {
vbi = r300->velems->velem[i].vertex_buffer_index;
- if (transfer[vbi]) {
- vbuf = &r300->vertex_buffer[vbi];
- pipe_buffer_unmap(&r300->context, vbuf->buffer, transfer[vbi]);
- transfer[vbi] = NULL;
+ if (map[vbi]) {
+ r300->rws->buffer_unmap(r300_resource(r300->vbuf_mgr->real_vertex_buffer[vbi])->buf);
+ map[vbi] = NULL;
}
}
}
@@ -444,15 +445,12 @@ static void r300_emit_draw_arrays(struct r300_context *r300,
return;
}
- BEGIN_CS(7 + (alt_num_verts ? 2 : 0));
+ r300_emit_draw_init(r300, mode, 0, count-1);
+
+ BEGIN_CS(2 + (alt_num_verts ? 2 : 0));
if (alt_num_verts) {
OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
}
- OUT_CS_REG(R300_GA_COLOR_CONTROL,
- r300_provoking_vertex_fixes(r300, mode));
- OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
- OUT_CS(count - 1);
- OUT_CS(0);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
r300_translate_primitive(mode) |
@@ -463,37 +461,53 @@ static void r300_emit_draw_arrays(struct r300_context *r300,
static void r300_emit_draw_elements(struct r300_context *r300,
struct pipe_resource* indexBuffer,
unsigned indexSize,
- unsigned minIndex,
- unsigned maxIndex,
+ unsigned min_index,
+ unsigned max_index,
unsigned mode,
unsigned start,
- unsigned count)
+ unsigned count,
+ uint16_t *imm_indices3)
{
- uint32_t count_dwords;
- uint32_t offset_dwords = indexSize * start / sizeof(uint32_t);
+ uint32_t count_dwords, offset_dwords;
boolean alt_num_verts = count > 65535;
CS_LOCALS(r300);
- if (count >= (1 << 24)) {
+ if (count >= (1 << 24) || max_index >= (1 << 24)) {
fprintf(stderr, "r300: Got a huge number of vertices: %i, "
- "refusing to render.\n", count);
+ "refusing to render (max_index: %i).\n", count, max_index);
return;
}
- maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index);
-
DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n",
- count, minIndex, maxIndex);
+ count, min_index, max_index);
+
+ r300_emit_draw_init(r300, mode, min_index, max_index);
+
+ /* If start is odd, render the first triangle with indices embedded
+ * in the command stream. This will increase start by 3 and make it
+ * even. We can then proceed without a fallback. */
+ if (indexSize == 2 && (start & 1) &&
+ mode == PIPE_PRIM_TRIANGLES) {
+ BEGIN_CS(4);
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 2);
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (3 << 16) |
+ R300_VAP_VF_CNTL__PRIM_TRIANGLES);
+ OUT_CS(imm_indices3[1] << 16 | imm_indices3[0]);
+ OUT_CS(imm_indices3[2]);
+ END_CS;
- BEGIN_CS(13 + (alt_num_verts ? 2 : 0));
+ start += 3;
+ count -= 3;
+ if (!count)
+ return;
+ }
+
+ offset_dwords = indexSize * start / sizeof(uint32_t);
+
+ BEGIN_CS(8 + (alt_num_verts ? 2 : 0));
if (alt_num_verts) {
OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
}
- OUT_CS_REG(R300_GA_COLOR_CONTROL,
- r300_provoking_vertex_fixes(r300, mode));
- OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
- OUT_CS(maxIndex);
- OUT_CS(minIndex);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
if (indexSize == 4) {
count_dwords = count;
@@ -508,68 +522,169 @@ static void r300_emit_draw_elements(struct r300_context *r300,
(alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0));
}
- /* INDX_BUFFER is a truly special packet3.
- * Unlike most other packet3, where the offset is after the count,
- * the order is reversed, so the relocation ends up carrying the
- * size of the indexbuf instead of the offset.
- */
OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2);
OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) |
(0 << R300_INDX_BUFFER_SKIP_SHIFT));
OUT_CS(offset_dwords << 2);
- OUT_CS_BUF_RELOC(indexBuffer, count_dwords,
- r300_buffer(indexBuffer)->domain, 0);
+ OUT_CS(count_dwords);
+ OUT_CS_RELOC(r300_resource(indexBuffer));
+ END_CS;
+}
+
+static void r300_draw_elements_immediate(struct r300_context *r300,
+ const struct pipe_draw_info *info)
+{
+ uint8_t *ptr1;
+ uint16_t *ptr2;
+ uint32_t *ptr4;
+ unsigned index_size = r300->index_buffer.index_size;
+ unsigned i, count_dwords = index_size == 4 ? info->count :
+ (info->count + 1) / 2;
+ CS_LOCALS(r300);
+
+ /* 19 dwords for r300_draw_elements_immediate. Give up if the function fails. */
+ if (!r300_prepare_for_rendering(r300,
+ PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS |
+ PREP_INDEXED, NULL, 2+count_dwords, 0, info->index_bias, -1))
+ return;
+
+ r300_emit_draw_init(r300, info->mode, info->min_index, info->max_index);
+
+ BEGIN_CS(2 + count_dwords);
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, count_dwords);
+ switch (index_size) {
+ case 1:
+ ptr1 = r300_resource(r300->index_buffer.buffer)->b.user_ptr;
+ ptr1 += info->start;
+
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) |
+ r300_translate_primitive(info->mode));
+
+ if (info->index_bias && !r300->screen->caps.is_r500) {
+ for (i = 0; i < info->count-1; i += 2)
+ OUT_CS(((ptr1[i+1] + info->index_bias) << 16) |
+ (ptr1[i] + info->index_bias));
+
+ if (info->count & 1)
+ OUT_CS(ptr1[i] + info->index_bias);
+ } else {
+ for (i = 0; i < info->count-1; i += 2)
+ OUT_CS(((ptr1[i+1]) << 16) |
+ (ptr1[i] ));
+
+ if (info->count & 1)
+ OUT_CS(ptr1[i]);
+ }
+ break;
+
+ case 2:
+ ptr2 = (uint16_t*)r300_resource(r300->index_buffer.buffer)->b.user_ptr;
+ ptr2 += info->start;
+
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) |
+ r300_translate_primitive(info->mode));
+
+ if (info->index_bias && !r300->screen->caps.is_r500) {
+ for (i = 0; i < info->count-1; i += 2)
+ OUT_CS(((ptr2[i+1] + info->index_bias) << 16) |
+ (ptr2[i] + info->index_bias));
+
+ if (info->count & 1)
+ OUT_CS(ptr2[i] + info->index_bias);
+ } else {
+ OUT_CS_TABLE(ptr2, count_dwords);
+ }
+ break;
+
+ case 4:
+ ptr4 = (uint32_t*)r300_resource(r300->index_buffer.buffer)->b.user_ptr;
+ ptr4 += info->start;
+
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) |
+ R300_VAP_VF_CNTL__INDEX_SIZE_32bit |
+ r300_translate_primitive(info->mode));
+
+ if (info->index_bias && !r300->screen->caps.is_r500) {
+ for (i = 0; i < info->count; i++)
+ OUT_CS(ptr4[i] + info->index_bias);
+ } else {
+ OUT_CS_TABLE(ptr4, count_dwords);
+ }
+ break;
+ }
END_CS;
}
-/* This is the fast-path drawing & emission for HW TCL. */
-static void r300_draw_range_elements(struct pipe_context* pipe,
- struct pipe_resource* indexBuffer,
- unsigned indexSize,
- int indexBias,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count)
+static void r300_draw_elements(struct r300_context *r300,
+ const struct pipe_draw_info *info,
+ int instance_id)
{
- struct r300_context* r300 = r300_context(pipe);
+ struct pipe_resource *indexBuffer = r300->index_buffer.buffer;
+ unsigned indexSize = r300->index_buffer.index_size;
struct pipe_resource* orgIndexBuffer = indexBuffer;
+ unsigned start = info->start;
+ unsigned count = info->count;
boolean alt_num_verts = r300->screen->caps.is_r500 &&
- count > 65536 &&
- r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
+ count > 65536;
unsigned short_count;
int buffer_offset = 0, index_offset = 0; /* for index bias emulation */
- unsigned new_offset;
+ uint16_t indices3[3];
- if (indexBias && !r300->screen->caps.index_bias_supported) {
- r300_split_index_bias(r300, indexBias, &buffer_offset, &index_offset);
+ if (info->index_bias && !r300->screen->caps.is_r500) {
+ r300_split_index_bias(r300, info->index_bias, &buffer_offset, &index_offset);
}
r300_translate_index_buffer(r300, &indexBuffer, &indexSize, index_offset,
&start, count);
- r300_update_derived_state(r300);
- r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count, &new_offset);
-
- start = new_offset;
+ /* Fallback for misaligned ushort indices. */
+ if (indexSize == 2 && (start & 1) &&
+ !r300_resource(indexBuffer)->b.user_ptr) {
+ /* If we got here, then orgIndexBuffer == indexBuffer. */
+ uint16_t *ptr = r300->rws->buffer_map(r300_resource(orgIndexBuffer)->buf,
+ r300->cs,
+ PIPE_TRANSFER_READ |
+ PIPE_TRANSFER_UNSYNCHRONIZED);
+
+ if (info->mode == PIPE_PRIM_TRIANGLES) {
+ memcpy(indices3, ptr + start, 6);
+ } else {
+ /* Copy the mapped index buffer directly to the upload buffer.
+ * The start index will be aligned simply from the fact that
+ * every sub-buffer in the upload buffer is aligned. */
+ r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start,
+ count, (uint8_t*)ptr);
+ }
+ r300->rws->buffer_unmap(r300_resource(orgIndexBuffer)->buf);
+ } else {
+ if (r300_resource(indexBuffer)->b.user_ptr)
+ r300_upload_index_buffer(r300, &indexBuffer, indexSize,
+ &start, count,
+ r300_resource(indexBuffer)->b.user_ptr);
+ }
- /* 15 dwords for emit_draw_elements. Give up if the function fails. */
+ /* 19 dwords for emit_draw_elements. Give up if the function fails. */
if (!r300_prepare_for_rendering(r300,
- PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS |
- PREP_INDEXED, indexBuffer, 15, buffer_offset, indexBias))
+ PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS |
+ PREP_INDEXED, indexBuffer, 19, buffer_offset, info->index_bias,
+ instance_id))
goto done;
if (alt_num_verts || count <= 65535) {
- r300_emit_draw_elements(r300, indexBuffer, indexSize,
- minIndex, maxIndex, mode, start, count);
+ r300_emit_draw_elements(r300, indexBuffer, indexSize, info->min_index,
+ info->max_index, info->mode, start, count,
+ indices3);
} else {
do {
- short_count = MIN2(count, 65534);
+ if (indexSize == 2 && (start & 1))
+ short_count = MIN2(count, 65535);
+ else
+ short_count = MIN2(count, 65534);
+
r300_emit_draw_elements(r300, indexBuffer, indexSize,
- minIndex, maxIndex,
- mode, start, short_count);
+ info->min_index, info->max_index,
+ info->mode, start, short_count, indices3);
start += short_count;
count -= short_count;
@@ -578,7 +693,8 @@ static void r300_draw_range_elements(struct pipe_context* pipe,
if (count) {
if (!r300_prepare_for_rendering(r300,
PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED,
- indexBuffer, 15, buffer_offset, indexBias))
+ indexBuffer, 19, buffer_offset, info->index_bias,
+ instance_id))
goto done;
}
} while (count);
@@ -590,107 +706,112 @@ done:
}
}
-static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
- unsigned start, unsigned count)
+static void r300_draw_arrays(struct r300_context *r300,
+ const struct pipe_draw_info *info,
+ int instance_id)
{
- struct r300_context* r300 = r300_context(pipe);
boolean alt_num_verts = r300->screen->caps.is_r500 &&
- count > 65536 &&
- r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
+ info->count > 65536;
+ unsigned start = info->start;
+ unsigned count = info->count;
unsigned short_count;
- r300_update_derived_state(r300);
+ /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */
+ if (!r300_prepare_for_rendering(r300,
+ PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS,
+ NULL, 9, start, 0, instance_id))
+ return;
- if (immd_is_good_idea(r300, count)) {
- r300_emit_draw_arrays_immediate(r300, mode, start, count);
+ if (alt_num_verts || count <= 65535) {
+ r300_emit_draw_arrays(r300, info->mode, count);
} else {
- /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */
- if (!r300_prepare_for_rendering(r300,
- PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS,
- NULL, 9, start, 0))
- return;
+ do {
+ short_count = MIN2(count, 65535);
+ r300_emit_draw_arrays(r300, info->mode, short_count);
- if (alt_num_verts || count <= 65535) {
- r300_emit_draw_arrays(r300, mode, count);
- } else {
- do {
- short_count = MIN2(count, 65535);
- r300_emit_draw_arrays(r300, mode, short_count);
-
- start += short_count;
- count -= short_count;
-
- /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */
- if (count) {
- if (!r300_prepare_for_rendering(r300,
- PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9,
- start, 0))
- return;
- }
- } while (count);
- }
+ start += short_count;
+ count -= short_count;
+
+ /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */
+ if (count) {
+ if (!r300_prepare_for_rendering(r300,
+ PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9,
+ start, 0, instance_id))
+ return;
+ }
+ } while (count);
}
}
+static void r300_draw_arrays_instanced(struct r300_context *r300,
+ const struct pipe_draw_info *info)
+{
+ int i;
+
+ for (i = 0; i < info->instance_count; i++)
+ r300_draw_arrays(r300, info, i);
+}
+
+static void r300_draw_elements_instanced(struct r300_context *r300,
+ const struct pipe_draw_info *info)
+{
+ int i;
+
+ for (i = 0; i < info->instance_count; i++)
+ r300_draw_elements(r300, info, i);
+}
+
static void r300_draw_vbo(struct pipe_context* pipe,
- const struct pipe_draw_info *info)
+ const struct pipe_draw_info *dinfo)
{
struct r300_context* r300 = r300_context(pipe);
- unsigned count = info->count;
- boolean translate = FALSE;
- boolean indexed = info->indexed && r300->index_buffer.buffer;
- unsigned start_indexed = 0;
+ struct pipe_draw_info info = *dinfo;
+ boolean buffers_updated, uploader_flushed;
- if (r300->skip_rendering) {
- return;
- }
+ info.indexed = info.indexed && r300->index_buffer.buffer;
- if (!u_trim_pipe_prim(info->mode, &count)) {
+ if (r300->skip_rendering ||
+ !u_trim_pipe_prim(info.mode, &info.count)) {
return;
}
- /* Index buffer range checking. */
- if (indexed) {
- assert(r300->index_buffer.offset % r300->index_buffer.index_size == 0);
-
- /* Compute start for draw_elements, taking the offset into account. */
- start_indexed =
- info->start +
- (r300->index_buffer.offset / r300->index_buffer.index_size);
-
- if ((start_indexed + count) * r300->index_buffer.index_size >
- r300->index_buffer.buffer->width0) {
- fprintf(stderr, "r300: Invalid index buffer range. Skipping rendering.\n");
- return;
- }
- }
+ r300_update_derived_state(r300);
- /* Set up fallback for incompatible vertex layout if needed. */
- if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) {
- r300_begin_vertex_translate(r300);
- translate = TRUE;
+ /* Start the vbuf manager and update buffers if needed. */
+ u_vbuf_mgr_draw_begin(r300->vbuf_mgr, &info,
+ &buffers_updated, &uploader_flushed);
+ if (buffers_updated) {
+ r300->vertex_arrays_dirty = TRUE;
}
- if (indexed) {
- r300_draw_range_elements(pipe,
- r300->index_buffer.buffer,
- r300->index_buffer.index_size,
- info->index_bias,
- info->min_index,
- info->max_index,
- info->mode,
- start_indexed,
- count);
+ /* Draw. */
+ if (info.indexed) {
+ info.start += r300->index_buffer.offset;
+ info.max_index = MIN2(r300->vbuf_mgr->max_index, info.max_index);
+
+ if (info.instance_count <= 1) {
+ if (info.count <= 8 &&
+ r300_resource(r300->index_buffer.buffer)->b.user_ptr) {
+ r300_draw_elements_immediate(r300, &info);
+ } else {
+ r300_draw_elements(r300, &info, -1);
+ }
+ } else {
+ r300_draw_elements_instanced(r300, &info);
+ }
} else {
- r300_draw_arrays(pipe,
- info->mode,
- info->start,
- count);
+ if (info.instance_count <= 1) {
+ if (immd_is_good_idea(r300, info.count)) {
+ r300_draw_arrays_immediate(r300, &info);
+ } else {
+ r300_draw_arrays(r300, &info, -1);
+ }
+ } else {
+ r300_draw_arrays_instanced(r300, &info);
+ }
}
- if (translate) {
- r300_end_vertex_translate(r300);
- }
+ u_vbuf_mgr_draw_end(r300->vbuf_mgr);
}
/****************************************************************************
@@ -721,15 +842,16 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
r300_update_derived_state(r300);
r300_reserve_cs_dwords(r300,
- PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL |
+ PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL |
(indexed ? PREP_INDEXED : 0),
indexed ? 256 : 6);
- for (i = 0; i < r300->vertex_buffer_count; i++) {
- if (r300->vertex_buffer[i].buffer) {
+ for (i = 0; i < r300->vbuf_mgr->nr_vertex_buffers; i++) {
+ if (r300->vbuf_mgr->vertex_buffer[i].buffer) {
void *buf = pipe_buffer_map(pipe,
- r300->vertex_buffer[i].buffer,
- PIPE_TRANSFER_READ,
+ r300->vbuf_mgr->vertex_buffer[i].buffer,
+ PIPE_TRANSFER_READ |
+ PIPE_TRANSFER_UNSYNCHRONIZED,
&vb_transfer[i]);
draw_set_mapped_vertex_buffer(r300->draw, i, buf);
}
@@ -737,7 +859,8 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
if (indexed) {
indices = pipe_buffer_map(pipe, r300->index_buffer.buffer,
- PIPE_TRANSFER_READ, &ib_transfer);
+ PIPE_TRANSFER_READ |
+ PIPE_TRANSFER_UNSYNCHRONIZED, &ib_transfer);
}
draw_set_mapped_index_buffer(r300->draw, indices);
@@ -748,16 +871,15 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
draw_flush(r300->draw);
r300->draw_vbo_locked = FALSE;
- for (i = 0; i < r300->vertex_buffer_count; i++) {
- if (r300->vertex_buffer[i].buffer) {
- pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer,
- vb_transfer[i]);
+ for (i = 0; i < r300->vbuf_mgr->nr_vertex_buffers; i++) {
+ if (r300->vbuf_mgr->vertex_buffer[i].buffer) {
+ pipe_buffer_unmap(pipe, vb_transfer[i]);
draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
}
}
if (indexed) {
- pipe_buffer_unmap(pipe, r300->index_buffer.buffer, ib_transfer);
+ pipe_buffer_unmap(pipe, ib_transfer);
draw_set_mapped_index_buffer(r300->draw, NULL);
}
}
@@ -813,10 +935,10 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render,
pipe_resource_reference(&r300->vbo, NULL);
r300->vbo = pipe_buffer_create(screen,
PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STREAM,
R300_MAX_DRAW_VBO_SIZE);
r300->draw_vbo_offset = 0;
r300->draw_vbo_size = R300_MAX_DRAW_VBO_SIZE;
- r300->validate_buffers = TRUE;
}
r300render->vertex_size = vertex_size;
@@ -835,7 +957,8 @@ static void* r300_render_map_vertices(struct vbuf_render* render)
r300render->vbo_ptr = pipe_buffer_map(&r300render->r300->context,
r300->vbo,
- PIPE_TRANSFER_WRITE,
+ PIPE_TRANSFER_WRITE |
+ PIPE_TRANSFER_UNSYNCHRONIZED,
&r300render->vbo_transfer);
assert(r300render->vbo_ptr);
@@ -857,7 +980,7 @@ static void r300_render_unmap_vertices(struct vbuf_render* render,
r300render->vbo_max_used = MAX2(r300render->vbo_max_used,
r300render->vertex_size * (max + 1));
- pipe_buffer_unmap(context, r300->vbo, r300render->vbo_transfer);
+ pipe_buffer_unmap(context, r300render->vbo_transfer);
r300render->vbo_transfer = NULL;
}
@@ -901,33 +1024,16 @@ static void r300_render_draw_arrays(struct vbuf_render* render,
if (r300->draw_first_emitted) {
if (!r300_prepare_for_rendering(r300,
- PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL,
- NULL, 6, 0, 0))
+ PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL,
+ NULL, dwords, 0, 0, -1))
return;
} else {
if (!r300_emit_states(r300,
- PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL,
- NULL, 0, 0))
+ PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL,
+ NULL, 0, 0, -1))
return;
}
- /* Uncomment to dump all VBOs rendered through this interface.
- * Slow and noisy!
- ptr = pipe_buffer_map(&r300render->r300->context,
- r300render->vbo, PIPE_TRANSFER_READ,
- &r300render->vbo_transfer);
-
- for (i = 0; i < count; i++) {
- printf("r300: Vertex %d\n", i);
- draw_dump_emitted_vertex(&r300->vertex_info, ptr);
- ptr += r300->vertex_info.size * 4;
- printf("\n");
- }
-
- pipe_buffer_unmap(&r300render->r300->context, r300render->vbo,
- r300render->vbo_transfer);
- */
-
BEGIN_CS(dwords);
OUT_CS_REG(R300_GA_COLOR_CONTROL,
r300_provoking_vertex_fixes(r300, r300render->prim));
@@ -958,13 +1064,13 @@ static void r300_render_draw_elements(struct vbuf_render* render,
if (r300->draw_first_emitted) {
if (!r300_prepare_for_rendering(r300,
- PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
- NULL, 256, 0, 0))
+ PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
+ NULL, 256, 0, 0, -1))
return;
} else {
if (!r300_emit_states(r300,
- PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
- NULL, 0, 0))
+ PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
+ NULL, 0, 0, -1))
return;
}
@@ -1001,7 +1107,7 @@ static void r300_render_draw_elements(struct vbuf_render* render,
if (count) {
if (!r300_prepare_for_rendering(r300,
PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
- NULL, 256, 0, 0))
+ NULL, 256, 0, 0, -1))
return;
end_cs_dwords = r300_get_num_cs_end_dwords(r300);
@@ -1022,8 +1128,7 @@ static struct vbuf_render* r300_render_create(struct r300_context* r300)
r300render->r300 = r300;
- /* XXX find real numbers plz */
- r300render->base.max_vertex_buffer_bytes = 128 * 1024;
+ r300render->base.max_vertex_buffer_bytes = 1024 * 1024;
r300render->base.max_indices = 16 * 1024;
r300render->base.get_vertex_info = r300_render_get_vertex_info;
@@ -1106,7 +1211,7 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter,
r300->clip_state.dirty = FALSE;
r300->viewport_state.dirty = FALSE;
- if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0))
+ if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1))
goto done;
DBG(r300, DBG_DRAW, "r300: draw_rectangle\n");
@@ -1190,7 +1295,7 @@ static void r300_resource_resolve(struct pipe_context* pipe,
aa->aaresolve_ctl =
R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE |
R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE;
- r300->aa_state.size = 12;
+ r300->aa_state.size = 10;
r300_mark_atom_dirty(r300, &r300->aa_state);
/* Resolve the surface. */
diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c
index 41a43b04de..f8c7558f4b 100644
--- a/src/gallium/drivers/r300/r300_render_translate.c
+++ b/src/gallium/drivers/r300/r300_render_translate.c
@@ -20,203 +20,64 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
-/**
- * The functions below translate vertex and index buffers to the layout
- * compatible with the hardware, so that all vertex and index fetches are
- * DWORD-aligned and all used vertex and index formats are supported.
- * For indices, an optional index offset is added to each index.
- */
-
#include "r300_context.h"
-#include "translate/translate.h"
#include "util/u_index_modify.h"
+#include "util/u_upload_mgr.h"
-void r300_begin_vertex_translate(struct r300_context *r300)
-{
- struct pipe_context *pipe = &r300->context;
- struct translate_key key = {0};
- struct translate_element *te;
- unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0};
- struct translate *tr;
- struct r300_vertex_element_state *ve = r300->velems;
- boolean vb_translated[PIPE_MAX_ATTRIBS] = {0};
- void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map;
- struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer;
- struct pipe_resource *out_buffer;
- unsigned i, num_verts;
-
- /* Initialize the translate key, i.e. the recipe how vertices should be
- * translated. */
- for (i = 0; i < ve->count; i++) {
- struct pipe_vertex_buffer *vb =
- &r300->vertex_buffer[ve->velem[i].vertex_buffer_index];
- enum pipe_format output_format = ve->hw_format[i];
- unsigned output_format_size = ve->hw_format_size[i];
-
- /* Check for support. */
- if (ve->velem[i].src_format == ve->hw_format[i] &&
- (vb->buffer_offset + ve->velem[i].src_offset) % 4 == 0 &&
- vb->stride % 4 == 0) {
- continue;
- }
-
- /* Workaround for translate: output floats instead of halfs. */
- switch (output_format) {
- case PIPE_FORMAT_R16_FLOAT:
- output_format = PIPE_FORMAT_R32_FLOAT;
- output_format_size = 4;
- break;
- case PIPE_FORMAT_R16G16_FLOAT:
- output_format = PIPE_FORMAT_R32G32_FLOAT;
- output_format_size = 8;
- break;
- case PIPE_FORMAT_R16G16B16_FLOAT:
- output_format = PIPE_FORMAT_R32G32B32_FLOAT;
- output_format_size = 12;
- break;
- case PIPE_FORMAT_R16G16B16A16_FLOAT:
- output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- output_format_size = 16;
- break;
- default:;
- }
-
- /* Add this vertex element. */
- te = &key.element[key.nr_elements];
- /*te->type;
- te->instance_divisor;*/
- te->input_buffer = ve->velem[i].vertex_buffer_index;
- te->input_format = ve->velem[i].src_format;
- te->input_offset = vb->buffer_offset + ve->velem[i].src_offset;
- te->output_format = output_format;
- te->output_offset = key.output_stride;
-
- key.output_stride += output_format_size;
- vb_translated[ve->velem[i].vertex_buffer_index] = TRUE;
- tr_elem_index[i] = key.nr_elements;
- key.nr_elements++;
- }
-
- /* Get a translate object. */
- tr = translate_cache_find(r300->tran.translate_cache, &key);
-
- /* Map buffers we want to translate. */
- for (i = 0; i < r300->vertex_buffer_count; i++) {
- if (vb_translated[i]) {
- struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i];
-
- vb_map[i] = pipe_buffer_map(pipe, vb->buffer,
- PIPE_TRANSFER_READ, &vb_transfer[i]);
-
- tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index);
- }
- }
-
- /* Create and map the output buffer. */
- num_verts = r300->vertex_buffer_max_index + 1;
-
- out_buffer = pipe_buffer_create(&r300->screen->screen,
- PIPE_BIND_VERTEX_BUFFER,
- key.output_stride * num_verts);
-
- out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE,
- &out_transfer);
-
- /* Translate. */
- tr->run(tr, 0, num_verts, 0, out_map);
-
- /* Unmap all buffers. */
- for (i = 0; i < r300->vertex_buffer_count; i++) {
- if (vb_translated[i]) {
- pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer,
- vb_transfer[i]);
- }
- }
-
- pipe_buffer_unmap(pipe, out_buffer, out_transfer);
-
- /* Setup the new vertex buffer in the first free slot. */
- for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
- struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i];
-
- if (!vb->buffer) {
- pipe_resource_reference(&vb->buffer, out_buffer);
- vb->buffer_offset = 0;
- vb->max_index = num_verts - 1;
- vb->stride = key.output_stride;
- r300->tran.vb_slot = i;
- r300->validate_buffers = TRUE;
- break;
- }
- }
-
- /* Save and replace vertex elements. */
- {
- struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS];
-
- r300->tran.saved_velems = r300->velems;
-
- for (i = 0; i < ve->count; i++) {
- if (vb_translated[ve->velem[i].vertex_buffer_index]) {
- te = &key.element[tr_elem_index[i]];
- new_velems[i].instance_divisor = ve->velem[i].instance_divisor;
- new_velems[i].src_format = te->output_format;
- new_velems[i].src_offset = te->output_offset;
- new_velems[i].vertex_buffer_index = r300->tran.vb_slot;
- } else {
- memcpy(&new_velems[i], &ve->velem[i],
- sizeof(struct pipe_vertex_element));
- }
- }
-
- r300->tran.new_velems =
- pipe->create_vertex_elements_state(pipe, ve->count, new_velems);
- pipe->bind_vertex_elements_state(pipe, r300->tran.new_velems);
- }
-
- pipe_resource_reference(&out_buffer, NULL);
-}
-
-void r300_end_vertex_translate(struct r300_context *r300)
-{
- struct pipe_context *pipe = &r300->context;
-
- /* Restore vertex elements. */
- pipe->bind_vertex_elements_state(pipe, r300->tran.saved_velems);
- pipe->delete_vertex_elements_state(pipe, r300->tran.new_velems);
-
- /* Delete the now-unused VBO. */
- pipe_resource_reference(&r300->vertex_buffer[r300->tran.vb_slot].buffer,
- NULL);
-}
void r300_translate_index_buffer(struct r300_context *r300,
struct pipe_resource **index_buffer,
unsigned *index_size, unsigned index_offset,
unsigned *start, unsigned count)
{
+ struct pipe_resource *out_buffer = NULL;
+ unsigned out_offset;
+ void *ptr;
+ boolean flushed;
+
switch (*index_size) {
- case 1:
- util_shorten_ubyte_elts(&r300->context, index_buffer, index_offset, *start, count);
- *index_size = 2;
- *start = 0;
- r300->validate_buffers = TRUE;
- break;
+ case 1:
+ u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 2,
+ &out_offset, &out_buffer, &flushed, &ptr);
+
+ util_shorten_ubyte_elts_to_userptr(
+ &r300->context, *index_buffer, index_offset,
+ *start, count, ptr);
+
+ *index_buffer = NULL;
+ pipe_resource_reference(index_buffer, out_buffer);
+ *index_size = 2;
+ *start = out_offset / 2;
+ break;
+
+ case 2:
+ if (index_offset) {
+ u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 2,
+ &out_offset, &out_buffer, &flushed, &ptr);
+
+ util_rebuild_ushort_elts_to_userptr(&r300->context, *index_buffer,
+ index_offset, *start,
+ count, ptr);
+
+ *index_buffer = NULL;
+ pipe_resource_reference(index_buffer, out_buffer);
+ *start = out_offset / 2;
+ }
+ break;
- case 2:
- if (*start % 2 != 0 || index_offset) {
- util_rebuild_ushort_elts(&r300->context, index_buffer, index_offset, *start, count);
- *start = 0;
- r300->validate_buffers = TRUE;
- }
- break;
+ case 4:
+ if (index_offset) {
+ u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 4,
+ &out_offset, &out_buffer, &flushed, &ptr);
- case 4:
- if (index_offset) {
- util_rebuild_uint_elts(&r300->context, index_buffer, index_offset, *start, count);
- *start = 0;
- r300->validate_buffers = TRUE;
- }
- break;
+ util_rebuild_uint_elts_to_userptr(&r300->context, *index_buffer,
+ index_offset, *start,
+ count, ptr);
+
+ *index_buffer = NULL;
+ pipe_resource_reference(index_buffer, out_buffer);
+ *start = out_offset / 4;
+ }
+ break;
}
}
diff --git a/src/gallium/drivers/r300/r300_resource.c b/src/gallium/drivers/r300/r300_resource.c
index dd1df97059..6593474b1f 100644
--- a/src/gallium/drivers/r300/r300_resource.c
+++ b/src/gallium/drivers/r300/r300_resource.c
@@ -38,26 +38,14 @@ r300_resource_create(struct pipe_screen *screen,
}
-static struct pipe_resource *
-r300_resource_from_handle(struct pipe_screen * screen,
- const struct pipe_resource *templ,
- struct winsys_handle *whandle)
-{
- if (templ->target == PIPE_BUFFER)
- return NULL;
- else
- return r300_texture_from_handle(screen, templ, whandle);
-}
-
void r300_init_resource_functions(struct r300_context *r300)
{
r300->context.get_transfer = u_get_transfer_vtbl;
r300->context.transfer_map = u_transfer_map_vtbl;
- r300->context.transfer_flush_region = u_transfer_flush_region_vtbl;
+ r300->context.transfer_flush_region = u_default_transfer_flush_region;
r300->context.transfer_unmap = u_transfer_unmap_vtbl;
r300->context.transfer_destroy = u_transfer_destroy_vtbl;
r300->context.transfer_inline_write = u_transfer_inline_write_vtbl;
- r300->context.is_resource_referenced = u_is_resource_referenced_vtbl;
r300->context.create_surface = r300_create_surface;
r300->context.surface_destroy = r300_surface_destroy;
}
@@ -65,8 +53,8 @@ void r300_init_resource_functions(struct r300_context *r300)
void r300_init_screen_resource_functions(struct r300_screen *r300screen)
{
r300screen->screen.resource_create = r300_resource_create;
- r300screen->screen.resource_from_handle = r300_resource_from_handle;
- r300screen->screen.resource_get_handle = u_resource_get_handle_vtbl;
+ r300screen->screen.resource_from_handle = r300_texture_from_handle;
+ r300screen->screen.resource_get_handle = r300_resource_get_handle;
r300screen->screen.resource_destroy = u_resource_destroy_vtbl;
r300screen->screen.user_buffer_create = r300_user_buffer_create;
}
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 921d6f1e67..8a69628c53 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -24,6 +24,7 @@
#include "util/u_format.h"
#include "util/u_format_s3tc.h"
#include "util/u_memory.h"
+#include "os/os_time.h"
#include "r300_context.h"
#include "r300_texture.h"
@@ -83,12 +84,8 @@ static const char* r300_get_name(struct pipe_screen* pscreen)
static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
{
struct r300_screen* r300screen = r300_screen(pscreen);
- boolean is_r400 = r300screen->caps.is_r400;
boolean is_r500 = r300screen->caps.is_r500;
- /* XXX extended shader capabilities of r400 unimplemented */
- is_r400 = FALSE;
-
switch (param) {
/* Supported features (boolean caps). */
case PIPE_CAP_NPOT_TEXTURES:
@@ -116,6 +113,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
return 1;
case PIPE_CAP_TEXTURE_SWIZZLE:
return util_format_s3tc_enabled ? r300screen->caps.dxtc_swizzle : 1;
@@ -125,12 +123,17 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_DUAL_SOURCE_BLEND:
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_INDEP_BLEND_FUNC:
- case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */
+ case PIPE_CAP_DEPTH_CLAMP:
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ case PIPE_CAP_ARRAY_TEXTURES:
+ return 0;
+
+ /* SWTCL-only features. */
case PIPE_CAP_STREAM_OUTPUT:
case PIPE_CAP_PRIMITIVE_RESTART:
- return 0;
+ case PIPE_CAP_TGSI_INSTANCEID:
+ return !r300screen->caps.has_tcl;
/* Texturing. */
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
@@ -171,9 +174,6 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
boolean is_r400 = r300screen->caps.is_r400;
boolean is_r500 = r300screen->caps.is_r500;
- /* XXX extended shader capabilities of r400 unimplemented */
- is_r400 = FALSE;
-
switch (shader)
{
case PIPE_SHADER_FRAGMENT:
@@ -306,10 +306,11 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned usage,
- unsigned geom_flags)
+ unsigned usage)
{
+ struct r300_winsys_screen *rws = r300_screen(screen)->rws;
uint32_t retval = 0;
+ boolean drm_2_8_0 = rws->get_value(rws, R300_VID_DRM_2_8_0);
boolean is_r500 = r300_screen(screen)->caps.is_r500;
boolean is_r400 = r300_screen(screen)->caps.is_r400;
boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM ||
@@ -317,9 +318,13 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
format == PIPE_FORMAT_B10G10R10A2_UNORM ||
format == PIPE_FORMAT_R10SG10SB10SA2U_NORM;
boolean is_ati1n = format == PIPE_FORMAT_RGTC1_UNORM ||
- format == PIPE_FORMAT_RGTC1_SNORM;
+ format == PIPE_FORMAT_RGTC1_SNORM ||
+ format == PIPE_FORMAT_LATC1_UNORM ||
+ format == PIPE_FORMAT_LATC1_SNORM;
boolean is_ati2n = format == PIPE_FORMAT_RGTC2_UNORM ||
- format == PIPE_FORMAT_RGTC2_SNORM;
+ format == PIPE_FORMAT_RGTC2_SNORM ||
+ format == PIPE_FORMAT_LATC2_UNORM ||
+ format == PIPE_FORMAT_LATC2_SNORM;
boolean is_half_float = format == PIPE_FORMAT_R16_FLOAT ||
format == PIPE_FORMAT_R16G16_FLOAT ||
format == PIPE_FORMAT_R16G16B16_FLOAT ||
@@ -363,7 +368,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED)) &&
/* 2101010 cannot be rendered to on non-r5xx. */
- (is_r500 || !is_color2101010) &&
+ (!is_color2101010 || (is_r500 && drm_2_8_0)) &&
r300_is_colorbuffer_format_supported(format)) {
retval |= usage &
(PIPE_BIND_RENDER_TARGET |
@@ -401,6 +406,7 @@ static void r300_destroy_screen(struct pipe_screen* pscreen)
struct r300_winsys_screen *rws = r300_winsys_screen(pscreen);
util_slab_destroy(&r300screen->pool_buffers);
+ pipe_mutex_destroy(r300screen->num_contexts_mutex);
if (rws)
rws->destroy(rws);
@@ -412,33 +418,44 @@ static void r300_fence_reference(struct pipe_screen *screen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence)
{
- struct r300_fence **oldf = (struct r300_fence**)ptr;
- struct r300_fence *newf = (struct r300_fence*)fence;
-
- if (pipe_reference(&(*oldf)->reference, &newf->reference))
- FREE(*oldf);
-
- *ptr = fence;
+ r300_winsys_bo_reference((struct r300_winsys_bo**)ptr,
+ (struct r300_winsys_bo*)fence);
}
-static int r300_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence,
- unsigned flags)
+static boolean r300_fence_signalled(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence)
{
- struct r300_fence *rfence = (struct r300_fence*)fence;
+ struct r300_winsys_screen *rws = r300_screen(screen)->rws;
+ struct r300_winsys_bo *rfence = (struct r300_winsys_bo*)fence;
- return rfence->signalled ? 0 : 1; /* 0 == success */
+ return !rws->buffer_is_busy(rfence);
}
-static int r300_fence_finish(struct pipe_screen *screen,
- struct pipe_fence_handle *fence,
- unsigned flags)
+static boolean r300_fence_finish(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ uint64_t timeout)
{
- struct r300_fence *rfence = (struct r300_fence*)fence;
+ struct r300_winsys_screen *rws = r300_screen(screen)->rws;
+ struct r300_winsys_bo *rfence = (struct r300_winsys_bo*)fence;
+
+ if (timeout != PIPE_TIMEOUT_INFINITE) {
+ int64_t start_time = os_time_get();
- r300_finish(rfence->ctx);
- rfence->signalled = TRUE;
- return 0; /* 0 == success */
+ /* Convert to microseconds. */
+ timeout /= 1000;
+
+ /* Wait in a loop. */
+ while (rws->buffer_is_busy(rfence)) {
+ if (os_time_get() - start_time >= timeout) {
+ return FALSE;
+ }
+ os_time_sleep(10);
+ }
+ return TRUE;
+ }
+
+ rws->buffer_wait(rfence);
+ return TRUE;
}
struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws)
@@ -457,12 +474,15 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws)
r300_init_debug(r300screen);
r300_parse_chipset(&r300screen->caps);
- r300screen->caps.index_bias_supported =
- r300screen->caps.is_r500 &&
- rws->get_value(rws, R300_VID_DRM_2_3_0);
+ if (SCREEN_DBG_ON(r300screen, DBG_NO_ZMASK))
+ r300screen->caps.zmask_ram = 0;
+ if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ))
+ r300screen->caps.hiz_ram = 0;
+
+ pipe_mutex_init(r300screen->num_contexts_mutex);
util_slab_create(&r300screen->pool_buffers,
- sizeof(struct r300_buffer), 64,
+ sizeof(struct r300_resource), 64,
UTIL_SLAB_SINGLETHREADED);
r300screen->rws = rws;
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 5847fe1ffc..576f9c1f4a 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -52,6 +52,7 @@ struct r300_screen {
/* The number of created contexts to know whether we have multiple
* contexts or not. */
int num_contexts;
+ pipe_mutex num_contexts_mutex;
};
@@ -93,6 +94,8 @@ r300_winsys_screen(struct pipe_screen *screen) {
#define DBG_CBZB (1 << 11)
#define DBG_HYPERZ (1 << 12)
#define DBG_SCISSOR (1 << 13)
+#define DBG_UPLOAD (1 << 14)
+#define DBG_INFO (1 << 15)
/* Features. */
#define DBG_ANISOHQ (1 << 16)
#define DBG_NO_TILING (1 << 17)
@@ -100,6 +103,8 @@ r300_winsys_screen(struct pipe_screen *screen) {
#define DBG_FAKE_OCC (1 << 19)
#define DBG_NO_OPT (1 << 20)
#define DBG_NO_CBZB (1 << 21)
+#define DBG_NO_ZMASK (1 << 22)
+#define DBG_NO_HIZ (1 << 23)
/* Statistics. */
#define DBG_P_STAT (1 << 25)
/*@}*/
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c
index 4436443522..986ae384fb 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.c
+++ b/src/gallium/drivers/r300/r300_screen_buffer.c
@@ -33,109 +33,36 @@
#include "r300_screen_buffer.h"
#include "r300_winsys.h"
-unsigned r300_buffer_is_referenced(struct pipe_context *context,
- struct pipe_resource *buf,
- enum r300_reference_domain domain)
+void r300_upload_index_buffer(struct r300_context *r300,
+ struct pipe_resource **index_buffer,
+ unsigned index_size, unsigned *start,
+ unsigned count, uint8_t *ptr)
{
- struct r300_context *r300 = r300_context(context);
- struct r300_buffer *rbuf = r300_buffer(buf);
+ unsigned index_offset;
+ boolean flushed;
- if (r300_buffer_is_user_buffer(buf))
- return PIPE_UNREFERENCED;
+ *index_buffer = NULL;
- if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf, domain))
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+ u_upload_data(r300->vbuf_mgr->uploader,
+ 0, count * index_size,
+ ptr + (*start * index_size),
+ &index_offset,
+ index_buffer, &flushed);
- return PIPE_UNREFERENCED;
-}
-
-static unsigned r300_buffer_is_referenced_by_cs(struct pipe_context *context,
- struct pipe_resource *buf,
- unsigned level, int layer)
-{
- return r300_buffer_is_referenced(context, buf, R300_REF_CS);
-}
-
-/* External helper, not required to implent u_resource_vtbl:
- */
-int r300_upload_index_buffer(struct r300_context *r300,
- struct pipe_resource **index_buffer,
- unsigned index_size,
- unsigned start,
- unsigned count,
- unsigned *out_offset)
-{
- struct pipe_resource *upload_buffer = NULL;
- unsigned index_offset = start * index_size;
- int ret = 0;
-
- if (r300_buffer_is_user_buffer(*index_buffer)) {
- ret = u_upload_buffer(r300->upload_ib,
- index_offset,
- count * index_size,
- *index_buffer,
- &index_offset,
- &upload_buffer);
- if (ret) {
- goto done;
- }
- *index_buffer = upload_buffer;
- *out_offset = index_offset / index_size;
- } else
- *out_offset = start;
-
- done:
- // if (upload_buffer)
- // pipe_resource_reference(&upload_buffer, NULL);
- return ret;
-}
-
-/* External helper, not required to implement u_resource_vtbl:
- */
-int r300_upload_user_buffers(struct r300_context *r300)
-{
- enum pipe_error ret = PIPE_OK;
- int i, nr;
-
- nr = r300->velems->count;
-
- for (i = 0; i < nr; i++) {
- struct pipe_vertex_buffer *vb =
- &r300->vertex_buffer[r300->velems->velem[i].vertex_buffer_index];
-
- if (r300_buffer_is_user_buffer(vb->buffer)) {
- struct pipe_resource *upload_buffer = NULL;
- unsigned offset = 0; /*vb->buffer_offset * 4;*/
- unsigned size = vb->buffer->width0;
- unsigned upload_offset;
- ret = u_upload_buffer(r300->upload_vb,
- offset, size,
- vb->buffer,
- &upload_offset, &upload_buffer);
- if (ret)
- return ret;
-
- pipe_resource_reference(&vb->buffer, NULL);
- vb->buffer = upload_buffer;
- vb->buffer_offset = upload_offset;
- r300->validate_buffers = TRUE;
- }
- }
- return ret;
+ *start = index_offset / index_size;
}
static void r300_buffer_destroy(struct pipe_screen *screen,
struct pipe_resource *buf)
{
struct r300_screen *r300screen = r300_screen(screen);
- struct r300_buffer *rbuf = r300_buffer(buf);
- struct r300_winsys_screen *rws = r300screen->rws;
+ struct r300_resource *rbuf = r300_resource(buf);
if (rbuf->constant_buffer)
FREE(rbuf->constant_buffer);
if (rbuf->buf)
- rws->buffer_reference(rws, &rbuf->buf, NULL);
+ r300_winsys_bo_reference(&rbuf->buf, NULL);
util_slab_free(&r300screen->pool_buffers, rbuf);
}
@@ -179,93 +106,31 @@ r300_buffer_transfer_map( struct pipe_context *pipe,
struct r300_context *r300 = r300_context(pipe);
struct r300_screen *r300screen = r300_screen(pipe->screen);
struct r300_winsys_screen *rws = r300screen->rws;
- struct r300_buffer *rbuf = r300_buffer(transfer->resource);
+ struct r300_resource *rbuf = r300_resource(transfer->resource);
uint8_t *map;
- boolean flush = FALSE;
- unsigned i;
- if (rbuf->user_buffer)
- return (uint8_t *) rbuf->user_buffer + transfer->box.x;
+ if (rbuf->b.user_ptr)
+ return (uint8_t *) rbuf->b.user_ptr + transfer->box.x;
if (rbuf->constant_buffer)
return (uint8_t *) rbuf->constant_buffer + transfer->box.x;
- /* check if the mapping is to a range we already flushed */
- if (transfer->usage & PIPE_TRANSFER_DISCARD) {
- for (i = 0; i < rbuf->num_ranges; i++) {
- if ((transfer->box.x >= rbuf->ranges[i].start) &&
- (transfer->box.x < rbuf->ranges[i].end))
- flush = TRUE;
-
- if (flush) {
- /* unreference this hw buffer and allocate a new one */
- rws->buffer_reference(rws, &rbuf->buf, NULL);
-
- rbuf->num_ranges = 0;
- rbuf->buf =
- r300screen->rws->buffer_create(r300screen->rws,
- rbuf->b.b.width0, 16,
- rbuf->b.b.bind,
- rbuf->b.b.usage,
- rbuf->domain);
- rbuf->cs_buf =
- r300screen->rws->buffer_get_cs_handle(r300screen->rws,
- rbuf->buf);
- break;
- }
- }
- }
-
- map = rws->buffer_map(rws, rbuf->buf, r300->cs, transfer->usage);
+ map = rws->buffer_map(rbuf->buf, r300->cs, transfer->usage);
if (map == NULL)
return NULL;
- /* map_buffer() returned a pointer to the beginning of the buffer,
- * but transfers are expected to return a pointer to just the
- * region specified in the box.
- */
return map + transfer->box.x;
}
-static void r300_buffer_transfer_flush_region( struct pipe_context *pipe,
- struct pipe_transfer *transfer,
- const struct pipe_box *box)
-{
- struct r300_buffer *rbuf = r300_buffer(transfer->resource);
- unsigned i;
- unsigned offset = transfer->box.x + box->x;
- unsigned length = box->width;
-
- assert(box->x + box->width <= transfer->box.width);
-
- if (rbuf->user_buffer)
- return;
- if (rbuf->constant_buffer)
- return;
-
- /* mark the range as used */
- for(i = 0; i < rbuf->num_ranges; ++i) {
- if(offset <= rbuf->ranges[i].end && rbuf->ranges[i].start <= (offset+box->width)) {
- rbuf->ranges[i].start = MIN2(rbuf->ranges[i].start, offset);
- rbuf->ranges[i].end = MAX2(rbuf->ranges[i].end, (offset+length));
- return;
- }
- }
-
- rbuf->ranges[rbuf->num_ranges].start = offset;
- rbuf->ranges[rbuf->num_ranges].end = offset+length;
- rbuf->num_ranges++;
-}
-
static void r300_buffer_transfer_unmap( struct pipe_context *pipe,
struct pipe_transfer *transfer )
{
struct r300_screen *r300screen = r300_screen(pipe->screen);
struct r300_winsys_screen *rws = r300screen->rws;
- struct r300_buffer *rbuf = r300_buffer(transfer->resource);
+ struct r300_resource *rbuf = r300_resource(transfer->resource);
if (rbuf->buf) {
- rws->buffer_unmap(rws, rbuf->buf);
+ rws->buffer_unmap(rbuf->buf);
}
}
@@ -278,34 +143,33 @@ static void r300_buffer_transfer_inline_write(struct pipe_context *pipe,
unsigned stride,
unsigned layer_stride)
{
- struct r300_buffer *rbuf = r300_buffer(resource);
- struct pipe_transfer *transfer = NULL;
+ struct r300_context *r300 = r300_context(pipe);
+ struct r300_winsys_screen *rws = r300->screen->rws;
+ struct r300_resource *rbuf = r300_resource(resource);
uint8_t *map = NULL;
if (rbuf->constant_buffer) {
memcpy(rbuf->constant_buffer + box->x, data, box->width);
return;
}
+ assert(rbuf->b.user_ptr == NULL);
- transfer = r300_buffer_get_transfer(pipe, resource, 0,
- PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, box);
- map = r300_buffer_transfer_map(pipe, transfer);
+ map = rws->buffer_map(rbuf->buf, r300->cs,
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage);
- memcpy(map, data, box->width);
+ memcpy(map + box->x, data, box->width);
- r300_buffer_transfer_unmap(pipe, transfer);
- r300_buffer_transfer_destroy(pipe, transfer);
+ rws->buffer_unmap(rbuf->buf);
}
-struct u_resource_vtbl r300_buffer_vtbl =
+static const struct u_resource_vtbl r300_buffer_vtbl =
{
- u_default_resource_get_handle, /* get_handle */
+ NULL, /* get_handle */
r300_buffer_destroy, /* resource_destroy */
- r300_buffer_is_referenced_by_cs, /* is_buffer_referenced */
r300_buffer_get_transfer, /* get_transfer */
r300_buffer_transfer_destroy, /* transfer_destroy */
r300_buffer_transfer_map, /* transfer_map */
- r300_buffer_transfer_flush_region, /* transfer_flush_region */
+ NULL, /* transfer_flush_region */
r300_buffer_transfer_unmap, /* transfer_unmap */
r300_buffer_transfer_inline_write /* transfer_inline_write */
};
@@ -314,73 +178,68 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
struct r300_screen *r300screen = r300_screen(screen);
- struct r300_buffer *rbuf;
+ struct r300_resource *rbuf;
unsigned alignment = 16;
rbuf = util_slab_alloc(&r300screen->pool_buffers);
- rbuf->magic = R300_BUFFER_MAGIC;
-
- rbuf->b.b = *templ;
- rbuf->b.vtbl = &r300_buffer_vtbl;
- pipe_reference_init(&rbuf->b.b.reference, 1);
- rbuf->b.b.screen = screen;
+ rbuf->b.b.b = *templ;
+ rbuf->b.b.vtbl = &r300_buffer_vtbl;
+ pipe_reference_init(&rbuf->b.b.b.reference, 1);
+ rbuf->b.b.b.screen = screen;
+ rbuf->b.user_ptr = NULL;
rbuf->domain = R300_DOMAIN_GTT;
- rbuf->num_ranges = 0;
rbuf->buf = NULL;
+ rbuf->buf_size = templ->width0;
rbuf->constant_buffer = NULL;
- rbuf->user_buffer = NULL;
/* Alloc constant buffers in RAM. */
if (templ->bind & PIPE_BIND_CONSTANT_BUFFER) {
rbuf->constant_buffer = MALLOC(templ->width0);
- return &rbuf->b.b;
+ return &rbuf->b.b.b;
}
rbuf->buf =
r300screen->rws->buffer_create(r300screen->rws,
- rbuf->b.b.width0, alignment,
- rbuf->b.b.bind, rbuf->b.b.usage,
+ rbuf->b.b.b.width0, alignment,
+ rbuf->b.b.b.bind, rbuf->b.b.b.usage,
rbuf->domain);
- rbuf->cs_buf =
- r300screen->rws->buffer_get_cs_handle(r300screen->rws, rbuf->buf);
-
if (!rbuf->buf) {
util_slab_free(&r300screen->pool_buffers, rbuf);
return NULL;
}
- return &rbuf->b.b;
+ rbuf->cs_buf =
+ r300screen->rws->buffer_get_cs_handle(rbuf->buf);
+
+ return &rbuf->b.b.b;
}
struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen,
- void *ptr,
- unsigned bytes,
+ void *ptr, unsigned size,
unsigned bind)
{
struct r300_screen *r300screen = r300_screen(screen);
- struct r300_buffer *rbuf;
+ struct r300_resource *rbuf;
rbuf = util_slab_alloc(&r300screen->pool_buffers);
- rbuf->magic = R300_BUFFER_MAGIC;
-
- pipe_reference_init(&rbuf->b.b.reference, 1);
- rbuf->b.vtbl = &r300_buffer_vtbl;
- rbuf->b.b.screen = screen;
- rbuf->b.b.target = PIPE_BUFFER;
- rbuf->b.b.format = PIPE_FORMAT_R8_UNORM;
- rbuf->b.b.usage = PIPE_USAGE_IMMUTABLE;
- rbuf->b.b.bind = bind;
- rbuf->b.b.width0 = bytes;
- rbuf->b.b.height0 = 1;
- rbuf->b.b.depth0 = 1;
- rbuf->b.b.array_size = 1;
- rbuf->b.b.flags = 0;
+ pipe_reference_init(&rbuf->b.b.b.reference, 1);
+ rbuf->b.b.b.screen = screen;
+ rbuf->b.b.b.target = PIPE_BUFFER;
+ rbuf->b.b.b.format = PIPE_FORMAT_R8_UNORM;
+ rbuf->b.b.b.usage = PIPE_USAGE_IMMUTABLE;
+ rbuf->b.b.b.bind = bind;
+ rbuf->b.b.b.width0 = ~0;
+ rbuf->b.b.b.height0 = 1;
+ rbuf->b.b.b.depth0 = 1;
+ rbuf->b.b.b.array_size = 1;
+ rbuf->b.b.b.flags = 0;
+ rbuf->b.b.vtbl = &r300_buffer_vtbl;
+ rbuf->b.user_ptr = ptr;
rbuf->domain = R300_DOMAIN_GTT;
- rbuf->num_ranges = 0;
rbuf->buf = NULL;
+ rbuf->buf_size = size;
rbuf->constant_buffer = NULL;
- rbuf->user_buffer = ptr;
- return &rbuf->b.b;
+ return &rbuf->b.b.b;
}
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h
index 0b3555dd81..cdbc4425fc 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.h
+++ b/src/gallium/drivers/r300/r300_screen_buffer.h
@@ -35,53 +35,19 @@
#include "r300_winsys.h"
#include "r300_context.h"
-#define R300_BUFFER_MAGIC 0xabcd1234
-#define R300_BUFFER_MAX_RANGES 32
-
-struct r300_buffer_range {
- uint32_t start;
- uint32_t end;
-};
-
-/* Vertex buffer. */
-struct r300_buffer
-{
- struct u_resource b;
-
- uint32_t magic;
-
- struct r300_winsys_buffer *buf;
- struct r300_winsys_cs_buffer *cs_buf;
-
- enum r300_buffer_domain domain;
-
- uint8_t *user_buffer;
- uint8_t *constant_buffer;
- struct r300_buffer_range ranges[R300_BUFFER_MAX_RANGES];
- unsigned num_ranges;
-};
-
/* Functions. */
-int r300_upload_user_buffers(struct r300_context *r300);
-
-int r300_upload_index_buffer(struct r300_context *r300,
- struct pipe_resource **index_buffer,
- unsigned index_size,
- unsigned start,
- unsigned count, unsigned *out_offset);
+void r300_upload_index_buffer(struct r300_context *r300,
+ struct pipe_resource **index_buffer,
+ unsigned index_size, unsigned *start,
+ unsigned count, uint8_t *ptr);
struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ);
struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen,
- void *ptr,
- unsigned bytes,
- unsigned usage);
-
-unsigned r300_buffer_is_referenced(struct pipe_context *context,
- struct pipe_resource *buf,
- enum r300_reference_domain domain);
+ void *ptr, unsigned size,
+ unsigned bind);
/* Inline functions. */
@@ -90,9 +56,4 @@ static INLINE struct r300_buffer *r300_buffer(struct pipe_resource *buffer)
return (struct r300_buffer *)buffer;
}
-static INLINE boolean r300_buffer_is_user_buffer(struct pipe_resource *buffer)
-{
- return r300_buffer(buffer)->user_buffer ? true : false;
-}
-
#endif
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 7529253240..ecb4fc691c 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -24,10 +24,12 @@
#include "draw/draw_context.h"
#include "util/u_framebuffer.h"
+#include "util/u_half.h"
#include "util/u_math.h"
#include "util/u_mm.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
+#include "util/u_transfer.h"
#include "tgsi/tgsi_parse.h"
@@ -44,7 +46,6 @@
#include "r300_texture.h"
#include "r300_vs.h"
#include "r300_winsys.h"
-#include "r300_hyperz.h"
/* r300_state: Functions used to intialize state context by translating
* Gallium state objects into semi-native r300 state objects. */
@@ -187,12 +188,16 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
struct r300_screen* r300screen = r300_screen(pipe->screen);
struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state);
uint32_t blend_control = 0; /* R300_RB3D_CBLEND: 0x4e04 */
+ uint32_t blend_control_noclamp = 0; /* R300_RB3D_CBLEND: 0x4e04 */
uint32_t alpha_blend_control = 0; /* R300_RB3D_ABLEND: 0x4e08 */
+ uint32_t alpha_blend_control_noclamp = 0; /* R300_RB3D_ABLEND: 0x4e08 */
uint32_t color_channel_mask = 0; /* R300_RB3D_COLOR_CHANNEL_MASK: 0x4e0c */
uint32_t rop = 0; /* R300_RB3D_ROPCNTL: 0x4e18 */
uint32_t dither = 0; /* R300_RB3D_DITHER_CTL: 0x4e50 */
CB_LOCALS;
+ blend->state = *state;
+
if (state->rt[0].blend_enable)
{
unsigned eqRGB = state->rt[0].rgb_func;
@@ -205,10 +210,14 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
/* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha,
* this is just the crappy D3D naming */
- blend_control = R300_ALPHA_BLEND_ENABLE |
- r300_translate_blend_function(eqRGB) |
+ blend_control = blend_control_noclamp =
+ R300_ALPHA_BLEND_ENABLE |
( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) |
( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT);
+ blend_control |=
+ r300_translate_blend_function(eqRGB, TRUE);
+ blend_control_noclamp |=
+ r300_translate_blend_function(eqRGB, FALSE);
/* Optimization: some operations do not require the destination color.
*
@@ -230,6 +239,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) {
/* Enable reading from the colorbuffer. */
blend_control |= R300_READ_ENABLE;
+ blend_control_noclamp |= R300_READ_ENABLE;
if (r300screen->caps.is_r500) {
/* Optimization: Depending on incoming pixels, we can
@@ -305,10 +315,14 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
/* separate alpha */
if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
blend_control |= R300_SEPARATE_ALPHA_ENABLE;
- alpha_blend_control =
- r300_translate_blend_function(eqA) |
+ blend_control_noclamp |= R300_SEPARATE_ALPHA_ENABLE;
+ alpha_blend_control = alpha_blend_control_noclamp =
(r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) |
(r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT);
+ alpha_blend_control |=
+ r300_translate_blend_function(eqA, TRUE);
+ alpha_blend_control_noclamp |=
+ r300_translate_blend_function(eqA, FALSE);
}
}
@@ -345,7 +359,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
*/
/* Build a command buffer. */
- BEGIN_CB(blend->cb, 8);
+ BEGIN_CB(blend->cb_clamp, 8);
OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
OUT_CB(blend_control);
@@ -354,6 +368,16 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
END_CB;
+ /* Build a command buffer. */
+ BEGIN_CB(blend->cb_noclamp, 8);
+ OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
+ OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
+ OUT_CB(blend_control_noclamp);
+ OUT_CB(alpha_blend_control_noclamp);
+ OUT_CB(color_channel_mask);
+ OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
+ END_CB;
+
/* The same as above, but with no colorbuffer reads and writes. */
BEGIN_CB(blend->cb_no_readwrite, 8);
OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
@@ -374,6 +398,10 @@ static void r300_bind_blend_state(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
UPDATE_STATE(state, r300->blend_state);
+
+ if (r300->fs.state && r300_pick_fragment_shader(r300)) {
+ r300_mark_fs_code_dirty(r300);
+ }
}
/* Free blend state. */
@@ -395,22 +423,64 @@ static void r300_set_blend_color(struct pipe_context* pipe,
const struct pipe_blend_color* color)
{
struct r300_context* r300 = r300_context(pipe);
- struct r300_blend_color_state* state =
+ struct pipe_framebuffer_state *fb = r300->fb_state.state;
+ struct r300_blend_color_state *state =
(struct r300_blend_color_state*)r300->blend_color_state.state;
+ struct pipe_blend_color c;
+ enum pipe_format format = fb->nr_cbufs ? fb->cbufs[0]->format : 0;
CB_LOCALS;
+ state->state = *color; /* Save it, so that we can reuse it in set_fb_state */
+ c = *color;
+
+ /* The blend color is dependent on the colorbuffer format. */
+ if (fb->nr_cbufs) {
+ switch (format) {
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ c.color[1] = c.color[0];
+ break;
+
+ case PIPE_FORMAT_A8_UNORM:
+ c.color[1] = c.color[3];
+ break;
+
+ case PIPE_FORMAT_R8G8_UNORM:
+ c.color[2] = c.color[1];
+ break;
+
+ case PIPE_FORMAT_L8A8_UNORM:
+ c.color[2] = c.color[3];
+ break;
+
+ default:;
+ }
+ }
+
if (r300->screen->caps.is_r500) {
- /* XXX if FP16 blending is enabled, we should use the FP16 format */
BEGIN_CB(state->cb, 3);
OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2);
- OUT_CB(float_to_fixed10(color->color[0]) |
- (float_to_fixed10(color->color[3]) << 16));
- OUT_CB(float_to_fixed10(color->color[2]) |
- (float_to_fixed10(color->color[1]) << 16));
+
+ switch (format) {
+ case PIPE_FORMAT_R16G16B16A16_FLOAT:
+ OUT_CB(util_float_to_half(c.color[2]) |
+ (util_float_to_half(c.color[3]) << 16));
+ OUT_CB(util_float_to_half(c.color[0]) |
+ (util_float_to_half(c.color[1]) << 16));
+ break;
+
+ default:
+ OUT_CB(float_to_fixed10(c.color[0]) |
+ (float_to_fixed10(c.color[3]) << 16));
+ OUT_CB(float_to_fixed10(c.color[2]) |
+ (float_to_fixed10(c.color[1]) << 16));
+ }
+
END_CB;
} else {
union util_color uc;
- util_pack_color(color->color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
+ util_pack_color(c.color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
BEGIN_CB(state->cb, 2);
OUT_CB_REG(R300_RB3D_BLEND_COLOR, uc.ui);
@@ -442,8 +512,7 @@ static void r300_set_clip_state(struct pipe_context* pipe,
OUT_CB_TABLE(state->ucp, state->nr * 4);
}
OUT_CB_REG(R300_VAP_CLIP_CNTL, ((1 << state->nr) - 1) |
- R300_PS_UCP_MODE_CLIP_AS_TRIFAN |
- (state->depth_clamp ? R300_CLIP_DISABLE : 0));
+ R300_PS_UCP_MODE_CLIP_AS_TRIFAN);
END_CB;
r300_mark_atom_dirty(r300, &r300->clip_state);
@@ -538,29 +607,54 @@ static void*
r300_translate_alpha_function(state->alpha.func) |
R300_FG_ALPHA_FUNC_ENABLE;
- /* We could use 10bit alpha ref but who needs that? */
dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value);
+ dsa->alpha_value = util_float_to_half(state->alpha.ref_value);
- if (caps->is_r500)
+ if (caps->is_r500) {
+ dsa->alpha_function_fp16 = dsa->alpha_function |
+ R500_FG_ALPHA_FUNC_FP16_ENABLE;
dsa->alpha_function |= R500_FG_ALPHA_FUNC_8BIT;
+ }
}
- BEGIN_CB(&dsa->cb_begin, 8);
+ BEGIN_CB(&dsa->cb_begin, 10);
OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function);
OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
OUT_CB(dsa->z_buffer_control);
OUT_CB(dsa->z_stencil_control);
OUT_CB(dsa->stencil_ref_mask);
OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf);
+ OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value);
END_CB;
- BEGIN_CB(dsa->cb_no_readwrite, 8);
+ BEGIN_CB(&dsa->cb_begin_fp16, 10);
+ OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function_fp16);
+ OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
+ OUT_CB(dsa->z_buffer_control);
+ OUT_CB(dsa->z_stencil_control);
+ OUT_CB(dsa->stencil_ref_mask);
+ OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf);
+ OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value);
+ END_CB;
+
+ BEGIN_CB(dsa->cb_zb_no_readwrite, 10);
OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function);
OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
OUT_CB(0);
OUT_CB(0);
OUT_CB(0);
OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0);
+ OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value);
+ END_CB;
+
+ BEGIN_CB(dsa->cb_fp16_zb_no_readwrite, 10);
+ OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function_fp16);
+ OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
+ OUT_CB(0);
+ OUT_CB(0);
+ OUT_CB(0);
+ OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0);
+ OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value);
END_CB;
return (void*)dsa;
@@ -617,21 +711,16 @@ static void r300_set_stencil_ref(struct pipe_context* pipe,
}
static void r300_tex_set_tiling_flags(struct r300_context *r300,
- struct r300_texture *tex, unsigned level)
+ struct r300_resource *tex,
+ unsigned level)
{
/* Check if the macrotile flag needs to be changed.
* Skip changing the flags otherwise. */
- if (tex->desc.macrotile[tex->surface_level] !=
- tex->desc.macrotile[level]) {
- /* Tiling determines how DRM treats the buffer data.
- * We must flush CS when changing it if the buffer is referenced. */
- if (r300->rws->cs_is_buffer_referenced(r300->cs,
- tex->cs_buffer, R300_REF_CS))
- r300->context.flush(&r300->context, 0, NULL);
-
- r300->rws->buffer_set_tiling(r300->rws, tex->buffer,
- tex->desc.microtile, tex->desc.macrotile[level],
- tex->desc.stride_in_bytes[0]);
+ if (tex->tex.macrotile[tex->surface_level] !=
+ tex->tex.macrotile[level]) {
+ r300->rws->buffer_set_tiling(tex->buf, r300->cs,
+ tex->tex.microtile, tex->tex.macrotile[level],
+ tex->tex.stride_in_bytes[0]);
tex->surface_level = level;
}
@@ -646,12 +735,12 @@ static void r300_fb_set_tiling_flags(struct r300_context *r300,
/* Set tiling flags for new surfaces. */
for (i = 0; i < state->nr_cbufs; i++) {
r300_tex_set_tiling_flags(r300,
- r300_texture(state->cbufs[i]->texture),
+ r300_resource(state->cbufs[i]->texture),
state->cbufs[i]->u.tex.level);
}
if (state->zsbuf) {
r300_tex_set_tiling_flags(r300,
- r300_texture(state->zsbuf->texture),
+ r300_resource(state->zsbuf->texture),
state->zsbuf->u.tex.level);
}
}
@@ -660,7 +749,7 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index,
const char *binding)
{
struct pipe_resource *tex = surf->texture;
- struct r300_texture *rtex = r300_texture(tex);
+ struct r300_resource *rtex = r300_resource(tex);
fprintf(stderr,
"r300: %s[%i] Dim: %ix%i, Firstlayer: %i, "
@@ -673,9 +762,9 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index,
surf->u.tex.first_layer, surf->u.tex.last_layer, surf->u.tex.level,
util_format_short_name(surf->format),
- rtex->desc.macrotile[0] ? "YES" : " NO",
- rtex->desc.microtile ? "YES" : " NO",
- rtex->desc.stride_in_pixels[0],
+ rtex->tex.macrotile[0] ? "YES" : " NO",
+ rtex->tex.microtile ? "YES" : " NO",
+ rtex->tex.stride_in_pixels[0],
tex->width0, tex->height0, tex->depth0,
tex->last_level, util_format_short_name(tex->format));
}
@@ -686,13 +775,23 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
struct pipe_framebuffer_state *state = r300->fb_state.state;
boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
- /* What is marked as dirty depends on the enum r300_fb_state_change. */
r300_mark_atom_dirty(r300, &r300->gpu_flush);
r300_mark_atom_dirty(r300, &r300->fb_state);
- r300_mark_atom_dirty(r300, &r300->hyperz_state);
+ /* What is marked as dirty depends on the enum r300_fb_state_change. */
if (change == R300_CHANGED_FB_STATE) {
r300_mark_atom_dirty(r300, &r300->aa_state);
+ r300_mark_atom_dirty(r300, &r300->dsa_state); /* for AlphaRef */
+ r300_set_blend_color(&r300->context, r300->blend_color_state.state);
+ }
+
+ if (change == R300_CHANGED_FB_STATE ||
+ change == R300_CHANGED_HYPERZ_FLAG) {
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
+ }
+
+ if (change == R300_CHANGED_FB_STATE ||
+ change == R300_CHANGED_MULTIWRITE) {
r300_mark_atom_dirty(r300, &r300->fb_state_pipelined);
}
@@ -704,23 +803,21 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
else if (state->zsbuf) {
r300->fb_state.size += 10;
if (can_hyperz)
- r300->fb_state.size += r300->screen->caps.hiz_ram ? 8 : 4;
+ r300->fb_state.size += 8;
}
/* The size of the rest of atoms stays the same. */
}
static void
- r300_set_framebuffer_state(struct pipe_context* pipe,
- const struct pipe_framebuffer_state* state)
+r300_set_framebuffer_state(struct pipe_context* pipe,
+ const struct pipe_framebuffer_state* state)
{
struct r300_context* r300 = r300_context(pipe);
struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
struct pipe_framebuffer_state *old_state = r300->fb_state.state;
- boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
unsigned max_width, max_height, i;
uint32_t zbuffer_bpp = 0;
- int blocksize;
if (r300->screen->caps.is_r500) {
max_width = max_height = 4096;
@@ -736,10 +833,37 @@ static void
return;
}
- /* If nr_cbufs is changed from zero to non-zero or vice versa... */
- if (!!old_state->nr_cbufs != !!state->nr_cbufs) {
- r300_mark_atom_dirty(r300, &r300->blend_state);
+ if (old_state->zsbuf && r300->zmask_in_use && !r300->hyperz_locked) {
+ /* There is a zmask in use, what are we gonna do? */
+ if (state->zsbuf) {
+ if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) {
+ /* Decompress the currently bound zbuffer before we bind another one. */
+ r300_decompress_zmask(r300);
+ r300->hiz_in_use = FALSE;
+ }
+ } else {
+ /* We don't bind another zbuffer, so lock the current one. */
+ r300->hyperz_locked = TRUE;
+ pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf);
+ }
+ } else if (r300->hyperz_locked && r300->locked_zbuffer) {
+ /* We have a locked zbuffer now, what are we gonna do? */
+ if (state->zsbuf) {
+ if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) {
+ /* We are binding some other zbuffer, so decompress the locked one,
+ * it gets unlocked automatically. */
+ r300_decompress_zmask_locked_unsafe(r300);
+ r300->hiz_in_use = FALSE;
+ } else {
+ /* We are binding the locked zbuffer again, so unlock it. */
+ r300->hyperz_locked = FALSE;
+ }
+ }
}
+
+ /* Need to reset clamping or colormask. */
+ r300_mark_atom_dirty(r300, &r300->blend_state);
+
/* If zsbuf is set from NULL to non-NULL or vice versa.. */
if (!!old_state->zsbuf != !!state->zsbuf) {
r300_mark_atom_dirty(r300, &r300->dsa_state);
@@ -750,14 +874,14 @@ static void
util_copy_framebuffer_state(r300->fb_state.state, state);
+ if (!r300->hyperz_locked) {
+ pipe_surface_reference(&r300->locked_zbuffer, NULL);
+ }
+
r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE);
- r300->validate_buffers = TRUE;
- r300->z_compression = false;
-
if (state->zsbuf) {
- blocksize = util_format_get_blocksize(state->zsbuf->texture->format);
- switch (blocksize) {
+ switch (util_format_get_blocksize(state->zsbuf->texture->format)) {
case 2:
zbuffer_bpp = 16;
break;
@@ -765,31 +889,6 @@ static void
zbuffer_bpp = 24;
break;
}
- if (can_hyperz) {
- struct r300_surface *zs_surf = r300_surface(state->zsbuf);
- struct r300_texture *tex;
- int compress = r300->screen->caps.is_rv350 ? RV350_Z_COMPRESS_88 : R300_Z_COMPRESS_44;
- int level = zs_surf->base.u.tex.level;
-
- tex = r300_texture(zs_surf->base.texture);
-
- /* work out whether we can support hiz on this buffer */
- r300_hiz_alloc_block(r300, zs_surf);
-
- /* work out whether we can support zmask features on this buffer */
- r300_zmask_alloc_block(r300, zs_surf, compress);
-
- if (tex->zmask_mem[level]) {
- /* compression causes hangs on 16-bit */
- if (zbuffer_bpp == 24)
- r300->z_compression = compress;
- }
- DBG(r300, DBG_HYPERZ,
- "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0,
- tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef,
- r300->z_compression, tex->zmask_mem[level] ? 1 : 0,
- tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef);
- }
/* Polygon offset depends on the zbuffer bit depth. */
if (r300->zbuffer_bpp != zbuffer_bpp) {
@@ -801,27 +900,25 @@ static void
}
/* Set up AA config. */
- if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) {
- if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) {
- aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE;
-
- switch (state->cbufs[0]->texture->nr_samples) {
- case 2:
- aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2;
- break;
- case 3:
- aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3;
- break;
- case 4:
- aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4;
- break;
- case 6:
- aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6;
- break;
- }
- } else {
- aa->aa_config = 0;
+ if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) {
+ aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE;
+
+ switch (state->cbufs[0]->texture->nr_samples) {
+ case 2:
+ aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2;
+ break;
+ case 3:
+ aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3;
+ break;
+ case 4:
+ aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4;
+ break;
+ case 6:
+ aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6;
+ break;
}
+ } else {
+ aa->aa_config = 0;
}
if (DBG_ON(r300, DBG_FB)) {
@@ -876,16 +973,25 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
{
struct r300_context* r300 = r300_context(pipe);
struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader;
+ struct pipe_framebuffer_state *fb = r300->fb_state.state;
+ boolean last_multi_write;
if (fs == NULL) {
r300->fs.state = NULL;
return;
}
+ last_multi_write = r300_fragment_shader_writes_all(r300_fs(r300));
+
r300->fs.state = fs;
r300_pick_fragment_shader(r300);
r300_mark_fs_code_dirty(r300);
+ if (fb->nr_cbufs > 1 &&
+ last_multi_write != r300_fragment_shader_writes_all(fs)) {
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_MULTIWRITE);
+ }
+
r300_mark_atom_dirty(r300, &r300->rs_block_state); /* Will be updated before the emission. */
}
@@ -934,12 +1040,14 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */
uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */
uint32_t clip_rule; /* R300_SC_CLIP_RULE: 0x43D0 */
+ uint32_t round_mode; /* R300_GA_ROUND_MODE: 0x428c */
/* Point sprites texture coordinates, 0: lower left, 1: upper right */
float point_texcoord_left = 0; /* R300_GA_POINT_S0: 0x4200 */
float point_texcoord_bottom = 0;/* R300_GA_POINT_T0: 0x4204 */
float point_texcoord_right = 1; /* R300_GA_POINT_S1: 0x4208 */
float point_texcoord_top = 0; /* R300_GA_POINT_T1: 0x420c */
+ boolean vclamp = TRUE;
CB_LOCALS;
/* Copy rasterizer state. */
@@ -1062,6 +1170,12 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
}
}
+ /* Vertex color clamping. FP20 means no clamping. */
+ round_mode =
+ R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST |
+ (!vclamp ? (R300_GA_ROUND_MODE_RGB_CLAMP_FP20 |
+ R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20) : 0);
+
/* Build the main command buffer. */
BEGIN_CB(rs->cb_main, RS_STATE_MAIN_SIZE);
OUT_CB_REG(R300_VAP_CNTL_STATUS, vap_control_status);
@@ -1076,6 +1190,7 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
OUT_CB_REG(R300_GA_LINE_STIPPLE_CONFIG, line_stipple_config);
OUT_CB_REG(R300_GA_LINE_STIPPLE_VALUE, line_stipple_value);
OUT_CB_REG(R300_GA_POLY_MODE, polygon_mode);
+ OUT_CB_REG(R300_GA_ROUND_MODE, round_mode);
OUT_CB_REG(R300_SC_CLIP_RULE, clip_rule);
OUT_CB_REG_SEQ(R300_GA_POINT_S0, 4);
OUT_CB_32F(point_texcoord_left);
@@ -1282,7 +1397,7 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
struct r300_textures_state* state =
(struct r300_textures_state*)r300->textures_state.state;
- struct r300_texture *texture;
+ struct r300_resource *texture;
unsigned i, real_num_views = 0, view_index = 0;
unsigned tex_units = r300->screen->caps.num_tex_units;
boolean dirty_tex = FALSE;
@@ -1298,29 +1413,27 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
}
for (i = 0; i < count; i++) {
- if (&state->sampler_views[i]->base != views[i]) {
- pipe_sampler_view_reference(
- (struct pipe_sampler_view**)&state->sampler_views[i],
- views[i]);
+ pipe_sampler_view_reference(
+ (struct pipe_sampler_view**)&state->sampler_views[i],
+ views[i]);
- if (!views[i]) {
- continue;
- }
+ if (!views[i]) {
+ continue;
+ }
- /* A new sampler view (= texture)... */
- dirty_tex = TRUE;
+ /* A new sampler view (= texture)... */
+ dirty_tex = TRUE;
- /* Set the texrect factor in the fragment shader.
+ /* Set the texrect factor in the fragment shader.
* Needed for RECT and NPOT fallback. */
- texture = r300_texture(views[i]->texture);
- if (texture->desc.is_npot) {
- r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
- }
+ texture = r300_resource(views[i]->texture);
+ if (texture->tex.is_npot) {
+ r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
+ }
- state->sampler_views[i]->texcache_region =
+ state->sampler_views[i]->texcache_region =
r300_assign_texture_cache_region(view_index, real_num_views);
- view_index++;
- }
+ view_index++;
}
for (i = count; i < tex_units; i++) {
@@ -1334,7 +1447,6 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
state->sampler_view_count = count;
r300_mark_atom_dirty(r300, &r300->textures_state);
- r300->validate_buffers = TRUE;
if (dirty_tex) {
r300_mark_atom_dirty(r300, &r300->texture_cache_inval);
@@ -1347,7 +1459,7 @@ r300_create_sampler_view(struct pipe_context *pipe,
const struct pipe_sampler_view *templ)
{
struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view);
- struct r300_texture *tex = r300_texture(texture);
+ struct r300_resource *tex = r300_resource(texture);
boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500;
boolean dxtc_swizzle = r300_screen(pipe->screen)->caps.dxtc_swizzle;
@@ -1449,88 +1561,30 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
const struct pipe_vertex_buffer* buffers)
{
struct r300_context* r300 = r300_context(pipe);
- struct pipe_vertex_buffer *vbo;
- unsigned i, max_index = (1 << 24) - 1;
- boolean any_user_buffer = FALSE;
+ unsigned i;
struct pipe_vertex_buffer dummy_vb = {0};
/* There must be at least one vertex buffer set, otherwise it locks up. */
if (!count) {
dummy_vb.buffer = r300->dummy_vb;
- dummy_vb.max_index = r300->dummy_vb->width0 / 4;
buffers = &dummy_vb;
count = 1;
}
- if (count == r300->vertex_buffer_count &&
- memcmp(r300->vertex_buffer, buffers,
- sizeof(struct pipe_vertex_buffer) * count) == 0) {
- return;
- }
+ u_vbuf_mgr_set_vertex_buffers(r300->vbuf_mgr, count, buffers);
if (r300->screen->caps.has_tcl) {
/* HW TCL. */
- r300->incompatible_vb_layout = FALSE;
-
- /* Check if the strides and offsets are aligned to the size of DWORD. */
for (i = 0; i < count; i++) {
- if (buffers[i].buffer) {
- if (buffers[i].stride % 4 != 0 ||
- buffers[i].buffer_offset % 4 != 0) {
- r300->incompatible_vb_layout = TRUE;
- break;
- }
+ if (buffers[i].buffer &&
+ !r300_resource(buffers[i].buffer)->b.user_ptr) {
}
}
-
- for (i = 0; i < count; i++) {
- /* Why, yes, I AM casting away constness. How did you know? */
- vbo = (struct pipe_vertex_buffer*)&buffers[i];
-
- /* Skip NULL buffers */
- if (!buffers[i].buffer) {
- continue;
- }
-
- if (r300_buffer_is_user_buffer(vbo->buffer)) {
- any_user_buffer = TRUE;
- }
-
- if (vbo->max_index == ~0) {
- /* if no VBO stride then only one vertex value so max index is 1 */
- /* should think about converting to VS constants like svga does */
- if (!vbo->stride)
- vbo->max_index = 1;
- else
- vbo->max_index =
- (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride;
- }
-
- max_index = MIN2(vbo->max_index, max_index);
- }
-
- r300->any_user_vbs = any_user_buffer;
- r300->vertex_buffer_max_index = max_index;
- r300->aos_dirty = TRUE;
- r300->validate_buffers = TRUE;
+ r300->vertex_arrays_dirty = TRUE;
} else {
/* SW TCL. */
draw_set_vertex_buffers(r300->draw, count, buffers);
}
-
- /* Common code. */
- for (i = 0; i < count; i++) {
- /* Reference our buffer. */
- pipe_resource_reference(&r300->vertex_buffer[i].buffer, buffers[i].buffer);
- }
- for (; i < r300->vertex_buffer_count; i++) {
- /* Dereference any old buffers. */
- pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL);
- }
-
- memcpy(r300->vertex_buffer, buffers,
- sizeof(struct pipe_vertex_buffer) * count);
- r300->vertex_buffer_count = count;
}
static void r300_set_index_buffer(struct pipe_context* pipe,
@@ -1538,19 +1592,19 @@ static void r300_set_index_buffer(struct pipe_context* pipe,
{
struct r300_context* r300 = r300_context(pipe);
- if (ib) {
+ if (ib && ib->buffer) {
+ assert(ib->offset % ib->index_size == 0);
+
pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer);
memcpy(&r300->index_buffer, ib, sizeof(r300->index_buffer));
+ r300->index_buffer.offset /= r300->index_buffer.index_size;
}
else {
pipe_resource_reference(&r300->index_buffer.buffer, NULL);
memset(&r300->index_buffer, 0, sizeof(r300->index_buffer));
}
- if (r300->screen->caps.has_tcl) {
- r300->validate_buffers = TRUE;
- }
- else {
+ if (!r300->screen->caps.has_tcl) {
draw_set_index_buffer(r300->draw, ib);
}
}
@@ -1563,17 +1617,11 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems)
enum pipe_format format;
unsigned i;
- if (velems->count > 16) {
- fprintf(stderr, "r300: More than 16 vertex elements are not supported,"
- " requested %i, using 16.\n", velems->count);
- velems->count = 16;
- }
-
/* Vertex shaders have no semantics on their inputs,
* so PSC should just route stuff based on the vertex elements,
* and not on attrib information. */
for (i = 0; i < velems->count; i++) {
- format = velems->hw_format[i];
+ format = velems->velem[i].src_format;
type = r300_translate_vertex_data_type(format);
if (type == R300_INVALID_FORMAT) {
@@ -1605,16 +1653,13 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems)
vstream->count = (i >> 1) + 1;
}
-#define FORMAT_REPLACE(what, withwhat) \
- case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break
-
static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
unsigned count,
const struct pipe_vertex_element* attribs)
{
+ struct r300_context *r300 = r300_context(pipe);
struct r300_vertex_element_state *velems;
unsigned i;
- enum pipe_format *format;
struct pipe_vertex_element dummy_attrib = {0};
/* R300 Programmable Stream Control (PSC) doesn't support 0 vertex elements. */
@@ -1622,81 +1667,33 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
dummy_attrib.src_format = PIPE_FORMAT_R8G8B8A8_UNORM;
attribs = &dummy_attrib;
count = 1;
+ } else if (count > 16) {
+ fprintf(stderr, "r300: More than 16 vertex elements are not supported,"
+ " requested %i, using 16.\n", count);
+ count = 16;
}
- assert(count <= PIPE_MAX_ATTRIBS);
velems = CALLOC_STRUCT(r300_vertex_element_state);
- if (velems != NULL) {
- velems->count = count;
- memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count);
-
- if (r300_screen(pipe->screen)->caps.has_tcl) {
- /* Set the best hw format in case the original format is not
- * supported by hw. */
- for (i = 0; i < count; i++) {
- velems->hw_format[i] = velems->velem[i].src_format;
- format = &velems->hw_format[i];
-
- /* This is basically the list of unsupported formats.
- * For now we don't care about the alignment, that's going to
- * be sorted out after the PSC setup. */
- switch (*format) {
- FORMAT_REPLACE(R64_FLOAT, R32_FLOAT);
- FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT);
- FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT);
- FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT);
-
- FORMAT_REPLACE(R32_UNORM, R32_FLOAT);
- FORMAT_REPLACE(R32G32_UNORM, R32G32_FLOAT);
- FORMAT_REPLACE(R32G32B32_UNORM, R32G32B32_FLOAT);
- FORMAT_REPLACE(R32G32B32A32_UNORM, R32G32B32A32_FLOAT);
-
- FORMAT_REPLACE(R32_USCALED, R32_FLOAT);
- FORMAT_REPLACE(R32G32_USCALED, R32G32_FLOAT);
- FORMAT_REPLACE(R32G32B32_USCALED, R32G32B32_FLOAT);
- FORMAT_REPLACE(R32G32B32A32_USCALED,R32G32B32A32_FLOAT);
-
- FORMAT_REPLACE(R32_SNORM, R32_FLOAT);
- FORMAT_REPLACE(R32G32_SNORM, R32G32_FLOAT);
- FORMAT_REPLACE(R32G32B32_SNORM, R32G32B32_FLOAT);
- FORMAT_REPLACE(R32G32B32A32_SNORM, R32G32B32A32_FLOAT);
-
- FORMAT_REPLACE(R32_SSCALED, R32_FLOAT);
- FORMAT_REPLACE(R32G32_SSCALED, R32G32_FLOAT);
- FORMAT_REPLACE(R32G32B32_SSCALED, R32G32B32_FLOAT);
- FORMAT_REPLACE(R32G32B32A32_SSCALED,R32G32B32A32_FLOAT);
-
- FORMAT_REPLACE(R32_FIXED, R32_FLOAT);
- FORMAT_REPLACE(R32G32_FIXED, R32G32_FLOAT);
- FORMAT_REPLACE(R32G32B32_FIXED, R32G32B32_FLOAT);
- FORMAT_REPLACE(R32G32B32A32_FIXED, R32G32B32A32_FLOAT);
-
- default:;
- }
+ if (!velems)
+ return NULL;
- velems->incompatible_layout =
- velems->incompatible_layout ||
- velems->velem[i].src_format != velems->hw_format[i] ||
- velems->velem[i].src_offset % 4 != 0;
- }
+ velems->count = count;
+ velems->vmgr_elements =
+ u_vbuf_mgr_create_vertex_elements(r300->vbuf_mgr, count, attribs,
+ velems->velem);
- /* Now setup PSC.
- * The unused components will be replaced by (..., 0, 1). */
- r300_vertex_psc(velems);
-
- /* Align the formats to the size of DWORD.
- * We only care about the blocksizes of the formats since
- * swizzles are already set up.
- * Also compute the vertex size. */
- for (i = 0; i < count; i++) {
- /* This is OK because we check for aligned strides too
- * elsewhere. */
- velems->hw_format_size[i] =
- align(util_format_get_blocksize(velems->hw_format[i]), 4);
- velems->vertex_size_dwords += velems->hw_format_size[i] / 4;
- }
+ if (r300_screen(pipe->screen)->caps.has_tcl) {
+ /* Setup PSC.
+ * The unused components will be replaced by (..., 0, 1). */
+ r300_vertex_psc(velems);
+
+ for (i = 0; i < count; i++) {
+ velems->format_size[i] =
+ align(util_format_get_blocksize(velems->velem[i].src_format), 4);
+ velems->vertex_size_dwords += velems->format_size[i] / 4;
}
}
+
return velems;
}
@@ -1712,6 +1709,8 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
r300->velems = velems;
+ u_vbuf_mgr_bind_vertex_elements(r300->vbuf_mgr, state, velems->vmgr_elements);
+
if (r300->draw) {
draw_set_vertex_elements(r300->draw, velems->count, velems->velem);
return;
@@ -1719,12 +1718,16 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state);
r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2;
- r300->aos_dirty = TRUE;
+ r300->vertex_arrays_dirty = TRUE;
}
static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state)
{
- FREE(state);
+ struct r300_context *r300 = r300_context(pipe);
+ struct r300_vertex_element_state *velems = state;
+
+ u_vbuf_mgr_destroy_vertex_elements(r300->vbuf_mgr, velems->vmgr_elements);
+ FREE(state);
}
static void* r300_create_vs_state(struct pipe_context* pipe,
@@ -1811,6 +1814,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe,
{
struct r300_context* r300 = r300_context(pipe);
struct r300_constant_buffer *cbuf;
+ struct r300_resource *rbuf = r300_resource(buf);
uint32_t *mapped;
switch (shader) {
@@ -1824,14 +1828,18 @@ static void r300_set_constant_buffer(struct pipe_context *pipe,
return;
}
- if (buf == NULL || buf->width0 == 0 ||
- (mapped = (uint32_t*)r300_buffer(buf)->constant_buffer) == NULL) {
+ if (buf == NULL || buf->width0 == 0)
+ return;
+
+ if (rbuf->b.user_ptr)
+ mapped = (uint32_t*)rbuf->b.user_ptr;
+ else if (rbuf->constant_buffer)
+ mapped = (uint32_t*)rbuf->constant_buffer;
+ else
return;
- }
if (shader == PIPE_SHADER_FRAGMENT ||
(shader == PIPE_SHADER_VERTEX && r300->screen->caps.has_tcl)) {
- assert((buf->width0 % (4 * sizeof(float))) == 0);
cbuf->ptr = mapped;
}
@@ -1862,6 +1870,14 @@ static void r300_set_constant_buffer(struct pipe_context *pipe,
}
}
+static void r300_texture_barrier(struct pipe_context *pipe)
+{
+ struct r300_context *r300 = r300_context(pipe);
+
+ r300_mark_atom_dirty(r300, &r300->gpu_flush);
+ r300_mark_atom_dirty(r300, &r300->texture_cache_inval);
+}
+
void r300_init_state_functions(struct r300_context* r300)
{
r300->context.create_blend_state = r300_create_blend_state;
@@ -1908,6 +1924,7 @@ void r300_init_state_functions(struct r300_context* r300)
r300->context.set_vertex_buffers = r300_set_vertex_buffers;
r300->context.set_index_buffer = r300_set_index_buffer;
+ r300->context.redefine_user_buffer = u_default_redefine_user_buffer;
r300->context.create_vertex_elements_state = r300_create_vertex_elements_state;
r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state;
@@ -1916,4 +1933,6 @@ void r300_init_state_functions(struct r300_context* r300)
r300->context.create_vs_state = r300_create_vs_state;
r300->context.bind_vs_state = r300_bind_vs_state;
r300->context.delete_vs_state = r300_delete_vs_state;
+
+ r300->context.texture_barrier = r300_texture_barrier;
}
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index d5fc8ece25..ec00e2552c 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -29,10 +29,8 @@
#include "r300_context.h"
#include "r300_fs.h"
-#include "r300_hyperz.h"
#include "r300_screen.h"
#include "r300_shader_semantics.h"
-#include "r300_state_derived.h"
#include "r300_state_inlines.h"
#include "r300_texture.h"
#include "r300_vs.h"
@@ -490,7 +488,8 @@ static void r300_update_rs_block(struct r300_context *r300)
for (; i < ATTR_GENERIC_COUNT; i++) {
if (fs_inputs->generic[i] != ATTR_UNUSED) {
fprintf(stderr, "r300: ERROR: FS input generic %i unassigned, "
- "not enough hardware slots.\n", i);
+ "not enough hardware slots (it's not a bug, do not "
+ "report it).\n", i);
}
}
@@ -525,7 +524,8 @@ static void r300_update_rs_block(struct r300_context *r300)
DBG(r300, DBG_RS, "r300: FS input fog unassigned.\n");
} else {
fprintf(stderr, "r300: ERROR: FS input fog unassigned, "
- "not enough hardware slots.\n");
+ "not enough hardware slots. (it's not a bug, "
+ "do not report it)\n");
}
}
}
@@ -552,7 +552,8 @@ static void r300_update_rs_block(struct r300_context *r300)
} else {
if (fs_inputs->wpos != ATTR_UNUSED && tex_count >= 8) {
fprintf(stderr, "r300: ERROR: FS input WPOS unassigned, "
- "not enough hardware slots.\n");
+ "not enough hardware slots. (it's not a bug, do not "
+ "report it)\n");
}
}
@@ -640,11 +641,36 @@ static uint32_t r300_get_border_color(enum pipe_format format,
/* Compressed formats. */
if (util_format_is_compressed(format)) {
- util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
- return uc.ui;
+ switch (format) {
+ case PIPE_FORMAT_RGTC1_SNORM:
+ case PIPE_FORMAT_RGTC1_UNORM:
+ case PIPE_FORMAT_LATC1_SNORM:
+ case PIPE_FORMAT_LATC1_UNORM:
+ /* Add 1/32 to round the border color instead of truncating. */
+ /* The Y component is used for the border color. */
+ border_swizzled[1] = border_swizzled[2] + 1.0f/32;
+ util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc);
+ return uc.ui;
+ case PIPE_FORMAT_RGTC2_SNORM:
+ case PIPE_FORMAT_LATC2_SNORM:
+ border_swizzled[0] = border_swizzled[2];
+ util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SNORM, &uc);
+ return uc.ui;
+ case PIPE_FORMAT_RGTC2_UNORM:
+ case PIPE_FORMAT_LATC2_UNORM:
+ util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
+ return uc.ui;
+ default:
+ util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
+ return uc.ui;
+ }
}
switch (desc->channel[0].size) {
+ case 2:
+ util_pack_color(border_swizzled, PIPE_FORMAT_B2G3R3_UNORM, &uc);
+ break;
+
case 4:
util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc);
break;
@@ -671,7 +697,20 @@ static uint32_t r300_get_border_color(enum pipe_format format,
case 16:
if (desc->nr_channels <= 2) {
border_swizzled[0] = border_swizzled[2];
- util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_UNORM, &uc);
+ if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) {
+ util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_FLOAT, &uc);
+ } else {
+ util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_UNORM, &uc);
+ }
+ } else {
+ util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
+ }
+ break;
+
+ case 32:
+ if (desc->nr_channels == 1) {
+ border_swizzled[0] = border_swizzled[2];
+ util_pack_color(border_swizzled, PIPE_FORMAT_R32_FLOAT, &uc);
} else {
util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
}
@@ -681,6 +720,25 @@ static uint32_t r300_get_border_color(enum pipe_format format,
return uc.ui;
}
+static boolean util_format_is_float(enum pipe_format format)
+{
+ const struct util_format_description *desc = util_format_description(format);
+ unsigned i;
+
+ if (!format)
+ return FALSE;
+
+ /* Find the first non-void channel. */
+ for (i = 0; i < 4; i++)
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID)
+ break;
+
+ if (i == 4)
+ return FALSE;
+
+ return desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT ? TRUE : FALSE;
+}
+
static void r300_merge_textures_and_samplers(struct r300_context* r300)
{
struct r300_textures_state *state =
@@ -688,7 +746,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
struct r300_texture_sampler_state *texstate;
struct r300_sampler_state *sampler;
struct r300_sampler_view *view;
- struct r300_texture *tex;
+ struct r300_resource *tex;
unsigned min_level, max_level, i, j, size;
unsigned count = MIN2(state->sampler_view_count,
state->sampler_state_count);
@@ -706,7 +764,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
state->tx_enable |= 1 << i;
view = state->sampler_views[i];
- tex = r300_texture(view->base.texture);
+ tex = r300_resource(view->base.texture);
sampler = state->sampler_states[i];
texstate = &state->regs[i];
@@ -722,32 +780,37 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
/* determine min/max levels */
max_level = MIN3(sampler->max_lod + view->base.u.tex.first_level,
- tex->desc.b.b.last_level, view->base.u.tex.last_level);
+ tex->b.b.b.last_level, view->base.u.tex.last_level);
min_level = MIN2(sampler->min_lod + view->base.u.tex.first_level,
max_level);
- if (tex->desc.is_npot && min_level > 0) {
+ if (tex->tex.is_npot && min_level > 0) {
/* Even though we do not implement mipmapping for NPOT
* textures, we should at least honor the minimum level
* which is allowed to be displayed. We do this by setting up
- * an i-th mipmap level as the zero level. */
- r300_texture_setup_format_state(r300->screen, &tex->desc,
+ * the i-th mipmap level as the zero level. */
+ unsigned offset = tex->tex_offset +
+ tex->tex.offset_in_bytes[min_level];
+
+ r300_texture_setup_format_state(r300->screen, tex,
min_level,
&texstate->format);
- texstate->format.tile_config |=
- tex->desc.offset_in_bytes[min_level] & 0xffffffe0;
- assert((tex->desc.offset_in_bytes[min_level] & 0x1f) == 0);
+ texstate->format.tile_config |= offset & 0xffffffe0;
+ assert((offset & 0x1f) == 0);
+ } else {
+ texstate->format.tile_config |= tex->tex_offset & 0xffffffe0;
+ assert((tex->tex_offset & 0x1f) == 0);
}
/* Assign a texture cache region. */
texstate->format.format1 |= view->texcache_region;
/* Depth textures are kinda special. */
- if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) {
+ if (util_format_is_depth_or_stencil(tex->b.b.b.format)) {
unsigned char depth_swizzle[4];
if (!r300->screen->caps.is_r500 &&
- util_format_get_blocksizebits(tex->desc.b.b.format) == 32) {
+ util_format_get_blocksizebits(tex->b.b.b.format) == 32) {
/* X24x8 is sampled as Y16X16 on r3xx-r4xx.
* The depth here is at the Y component. */
for (j = 0; j < 4; j++)
@@ -772,17 +835,17 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
}
if (r300->screen->caps.dxtc_swizzle &&
- util_format_is_compressed(tex->desc.b.b.format)) {
+ util_format_is_compressed(tex->b.b.b.format)) {
texstate->filter1 |= R400_DXTC_SWIZZLE_ENABLE;
}
/* to emulate 1D textures through 2D ones correctly */
- if (tex->desc.b.b.target == PIPE_TEXTURE_1D) {
+ if (tex->b.b.b.target == PIPE_TEXTURE_1D) {
texstate->filter0 &= ~R300_TX_WRAP_T_MASK;
texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
}
- if (tex->desc.is_npot) {
+ if (tex->tex.is_npot) {
/* NPOT textures don't support mip filter, unfortunately.
* This prevents incorrect rendering. */
texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK;
@@ -811,6 +874,32 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
texstate->filter0 |= R300_TX_MAX_MIP_LEVEL(min_level);
}
+ /* Float textures only support nearest and mip-nearest filtering. */
+ if (util_format_is_float(tex->b.b.b.format)) {
+ /* No MAG linear filtering. */
+ if ((texstate->filter0 & R300_TX_MAG_FILTER_MASK) ==
+ R300_TX_MAG_FILTER_LINEAR) {
+ texstate->filter0 &= ~R300_TX_MAG_FILTER_MASK;
+ texstate->filter0 |= R300_TX_MAG_FILTER_NEAREST;
+ }
+ /* No MIN linear filtering. */
+ if ((texstate->filter0 & R300_TX_MIN_FILTER_MASK) ==
+ R300_TX_MIN_FILTER_LINEAR) {
+ texstate->filter0 &= ~R300_TX_MIN_FILTER_MASK;
+ texstate->filter0 |= R300_TX_MIN_FILTER_NEAREST;
+ }
+ /* No mipmap linear filtering. */
+ if ((texstate->filter0 & R300_TX_MIN_FILTER_MIP_MASK) ==
+ R300_TX_MIN_FILTER_MIP_LINEAR) {
+ texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK;
+ texstate->filter0 |= R300_TX_MIN_FILTER_MIP_NEAREST;
+ }
+ /* No anisotropic filtering. */
+ texstate->filter0 &= ~R300_TX_MAX_ANISO_MASK;
+ texstate->filter1 &= ~R500_TX_MAX_ANISO_MASK;
+ texstate->filter1 &= ~R500_TX_ANISO_HIGH_QUALITY;
+ }
+
texstate->filter0 |= i << 28;
size += 16;
@@ -859,44 +948,35 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
}
}
-/* We can't use compressed zbuffers as samplers. */
-static void r300_flush_depth_textures(struct r300_context *r300)
+static void r300_decompress_depth_textures(struct r300_context *r300)
{
struct r300_textures_state *state =
(struct r300_textures_state*)r300->textures_state.state;
- unsigned i, level;
+ struct pipe_resource *tex;
unsigned count = MIN2(state->sampler_view_count,
state->sampler_state_count);
+ unsigned i;
- if (r300->z_decomp_rd)
+ if (!r300->hyperz_locked || !r300->locked_zbuffer) {
return;
+ }
- for (i = 0; i < count; i++)
+ for (i = 0; i < count; i++) {
if (state->sampler_views[i] && state->sampler_states[i]) {
- struct pipe_resource *tex = state->sampler_views[i]->base.texture;
-
- if (tex->target == PIPE_TEXTURE_3D ||
- tex->target == PIPE_TEXTURE_CUBE)
- continue;
-
- /* Ignore non-depth textures.
- * Also ignore reinterpreted depth textures, e.g. resource_copy. */
- if (!util_format_is_depth_or_stencil(tex->format))
- continue;
-
- for (level = 0; level <= tex->last_level; level++)
- if (r300_texture(tex)->zmask_in_use[level]) {
- /* We don't handle 3D textures and cubemaps yet. */
- r300_flush_depth_stencil(&r300->context, tex, level, 0);
- }
+ tex = state->sampler_views[i]->base.texture;
+
+ if (tex == r300->locked_zbuffer->texture) {
+ r300_decompress_zmask_locked(r300);
+ return;
+ }
}
+ }
}
void r300_update_derived_state(struct r300_context* r300)
{
- r300_flush_depth_textures(r300);
-
if (r300->textures_state.dirty) {
+ r300_decompress_depth_textures(r300);
r300_merge_textures_and_samplers(r300);
}
diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h
deleted file mode 100644
index 71a4a47b00..0000000000
--- a/src/gallium/drivers/r300/r300_state_derived.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_STATE_DERIVED_H
-#define R300_STATE_DERIVED_H
-
-struct r300_context;
-
-void r300_update_derived_state(struct r300_context* r300);
-
-#endif /* R300_STATE_DERIVED_H */
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index 7e501221b1..54dae1acd9 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -25,13 +25,9 @@
#define R300_STATE_INLINES_H
#include "draw/draw_vertex.h"
-
#include "pipe/p_format.h"
-
#include "util/u_format.h"
-
#include "r300_reg.h"
-
#include <stdio.h>
/* Some maths. These should probably find their way to u_math, if needed. */
@@ -42,23 +38,24 @@ static INLINE int pack_float_16_6x(float f) {
/* Blend state. */
-static INLINE uint32_t r300_translate_blend_function(int blend_func)
+static INLINE uint32_t r300_translate_blend_function(int blend_func,
+ boolean clamp)
{
switch (blend_func) {
- case PIPE_BLEND_ADD:
- return R300_COMB_FCN_ADD_CLAMP;
- case PIPE_BLEND_SUBTRACT:
- return R300_COMB_FCN_SUB_CLAMP;
- case PIPE_BLEND_REVERSE_SUBTRACT:
- return R300_COMB_FCN_RSUB_CLAMP;
- case PIPE_BLEND_MIN:
- return R300_COMB_FCN_MIN;
- case PIPE_BLEND_MAX:
- return R300_COMB_FCN_MAX;
- default:
- fprintf(stderr, "r300: Unknown blend function %d\n", blend_func);
- assert(0);
- break;
+ case PIPE_BLEND_ADD:
+ return clamp ? R300_COMB_FCN_ADD_CLAMP : R300_COMB_FCN_ADD_NOCLAMP;
+ case PIPE_BLEND_SUBTRACT:
+ return clamp ? R300_COMB_FCN_SUB_CLAMP : R300_COMB_FCN_SUB_NOCLAMP;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return clamp ? R300_COMB_FCN_RSUB_CLAMP : R300_COMB_FCN_RSUB_NOCLAMP;
+ case PIPE_BLEND_MIN:
+ return R300_COMB_FCN_MIN;
+ case PIPE_BLEND_MAX:
+ return R300_COMB_FCN_MAX;
+ default:
+ fprintf(stderr, "r300: Unknown blend function %d\n", blend_func);
+ assert(0);
+ break;
}
return 0;
}
@@ -341,24 +338,6 @@ static INLINE uint32_t r500_anisotropy(unsigned max_aniso)
R500_TX_ANISO_HIGH_QUALITY;
}
-/* Non-CSO state. (For now.) */
-
-static INLINE uint32_t r300_translate_gb_pipes(int pipe_count)
-{
- switch (pipe_count) {
- case 1:
- return R300_GB_TILE_PIPE_COUNT_RV300;
- case 2:
- return R300_GB_TILE_PIPE_COUNT_R300;
- case 3:
- return R300_GB_TILE_PIPE_COUNT_R420_3P;
- case 4:
- return R300_GB_TILE_PIPE_COUNT_R420;
- }
- return 0;
-}
-
-
/* Translate pipe_formats into PSC vertex types. */
static INLINE uint16_t
r300_translate_vertex_data_type(enum pipe_format format) {
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 70fc5d96d8..c650fb7ed3 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -171,8 +171,18 @@ uint32_t r300_translate_texformat(enum pipe_format format,
}
}
- result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view,
- util_format_is_compressed(format) && dxtc_swizzle);
+ if (util_format_is_compressed(format) &&
+ dxtc_swizzle &&
+ format != PIPE_FORMAT_RGTC2_UNORM &&
+ format != PIPE_FORMAT_RGTC2_SNORM &&
+ format != PIPE_FORMAT_LATC2_UNORM &&
+ format != PIPE_FORMAT_LATC2_SNORM) {
+ result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view,
+ TRUE);
+ } else {
+ result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view,
+ FALSE);
+ }
/* S3TC formats. */
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
@@ -197,10 +207,25 @@ uint32_t r300_translate_texformat(enum pipe_format format,
}
}
- /* Add sign. */
- for (i = 0; i < desc->nr_channels; i++) {
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
- result |= sign_bit[i];
+ /* RGTC formats. */
+ if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+ switch (format) {
+ case PIPE_FORMAT_RGTC1_SNORM:
+ case PIPE_FORMAT_LATC1_SNORM:
+ result |= sign_bit[1];
+ case PIPE_FORMAT_LATC1_UNORM:
+ case PIPE_FORMAT_RGTC1_UNORM:
+ return R500_TX_FORMAT_ATI1N | result;
+
+ case PIPE_FORMAT_RGTC2_SNORM:
+ case PIPE_FORMAT_LATC2_SNORM:
+ result |= sign_bit[2] | sign_bit[3];
+ case PIPE_FORMAT_RGTC2_UNORM:
+ case PIPE_FORMAT_LATC2_UNORM:
+ return R400_TX_FORMAT_ATI2N | result;
+
+ default:
+ return ~0; /* Unsupported/unknown. */
}
}
@@ -211,17 +236,10 @@ uint32_t r300_translate_texformat(enum pipe_format format,
return R300_TX_FORMAT_CxV8U8 | result;
}
- /* RGTC formats. */
- if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
- switch (format) {
- case PIPE_FORMAT_RGTC1_UNORM:
- case PIPE_FORMAT_RGTC1_SNORM:
- return R500_TX_FORMAT_ATI1N | result;
- case PIPE_FORMAT_RGTC2_UNORM:
- case PIPE_FORMAT_RGTC2_SNORM:
- return R400_TX_FORMAT_ATI2N | result;
- default:
- return ~0; /* Unsupported/unknown. */
+ /* Add sign. */
+ for (i = 0; i < desc->nr_channels; i++) {
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
+ result |= sign_bit[i];
}
}
@@ -244,6 +262,11 @@ uint32_t r300_translate_texformat(enum pipe_format format,
desc->channel[2].size == 6) {
return R300_TX_FORMAT_Z6Y5X5 | result;
}
+ if (desc->channel[0].size == 2 &&
+ desc->channel[1].size == 3 &&
+ desc->channel[2].size == 3) {
+ return R300_TX_FORMAT_Z3Y3X2 | result;
+ }
return ~0; /* Unsupported/unknown. */
case 4:
@@ -348,6 +371,8 @@ uint32_t r500_tx_format_msb_bit(enum pipe_format format)
switch (format) {
case PIPE_FORMAT_RGTC1_UNORM:
case PIPE_FORMAT_RGTC1_SNORM:
+ case PIPE_FORMAT_LATC1_UNORM:
+ case PIPE_FORMAT_LATC1_SNORM:
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
return R500_TXFORMAT_MSB;
@@ -365,14 +390,18 @@ static uint32_t r300_translate_colorformat(enum pipe_format format)
switch (format) {
/* 8-bit buffers. */
case PIPE_FORMAT_A8_UNORM:
+ /*case PIPE_FORMAT_A8_SNORM:*/
case PIPE_FORMAT_I8_UNORM:
+ /*case PIPE_FORMAT_I8_SNORM:*/
case PIPE_FORMAT_L8_UNORM:
+ /*case PIPE_FORMAT_L8_SNORM:*/
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return R300_COLOR_FORMAT_I8;
/* 16-bit buffers. */
case PIPE_FORMAT_L8A8_UNORM:
+ /*case PIPE_FORMAT_L8A8_SNORM:*/
case PIPE_FORMAT_R8G8_UNORM:
case PIPE_FORMAT_R8G8_SNORM:
return R300_COLOR_FORMAT_UV88;
@@ -390,13 +419,21 @@ static uint32_t r300_translate_colorformat(enum pipe_format format)
/* 32-bit buffers. */
case PIPE_FORMAT_B8G8R8A8_UNORM:
+ /*case PIPE_FORMAT_B8G8R8A8_SNORM:*/
case PIPE_FORMAT_B8G8R8X8_UNORM:
+ /*case PIPE_FORMAT_B8G8R8X8_SNORM:*/
case PIPE_FORMAT_A8R8G8B8_UNORM:
+ /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/
case PIPE_FORMAT_X8R8G8B8_UNORM:
+ /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/
case PIPE_FORMAT_A8B8G8R8_UNORM:
+ /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8A8_SNORM:
case PIPE_FORMAT_X8B8G8R8_UNORM:
+ /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/
case PIPE_FORMAT_R8G8B8X8_UNORM:
+ /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/
case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
return R300_COLOR_FORMAT_ARGB8888;
@@ -481,6 +518,8 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
} else {
if (desc->channel[i].size == 16) {
modifier |= R300_US_OUT_FMT_C4_16;
+ } else if (desc->channel[i].size == 10) {
+ modifier |= R300_US_OUT_FMT_C4_10;
} else {
/* C4_8 seems to be used for the formats whose pixel size
* is <= 32 bits. */
@@ -499,9 +538,12 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
/* 8-bit outputs, one channel.
* COLORFORMAT_I8 stores the C2 component. */
case PIPE_FORMAT_A8_UNORM:
+ /*case PIPE_FORMAT_A8_SNORM:*/
return modifier | R300_C2_SEL_A;
case PIPE_FORMAT_I8_UNORM:
+ /*case PIPE_FORMAT_I8_SNORM:*/
case PIPE_FORMAT_L8_UNORM:
+ /*case PIPE_FORMAT_L8_SNORM:*/
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return modifier | R300_C2_SEL_R;
@@ -509,6 +551,7 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
/* 16-bit outputs, two channels.
* COLORFORMAT_UV88 stores C2 and C0. */
case PIPE_FORMAT_L8A8_UNORM:
+ /*case PIPE_FORMAT_L8A8_SNORM:*/
return modifier | R300_C0_SEL_A | R300_C2_SEL_R;
case PIPE_FORMAT_R8G8_UNORM:
case PIPE_FORMAT_R8G8_SNORM:
@@ -521,7 +564,9 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
case PIPE_FORMAT_B4G4R4A4_UNORM:
case PIPE_FORMAT_B4G4R4X4_UNORM:
case PIPE_FORMAT_B8G8R8A8_UNORM:
+ /*case PIPE_FORMAT_B8G8R8A8_SNORM:*/
case PIPE_FORMAT_B8G8R8X8_UNORM:
+ /*case PIPE_FORMAT_B8G8R8X8_SNORM:*/
case PIPE_FORMAT_B10G10R10A2_UNORM:
return modifier |
R300_C0_SEL_B | R300_C1_SEL_G |
@@ -529,20 +574,26 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
/* ARGB outputs. */
case PIPE_FORMAT_A8R8G8B8_UNORM:
+ /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/
case PIPE_FORMAT_X8R8G8B8_UNORM:
+ /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/
return modifier |
R300_C0_SEL_A | R300_C1_SEL_R |
R300_C2_SEL_G | R300_C3_SEL_B;
/* ABGR outputs. */
case PIPE_FORMAT_A8B8G8R8_UNORM:
+ /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/
case PIPE_FORMAT_X8B8G8R8_UNORM:
+ /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/
return modifier |
R300_C0_SEL_A | R300_C1_SEL_B |
R300_C2_SEL_G | R300_C3_SEL_R;
/* RGBA outputs. */
case PIPE_FORMAT_R8G8B8X8_UNORM:
+ /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8A8_SNORM:
case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
case PIPE_FORMAT_R10G10B10A2_UNORM:
@@ -578,11 +629,12 @@ boolean r300_is_sampler_format_supported(enum pipe_format format)
}
void r300_texture_setup_format_state(struct r300_screen *screen,
- struct r300_texture_desc *desc,
+ struct r300_resource *tex,
unsigned level,
struct r300_texture_format_state *out)
{
- struct pipe_resource *pt = &desc->b.b;
+ struct pipe_resource *pt = &tex->b.b.b;
+ struct r300_texture_desc *desc = &tex->tex;
boolean is_r500 = screen->caps.is_r500;
/* Mask out all the fields we change. */
@@ -625,163 +677,147 @@ void r300_texture_setup_format_state(struct r300_screen *screen,
R300_TXO_MICRO_TILE(desc->microtile);
}
-static void r300_texture_setup_fb_state(struct r300_screen* screen,
- struct r300_texture* tex)
+static void r300_texture_setup_fb_state(struct r300_surface *surf)
{
- unsigned i;
+ struct r300_resource *tex = r300_resource(surf->base.texture);
+ unsigned level = surf->base.u.tex.level;
/* Set framebuffer state. */
- if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) {
- for (i = 0; i <= tex->desc.b.b.last_level; i++) {
- tex->fb_state.pitch[i] =
- tex->desc.stride_in_pixels[i] |
- R300_DEPTHMACROTILE(tex->desc.macrotile[i]) |
- R300_DEPTHMICROTILE(tex->desc.microtile);
- }
- tex->fb_state.format = r300_translate_zsformat(tex->desc.b.b.format);
+ if (util_format_is_depth_or_stencil(surf->base.format)) {
+ surf->pitch =
+ tex->tex.stride_in_pixels[level] |
+ R300_DEPTHMACROTILE(tex->tex.macrotile[level]) |
+ R300_DEPTHMICROTILE(tex->tex.microtile);
+ surf->format = r300_translate_zsformat(surf->base.format);
+ surf->pitch_zmask = tex->tex.zmask_stride_in_pixels[level];
+ surf->pitch_hiz = tex->tex.hiz_stride_in_pixels[level];
} else {
- for (i = 0; i <= tex->desc.b.b.last_level; i++) {
- tex->fb_state.pitch[i] =
- tex->desc.stride_in_pixels[i] |
- r300_translate_colorformat(tex->desc.b.b.format) |
- R300_COLOR_TILE(tex->desc.macrotile[i]) |
- R300_COLOR_MICROTILE(tex->desc.microtile);
- }
- tex->fb_state.format = r300_translate_out_fmt(tex->desc.b.b.format);
+ surf->pitch =
+ tex->tex.stride_in_pixels[level] |
+ r300_translate_colorformat(surf->base.format) |
+ R300_COLOR_TILE(tex->tex.macrotile[level]) |
+ R300_COLOR_MICROTILE(tex->tex.microtile);
+ surf->format = r300_translate_out_fmt(surf->base.format);
}
}
-void r300_texture_reinterpret_format(struct pipe_screen *screen,
+boolean r300_resource_set_properties(struct pipe_screen *screen,
struct pipe_resource *tex,
- enum pipe_format new_format)
+ unsigned offset,
+ const struct pipe_resource *new_properties)
{
- struct r300_screen *r300screen = r300_screen(screen);
+ struct r300_screen *rscreen = r300_screen(screen);
+ struct r300_resource *res = r300_resource(tex);
- SCREEN_DBG(r300screen, DBG_TEX,
- "r300: texture_reinterpret_format: %s -> %s\n",
+ SCREEN_DBG(rscreen, DBG_TEX,
+ "r300: texture_set_properties: %s -> %s\n",
util_format_short_name(tex->format),
- util_format_short_name(new_format));
-
- tex->format = new_format;
-
- r300_texture_setup_fb_state(r300_screen(screen), r300_texture(tex));
-}
-
-static unsigned r300_texture_is_referenced(struct pipe_context *context,
- struct pipe_resource *texture,
- unsigned level, int layer)
-{
- struct r300_context *r300 = r300_context(context);
- struct r300_texture *rtex = (struct r300_texture *)texture;
+ util_format_short_name(new_properties->format));
- if (r300->rws->cs_is_buffer_referenced(r300->cs,
- rtex->cs_buffer, R300_REF_CS))
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+ if (!r300_texture_desc_init(rscreen, res, new_properties)) {
+ fprintf(stderr, "r300: ERROR: Cannot set texture properties.\n");
+ return FALSE;
+ }
+ res->tex_offset = offset;
+ r300_texture_setup_format_state(rscreen, res, 0, &res->tx_format);
- return PIPE_UNREFERENCED;
+ return TRUE;
}
static void r300_texture_destroy(struct pipe_screen *screen,
struct pipe_resource* texture)
{
- struct r300_texture* tex = (struct r300_texture*)texture;
- struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys;
- int i;
-
- rws->buffer_reference(rws, &tex->buffer, NULL);
- for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) {
- if (tex->hiz_mem[i])
- u_mmFreeMem(tex->hiz_mem[i]);
- if (tex->zmask_mem[i])
- u_mmFreeMem(tex->zmask_mem[i]);
- }
+ struct r300_resource* tex = (struct r300_resource*)texture;
+ r300_winsys_bo_reference(&tex->buf, NULL);
FREE(tex);
}
-static boolean r300_texture_get_handle(struct pipe_screen* screen,
- struct pipe_resource *texture,
- struct winsys_handle *whandle)
+boolean r300_resource_get_handle(struct pipe_screen* screen,
+ struct pipe_resource *texture,
+ struct winsys_handle *whandle)
{
struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys;
- struct r300_texture* tex = (struct r300_texture*)texture;
+ struct r300_resource* tex = (struct r300_resource*)texture;
if (!tex) {
return FALSE;
}
- return rws->buffer_get_handle(rws, tex->buffer,
- tex->desc.stride_in_bytes[0], whandle);
+ return rws->buffer_get_handle(tex->buf,
+ tex->tex.stride_in_bytes[0], whandle);
}
-struct u_resource_vtbl r300_texture_vtbl =
+static const struct u_resource_vtbl r300_texture_vtbl =
{
- r300_texture_get_handle, /* get_handle */
- r300_texture_destroy, /* resource_destroy */
- r300_texture_is_referenced, /* is_resource_referenced */
- r300_texture_get_transfer, /* get_transfer */
- r300_texture_transfer_destroy, /* transfer_destroy */
- r300_texture_transfer_map, /* transfer_map */
- u_default_transfer_flush_region, /* transfer_flush_region */
- r300_texture_transfer_unmap, /* transfer_unmap */
- u_default_transfer_inline_write /* transfer_inline_write */
+ NULL, /* get_handle */
+ r300_texture_destroy, /* resource_destroy */
+ r300_texture_get_transfer, /* get_transfer */
+ r300_texture_transfer_destroy, /* transfer_destroy */
+ r300_texture_transfer_map, /* transfer_map */
+ NULL, /* transfer_flush_region */
+ r300_texture_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
};
/* The common texture constructor. */
-static struct r300_texture*
+static struct r300_resource*
r300_texture_create_object(struct r300_screen *rscreen,
const struct pipe_resource *base,
enum r300_buffer_tiling microtile,
enum r300_buffer_tiling macrotile,
unsigned stride_in_bytes_override,
unsigned max_buffer_size,
- struct r300_winsys_buffer *buffer)
+ struct r300_winsys_bo *buffer)
{
struct r300_winsys_screen *rws = rscreen->rws;
- struct r300_texture *tex = CALLOC_STRUCT(r300_texture);
+ struct r300_resource *tex = CALLOC_STRUCT(r300_resource);
if (!tex) {
if (buffer)
- rws->buffer_reference(rws, &buffer, NULL);
+ r300_winsys_bo_reference(&buffer, NULL);
return NULL;
}
- /* Initialize the descriptor. */
- if (!r300_texture_desc_init(rscreen, &tex->desc, base,
- microtile, macrotile,
- stride_in_bytes_override,
- max_buffer_size)) {
+ pipe_reference_init(&tex->b.b.b.reference, 1);
+ tex->b.b.b.screen = &rscreen->screen;
+ tex->b.b.b.usage = base->usage;
+ tex->b.b.b.bind = base->bind;
+ tex->b.b.b.flags = base->flags;
+ tex->b.b.vtbl = &r300_texture_vtbl;
+ tex->tex.microtile = microtile;
+ tex->tex.macrotile[0] = macrotile;
+ tex->tex.stride_in_bytes_override = stride_in_bytes_override;
+ tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ?
+ R300_DOMAIN_GTT :
+ R300_DOMAIN_VRAM | R300_DOMAIN_GTT;
+ tex->buf_size = max_buffer_size;
+
+ if (!r300_resource_set_properties(&rscreen->screen, &tex->b.b.b, 0, base)) {
if (buffer)
- rws->buffer_reference(rws, &buffer, NULL);
+ r300_winsys_bo_reference(&buffer, NULL);
FREE(tex);
return NULL;
}
- /* Initialize the hardware state. */
- r300_texture_setup_format_state(rscreen, &tex->desc, 0, &tex->tx_format);
- r300_texture_setup_fb_state(rscreen, tex);
-
- tex->desc.b.vtbl = &r300_texture_vtbl;
- pipe_reference_init(&tex->desc.b.b.reference, 1);
- tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ?
- R300_DOMAIN_GTT :
- R300_DOMAIN_VRAM | R300_DOMAIN_GTT;
- tex->buffer = buffer;
/* Create the backing buffer if needed. */
- if (!tex->buffer) {
- tex->buffer = rws->buffer_create(rws, tex->desc.size_in_bytes, 2048,
+ if (!buffer) {
+ tex->buf_size = tex->tex.size_in_bytes;
+ tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048,
base->bind, base->usage, tex->domain);
- if (!tex->buffer) {
+ if (!tex->buf) {
FREE(tex);
return NULL;
}
+ } else {
+ tex->buf = buffer;
}
- tex->cs_buffer = rws->buffer_get_cs_handle(rws, tex->buffer);
+ tex->cs_buf = rws->buffer_get_cs_handle(tex->buf);
- rws->buffer_set_tiling(rws, tex->buffer,
- tex->desc.microtile, tex->desc.macrotile[0],
- tex->desc.stride_in_bytes[0]);
+ rws->buffer_set_tiling(tex->buf, NULL,
+ tex->tex.microtile, tex->tex.macrotile[0],
+ tex->tex.stride_in_bytes[0]);
return tex;
}
@@ -813,7 +849,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen,
{
struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys;
struct r300_screen *rscreen = r300_screen(screen);
- struct r300_winsys_buffer *buffer;
+ struct r300_winsys_bo *buffer;
enum r300_buffer_tiling microtile, macrotile;
unsigned stride, size;
@@ -829,7 +865,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen,
if (!buffer)
return NULL;
- rws->buffer_get_tiling(rws, buffer, &microtile, &macrotile);
+ rws->buffer_get_tiling(buffer, &microtile, &macrotile);
/* Enforce a microtiled zbuffer. */
if (util_format_is_depth_or_stencil(base->format) &&
@@ -840,8 +876,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen,
break;
case 2:
- if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT))
- microtile = R300_BUFFER_SQUARETILED;
+ microtile = R300_BUFFER_SQUARETILED;
break;
}
}
@@ -857,7 +892,7 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx,
struct pipe_resource* texture,
const struct pipe_surface *surf_tmpl)
{
- struct r300_texture* tex = r300_texture(texture);
+ struct r300_resource* tex = r300_resource(texture);
struct r300_surface* surface = CALLOC_STRUCT(r300_surface);
unsigned level = surf_tmpl->u.tex.level;
@@ -877,29 +912,28 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx,
surface->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer;
surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
- surface->buffer = tex->buffer;
- surface->cs_buffer = tex->cs_buffer;
+ surface->buf = tex->buf;
+ surface->cs_buf = tex->cs_buf;
/* Prefer VRAM if there are multiple domains to choose from. */
surface->domain = tex->domain;
if (surface->domain & R300_DOMAIN_VRAM)
surface->domain &= ~R300_DOMAIN_GTT;
- surface->offset = r300_texture_get_offset(&tex->desc, level,
+ surface->offset = r300_texture_get_offset(tex, level,
surf_tmpl->u.tex.first_layer);
- surface->pitch = tex->fb_state.pitch[level];
- surface->format = tex->fb_state.format;
+ r300_texture_setup_fb_state(surface);
/* Parameters for the CBZB clear. */
- surface->cbzb_allowed = tex->desc.cbzb_allowed[level];
+ surface->cbzb_allowed = tex->tex.cbzb_allowed[level];
surface->cbzb_width = align(surface->base.width, 64);
/* Height must be aligned to the size of a tile. */
- tile_height = r300_get_pixel_alignment(tex->desc.b.b.format,
- tex->desc.b.b.nr_samples,
- tex->desc.microtile,
- tex->desc.macrotile[level],
- DIM_HEIGHT);
+ tile_height = r300_get_pixel_alignment(tex->b.b.b.format,
+ tex->b.b.b.nr_samples,
+ tex->tex.microtile,
+ tex->tex.macrotile[level],
+ DIM_HEIGHT, 0);
surface->cbzb_height = align((surface->base.height + 1) / 2,
tile_height);
@@ -907,7 +941,7 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx,
/* Offset must be aligned to 2K and must point at the beginning
* of a scanline. */
offset = surface->offset +
- tex->desc.stride_in_bytes[level] * surface->cbzb_height;
+ tex->tex.stride_in_bytes[level] * surface->cbzb_height;
surface->cbzb_midpoint_offset = offset & ~2047;
surface->cbzb_pitch = surface->pitch & 0x1ffffc;
@@ -922,8 +956,8 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx,
surface->cbzb_allowed ? "YES" : " NO",
surface->cbzb_width, surface->cbzb_height,
offset & 2047,
- tex->desc.microtile ? "YES" : " NO",
- tex->desc.macrotile[level] ? "YES" : " NO");
+ tex->tex.microtile ? "YES" : " NO",
+ tex->tex.macrotile[level] ? "YES" : " NO");
}
return &surface->base;
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index 0ab22f747e..158a387478 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -32,7 +32,7 @@ struct pipe_resource;
struct winsys_handle;
struct r300_texture_format_state;
struct r300_texture_desc;
-struct r300_texture;
+struct r300_resource;
struct r300_screen;
unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
@@ -46,9 +46,10 @@ uint32_t r300_translate_texformat(enum pipe_format format,
uint32_t r500_tx_format_msb_bit(enum pipe_format format);
-void r300_texture_reinterpret_format(struct pipe_screen *screen,
+boolean r300_resource_set_properties(struct pipe_screen *screen,
struct pipe_resource *tex,
- enum pipe_format new_format);
+ unsigned offset,
+ const struct pipe_resource *new_properties);
boolean r300_is_colorbuffer_format_supported(enum pipe_format format);
@@ -57,10 +58,14 @@ boolean r300_is_zs_format_supported(enum pipe_format format);
boolean r300_is_sampler_format_supported(enum pipe_format format);
void r300_texture_setup_format_state(struct r300_screen *screen,
- struct r300_texture_desc *desc,
+ struct r300_resource *tex,
unsigned level,
struct r300_texture_format_state *out);
+boolean r300_resource_get_handle(struct pipe_screen* screen,
+ struct pipe_resource *texture,
+ struct winsys_handle *whandle);
+
struct pipe_resource*
r300_texture_from_handle(struct pipe_screen* screen,
const struct pipe_resource* base,
diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c
index aa82c47151..2910666dd5 100644
--- a/src/gallium/drivers/r300/r300_texture_desc.c
+++ b/src/gallium/drivers/r300/r300_texture_desc.c
@@ -34,7 +34,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format,
unsigned num_samples,
enum r300_buffer_tiling microtile,
enum r300_buffer_tiling macrotile,
- enum r300_dim dim)
+ enum r300_dim dim, boolean is_rs690)
{
static const unsigned table[2][5][3][2] =
{
@@ -57,6 +57,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format,
{{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */
}
};
+
static const unsigned aa_block[2] = {4, 8};
unsigned tile = 0;
unsigned pixsize = util_format_get_blocksize(format);
@@ -74,6 +75,14 @@ unsigned r300_get_pixel_alignment(enum pipe_format format,
} else {
/* Standard alignment. */
tile = table[macrotile][util_logbase2(pixsize)][microtile][dim];
+ if (macrotile == 0 && is_rs690 && dim == DIM_WIDTH) {
+ int align;
+ int h_tile;
+ h_tile = table[macrotile][util_logbase2(pixsize)][microtile][DIM_HEIGHT];
+ align = 64 / (pixsize * h_tile);
+ if (tile < align)
+ tile = align;
+ }
}
assert(tile);
@@ -81,19 +90,19 @@ unsigned r300_get_pixel_alignment(enum pipe_format format,
}
/* Return true if macrotiling should be enabled on the miplevel. */
-static boolean r300_texture_macro_switch(struct r300_texture_desc *desc,
+static boolean r300_texture_macro_switch(struct r300_resource *tex,
unsigned level,
boolean rv350_mode,
enum r300_dim dim)
{
unsigned tile, texdim;
- tile = r300_get_pixel_alignment(desc->b.b.format, desc->b.b.nr_samples,
- desc->microtile, R300_BUFFER_TILED, dim);
+ tile = r300_get_pixel_alignment(tex->b.b.b.format, tex->b.b.b.nr_samples,
+ tex->tex.microtile, R300_BUFFER_TILED, dim, 0);
if (dim == DIM_WIDTH) {
- texdim = u_minify(desc->width0, level);
+ texdim = u_minify(tex->tex.width0, level);
} else {
- texdim = u_minify(desc->height0, level);
+ texdim = u_minify(tex->tex.height0, level);
}
/* See TX_FILTER1_n.MACRO_SWITCH. */
@@ -109,91 +118,70 @@ static boolean r300_texture_macro_switch(struct r300_texture_desc *desc,
* at the given level.
*/
static unsigned r300_texture_get_stride(struct r300_screen *screen,
- struct r300_texture_desc *desc,
+ struct r300_resource *tex,
unsigned level)
{
unsigned tile_width, width, stride;
+ boolean is_rs690 = (screen->caps.family == CHIP_FAMILY_RS600 ||
+ screen->caps.family == CHIP_FAMILY_RS690 ||
+ screen->caps.family == CHIP_FAMILY_RS740);
- if (desc->stride_in_bytes_override)
- return desc->stride_in_bytes_override;
+ if (tex->tex.stride_in_bytes_override)
+ return tex->tex.stride_in_bytes_override;
/* Check the level. */
- if (level > desc->b.b.last_level) {
+ if (level > tex->b.b.b.last_level) {
SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n",
- __FUNCTION__, level, desc->b.b.last_level);
+ __FUNCTION__, level, tex->b.b.b.last_level);
return 0;
}
- width = u_minify(desc->width0, level);
+ width = u_minify(tex->tex.width0, level);
- if (util_format_is_plain(desc->b.b.format)) {
- tile_width = r300_get_pixel_alignment(desc->b.b.format,
- desc->b.b.nr_samples,
- desc->microtile,
- desc->macrotile[level],
- DIM_WIDTH);
+ if (util_format_is_plain(tex->b.b.b.format)) {
+ tile_width = r300_get_pixel_alignment(tex->b.b.b.format,
+ tex->b.b.b.nr_samples,
+ tex->tex.microtile,
+ tex->tex.macrotile[level],
+ DIM_WIDTH, is_rs690);
width = align(width, tile_width);
- stride = util_format_get_stride(desc->b.b.format, width);
-
- /* Some IGPs need a minimum stride of 64 bytes, hmm... */
- if (!desc->macrotile[level] &&
- (screen->caps.family == CHIP_FAMILY_RS600 ||
- screen->caps.family == CHIP_FAMILY_RS690 ||
- screen->caps.family == CHIP_FAMILY_RS740)) {
- unsigned min_stride;
-
- if (desc->microtile) {
- unsigned tile_height =
- r300_get_pixel_alignment(desc->b.b.format,
- desc->b.b.nr_samples,
- desc->microtile,
- desc->macrotile[level],
- DIM_HEIGHT);
-
- min_stride = 64 / tile_height;
- } else {
- min_stride = 64;
- }
-
- return stride < min_stride ? min_stride : stride;
- }
-
+ stride = util_format_get_stride(tex->b.b.b.format, width);
/* The alignment to 32 bytes is sort of implied by the layout... */
return stride;
} else {
- return align(util_format_get_stride(desc->b.b.format, width), 32);
+ return align(util_format_get_stride(tex->b.b.b.format, width), is_rs690 ? 64 : 32);
}
}
-static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc,
+static unsigned r300_texture_get_nblocksy(struct r300_resource *tex,
unsigned level,
boolean *out_aligned_for_cbzb)
{
unsigned height, tile_height;
- height = u_minify(desc->height0, level);
+ height = u_minify(tex->tex.height0, level);
- if (util_format_is_plain(desc->b.b.format)) {
- tile_height = r300_get_pixel_alignment(desc->b.b.format,
- desc->b.b.nr_samples,
- desc->microtile,
- desc->macrotile[level],
- DIM_HEIGHT);
- height = align(height, tile_height);
+ /* Mipmapped and 3D textures must have their height aligned to POT. */
+ if ((tex->b.b.b.target != PIPE_TEXTURE_1D &&
+ tex->b.b.b.target != PIPE_TEXTURE_2D &&
+ tex->b.b.b.target != PIPE_TEXTURE_RECT) ||
+ tex->b.b.b.last_level != 0) {
+ height = util_next_power_of_two(height);
+ }
- /* This is needed for the kernel checker, unfortunately. */
- if ((desc->b.b.target != PIPE_TEXTURE_1D &&
- desc->b.b.target != PIPE_TEXTURE_2D &&
- desc->b.b.target != PIPE_TEXTURE_RECT) ||
- desc->b.b.last_level != 0) {
- height = util_next_power_of_two(height);
- }
+ if (util_format_is_plain(tex->b.b.b.format)) {
+ tile_height = r300_get_pixel_alignment(tex->b.b.b.format,
+ tex->b.b.b.nr_samples,
+ tex->tex.microtile,
+ tex->tex.macrotile[level],
+ DIM_HEIGHT, 0);
+ height = align(height, tile_height);
/* See if the CBZB clear can be used on the buffer,
* taking the texture size into account. */
if (out_aligned_for_cbzb) {
- if (desc->macrotile[level]) {
+ if (tex->tex.macrotile[level]) {
/* When clearing, the layer (width*height) is horizontally split
* into two, and the upper and lower halves are cleared by the CB
* and ZB units, respectively. Therefore, the number of macrotiles
@@ -201,10 +189,10 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc,
/* Align the height so that there is an even number of macrotiles.
* Do so for 3 or more macrotiles in the Y direction. */
- if (level == 0 && desc->b.b.last_level == 0 &&
- (desc->b.b.target == PIPE_TEXTURE_1D ||
- desc->b.b.target == PIPE_TEXTURE_2D ||
- desc->b.b.target == PIPE_TEXTURE_RECT) &&
+ if (level == 0 && tex->b.b.b.last_level == 0 &&
+ (tex->b.b.b.target == PIPE_TEXTURE_1D ||
+ tex->b.b.b.target == PIPE_TEXTURE_2D ||
+ tex->b.b.b.target == PIPE_TEXTURE_RECT) &&
height >= tile_height * 3) {
height = align(height, tile_height * 2);
}
@@ -216,30 +204,7 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc,
}
}
- return util_format_get_nblocksy(desc->b.b.format, height);
-}
-
-static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen,
- struct r300_texture_desc *desc)
-{
- /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures
- * incorrectly. This is a workaround to prevent CS from being rejected. */
-
- unsigned i, size;
-
- if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) &&
- desc->b.b.target == PIPE_TEXTURE_3D &&
- desc->b.b.last_level > 0) {
- size = 0;
-
- for (i = 0; i <= desc->b.b.last_level; i++) {
- size += desc->stride_in_bytes[i] *
- r300_texture_get_nblocksy(desc, i, FALSE);
- }
-
- size *= desc->depth0;
- desc->size_in_bytes = size;
- }
+ return util_format_get_nblocksy(tex->b.b.b.format, height);
}
/* Get a width in pixels from a stride in bytes. */
@@ -251,15 +216,15 @@ static unsigned stride_to_width(enum pipe_format format,
}
static void r300_setup_miptree(struct r300_screen *screen,
- struct r300_texture_desc *desc,
+ struct r300_resource *tex,
boolean align_for_cbzb)
{
- struct pipe_resource *base = &desc->b.b;
+ struct pipe_resource *base = &tex->b.b.b;
unsigned stride, size, layer_size, nblocksy, i;
boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350;
boolean aligned_for_cbzb;
- desc->size_in_bytes = 0;
+ tex->tex.size_in_bytes = 0;
SCREEN_DBG(screen, DBG_TEXALLOC,
"r300: Making miptree for texture, format %s\n",
@@ -267,21 +232,21 @@ static void r300_setup_miptree(struct r300_screen *screen,
for (i = 0; i <= base->last_level; i++) {
/* Let's see if this miplevel can be macrotiled. */
- desc->macrotile[i] =
- (desc->macrotile[0] == R300_BUFFER_TILED &&
- r300_texture_macro_switch(desc, i, rv350_mode, DIM_WIDTH) &&
- r300_texture_macro_switch(desc, i, rv350_mode, DIM_HEIGHT)) ?
+ tex->tex.macrotile[i] =
+ (tex->tex.macrotile[0] == R300_BUFFER_TILED &&
+ r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) &&
+ r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ?
R300_BUFFER_TILED : R300_BUFFER_LINEAR;
- stride = r300_texture_get_stride(screen, desc, i);
+ stride = r300_texture_get_stride(screen, tex, i);
/* Compute the number of blocks in Y, see if the CBZB clear can be
* used on the texture. */
aligned_for_cbzb = FALSE;
- if (align_for_cbzb && desc->cbzb_allowed[i])
- nblocksy = r300_texture_get_nblocksy(desc, i, &aligned_for_cbzb);
+ if (align_for_cbzb && tex->tex.cbzb_allowed[i])
+ nblocksy = r300_texture_get_nblocksy(tex, i, &aligned_for_cbzb);
else
- nblocksy = r300_texture_get_nblocksy(desc, i, NULL);
+ nblocksy = r300_texture_get_nblocksy(tex, i, NULL);
layer_size = stride * nblocksy;
@@ -292,75 +257,182 @@ static void r300_setup_miptree(struct r300_screen *screen,
if (base->target == PIPE_TEXTURE_CUBE)
size = layer_size * 6;
else
- size = layer_size * u_minify(desc->depth0, i);
+ size = layer_size * u_minify(tex->tex.depth0, i);
- desc->offset_in_bytes[i] = desc->size_in_bytes;
- desc->size_in_bytes = desc->offset_in_bytes[i] + size;
- desc->layer_size_in_bytes[i] = layer_size;
- desc->stride_in_bytes[i] = stride;
- desc->stride_in_pixels[i] = stride_to_width(desc->b.b.format, stride);
- desc->cbzb_allowed[i] = desc->cbzb_allowed[i] && aligned_for_cbzb;
+ tex->tex.offset_in_bytes[i] = tex->tex.size_in_bytes;
+ tex->tex.size_in_bytes = tex->tex.offset_in_bytes[i] + size;
+ tex->tex.layer_size_in_bytes[i] = layer_size;
+ tex->tex.stride_in_bytes[i] = stride;
+ tex->tex.stride_in_pixels[i] = stride_to_width(tex->b.b.b.format, stride);
+ tex->tex.cbzb_allowed[i] = tex->tex.cbzb_allowed[i] && aligned_for_cbzb;
SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d "
"(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n",
- i, u_minify(desc->width0, i), u_minify(desc->height0, i),
- u_minify(desc->depth0, i), stride, desc->size_in_bytes,
- desc->macrotile[i] ? "TRUE" : "FALSE");
+ i, u_minify(tex->tex.width0, i), u_minify(tex->tex.height0, i),
+ u_minify(tex->tex.depth0, i), stride, tex->tex.size_in_bytes,
+ tex->tex.macrotile[i] ? "TRUE" : "FALSE");
}
}
-static void r300_setup_flags(struct r300_texture_desc *desc)
+static void r300_setup_flags(struct r300_resource *tex)
{
- desc->uses_stride_addressing =
- !util_is_power_of_two(desc->b.b.width0) ||
- (desc->stride_in_bytes_override &&
- stride_to_width(desc->b.b.format,
- desc->stride_in_bytes_override) != desc->b.b.width0);
-
- desc->is_npot =
- desc->uses_stride_addressing ||
- !util_is_power_of_two(desc->b.b.height0) ||
- !util_is_power_of_two(desc->b.b.depth0);
+ tex->tex.uses_stride_addressing =
+ !util_is_power_of_two(tex->b.b.b.width0) ||
+ (tex->tex.stride_in_bytes_override &&
+ stride_to_width(tex->b.b.b.format,
+ tex->tex.stride_in_bytes_override) != tex->b.b.b.width0);
+
+ tex->tex.is_npot =
+ tex->tex.uses_stride_addressing ||
+ !util_is_power_of_two(tex->b.b.b.height0) ||
+ !util_is_power_of_two(tex->b.b.b.depth0);
}
static void r300_setup_cbzb_flags(struct r300_screen *rscreen,
- struct r300_texture_desc *desc)
+ struct r300_resource *tex)
{
unsigned i, bpp;
boolean first_level_valid;
- bpp = util_format_get_blocksizebits(desc->b.b.format);
+ bpp = util_format_get_blocksizebits(tex->b.b.b.format);
/* 1) The texture must be point-sampled,
* 2) The depth must be 16 or 32 bits.
* 3) If the midpoint ZB offset is not aligned to 2048, it returns garbage
* with certain texture sizes. Macrotiling ensures the alignment. */
- first_level_valid = desc->b.b.nr_samples <= 1 &&
+ first_level_valid = tex->b.b.b.nr_samples <= 1 &&
(bpp == 16 || bpp == 32) &&
- desc->macrotile[0];
+ tex->tex.macrotile[0];
if (SCREEN_DBG_ON(rscreen, DBG_NO_CBZB))
first_level_valid = FALSE;
- for (i = 0; i <= desc->b.b.last_level; i++)
- desc->cbzb_allowed[i] = first_level_valid && desc->macrotile[i];
+ for (i = 0; i <= tex->b.b.b.last_level; i++)
+ tex->tex.cbzb_allowed[i] = first_level_valid && tex->tex.macrotile[i];
+}
+
+static unsigned r300_pixels_to_dwords(unsigned stride,
+ unsigned height,
+ unsigned xblock, unsigned yblock)
+{
+ return (util_align_npot(stride, xblock) * align(height, yblock)) / (xblock * yblock);
+}
+
+static void r300_setup_hyperz_properties(struct r300_screen *screen,
+ struct r300_resource *tex)
+{
+ /* The tile size of 1 DWORD in ZMASK RAM is:
+ *
+ * GPU Pipes 4x4 mode 8x8 mode
+ * ------------------------------------------
+ * R580 4P/1Z 32x32 64x64
+ * RV570 3P/1Z 48x16 96x32
+ * RV530 1P/2Z 32x16 64x32
+ * 1P/1Z 16x16 32x32
+ */
+ static unsigned zmask_blocks_x_per_dw[4] = {4, 8, 12, 8};
+ static unsigned zmask_blocks_y_per_dw[4] = {4, 4, 4, 8};
+
+ /* In HIZ RAM, one dword is always 8x8 pixels (each byte is 4x4 pixels),
+ * but the blocks have very weird ordering.
+ *
+ * With 2 pipes and an image of size 8xY, where Y >= 1,
+ * clearing 4 dwords clears blocks like this:
+ *
+ * 01012323
+ *
+ * where numbers correspond to dword indices. The blocks are interleaved
+ * in the X direction, so the alignment must be 4x1 blocks (32x8 pixels).
+ *
+ * With 4 pipes and an image of size 8xY, where Y >= 4,
+ * clearing 8 dwords clears blocks like this:
+ * 01012323
+ * 45456767
+ * 01012323
+ * 45456767
+ * where numbers correspond to dword indices. The blocks are interleaved
+ * in both directions, so the alignment must be 4x4 blocks (32x32 pixels)
+ */
+ static unsigned hiz_align_x[4] = {8, 32, 48, 32};
+ static unsigned hiz_align_y[4] = {8, 8, 8, 32};
+
+ if (util_format_is_depth_or_stencil(tex->b.b.b.format) &&
+ util_format_get_blocksizebits(tex->b.b.b.format) == 32 &&
+ tex->tex.microtile) {
+ unsigned i, pipes;
+
+ if (screen->caps.family == CHIP_FAMILY_RV530) {
+ pipes = screen->caps.num_z_pipes;
+ } else {
+ pipes = screen->caps.num_frag_pipes;
+ }
+
+ for (i = 0; i <= tex->b.b.b.last_level; i++) {
+ unsigned zcomp_numdw, zcompsize, hiz_numdw, stride, height;
+
+ stride = align(tex->tex.stride_in_pixels[i], 16);
+ height = u_minify(tex->b.b.b.height0, i);
+
+ /* The 8x8 compression mode needs macrotiling. */
+ zcompsize = screen->caps.z_compress == R300_ZCOMP_8X8 &&
+ tex->tex.macrotile[i] &&
+ tex->b.b.b.nr_samples <= 1 ? 8 : 4;
+
+ /* Get the ZMASK buffer size in dwords. */
+ zcomp_numdw = r300_pixels_to_dwords(stride, height,
+ zmask_blocks_x_per_dw[pipes-1] * zcompsize,
+ zmask_blocks_y_per_dw[pipes-1] * zcompsize);
+
+ /* Check whether we have enough ZMASK memory. */
+ if (util_format_get_blocksizebits(tex->b.b.b.format) == 32 &&
+ zcomp_numdw <= screen->caps.zmask_ram * pipes) {
+ tex->tex.zmask_dwords[i] = zcomp_numdw;
+ tex->tex.zcomp8x8[i] = zcompsize == 8;
+
+ tex->tex.zmask_stride_in_pixels[i] =
+ util_align_npot(stride, zmask_blocks_x_per_dw[pipes-1] * zcompsize);
+ } else {
+ tex->tex.zmask_dwords[i] = 0;
+ tex->tex.zcomp8x8[i] = FALSE;
+ tex->tex.zmask_stride_in_pixels[i] = 0;
+ }
+
+ /* Now setup HIZ. */
+ stride = util_align_npot(stride, hiz_align_x[pipes-1]);
+ height = align(height, hiz_align_y[pipes-1]);
+
+ /* Get the HIZ buffer size in dwords. */
+ hiz_numdw = (stride * height) / (8*8 * pipes);
+
+ /* Check whether we have enough HIZ memory. */
+ if (hiz_numdw <= screen->caps.hiz_ram * pipes) {
+ tex->tex.hiz_dwords[i] = hiz_numdw;
+ tex->tex.hiz_stride_in_pixels[i] = stride;
+ } else {
+ tex->tex.hiz_dwords[i] = 0;
+ tex->tex.hiz_stride_in_pixels[i] = 0;
+ }
+ }
+ }
}
static void r300_setup_tiling(struct r300_screen *screen,
- struct r300_texture_desc *desc)
+ struct r300_resource *tex)
{
- struct r300_winsys_screen *rws = screen->rws;
- enum pipe_format format = desc->b.b.format;
+ enum pipe_format format = tex->b.b.b.format;
boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350;
boolean is_zb = util_format_is_depth_or_stencil(format);
boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING);
+ tex->tex.microtile = R300_BUFFER_LINEAR;
+ tex->tex.macrotile[0] = R300_BUFFER_LINEAR;
+
if (!util_format_is_plain(format)) {
return;
}
/* If height == 1, disable microtiling except for zbuffer. */
- if (!is_zb && (desc->b.b.height0 == 1 || dbg_no_tiling)) {
+ if (!is_zb && (tex->b.b.b.height0 == 1 || dbg_no_tiling)) {
return;
}
@@ -369,13 +441,11 @@ static void r300_setup_tiling(struct r300_screen *screen,
case 1:
case 4:
case 8:
- desc->microtile = R300_BUFFER_TILED;
+ tex->tex.microtile = R300_BUFFER_TILED;
break;
case 2:
- if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) {
- desc->microtile = R300_BUFFER_SQUARETILED;
- }
+ tex->tex.microtile = R300_BUFFER_SQUARETILED;
break;
}
@@ -384,104 +454,99 @@ static void r300_setup_tiling(struct r300_screen *screen,
}
/* Set macrotiling. */
- if (r300_texture_macro_switch(desc, 0, rv350_mode, DIM_WIDTH) &&
- r300_texture_macro_switch(desc, 0, rv350_mode, DIM_HEIGHT)) {
- desc->macrotile[0] = R300_BUFFER_TILED;
+ if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) &&
+ r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) {
+ tex->tex.macrotile[0] = R300_BUFFER_TILED;
}
}
-static void r300_tex_print_info(struct r300_screen *rscreen,
- struct r300_texture_desc *desc,
+static void r300_tex_print_info(struct r300_resource *tex,
const char *func)
{
fprintf(stderr,
"r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, "
"LastLevel: %i, Size: %i, Format: %s\n",
func,
- desc->macrotile[0] ? "YES" : " NO",
- desc->microtile ? "YES" : " NO",
- desc->stride_in_pixels[0],
- desc->b.b.width0, desc->b.b.height0, desc->b.b.depth0,
- desc->b.b.last_level, desc->size_in_bytes,
- util_format_short_name(desc->b.b.format));
+ tex->tex.macrotile[0] ? "YES" : " NO",
+ tex->tex.microtile ? "YES" : " NO",
+ tex->tex.stride_in_pixels[0],
+ tex->b.b.b.width0, tex->b.b.b.height0, tex->b.b.b.depth0,
+ tex->b.b.b.last_level, tex->tex.size_in_bytes,
+ util_format_short_name(tex->b.b.b.format));
}
boolean r300_texture_desc_init(struct r300_screen *rscreen,
- struct r300_texture_desc *desc,
- const struct pipe_resource *base,
- enum r300_buffer_tiling microtile,
- enum r300_buffer_tiling macrotile,
- unsigned stride_in_bytes_override,
- unsigned max_buffer_size)
+ struct r300_resource *tex,
+ const struct pipe_resource *base)
{
- desc->b.b = *base;
- desc->b.b.screen = &rscreen->screen;
- desc->stride_in_bytes_override = stride_in_bytes_override;
- desc->width0 = base->width0;
- desc->height0 = base->height0;
- desc->depth0 = base->depth0;
-
- r300_setup_flags(desc);
+ tex->b.b.b.target = base->target;
+ tex->b.b.b.format = base->format;
+ tex->b.b.b.width0 = base->width0;
+ tex->b.b.b.height0 = base->height0;
+ tex->b.b.b.depth0 = base->depth0;
+ tex->b.b.b.array_size = base->array_size;
+ tex->b.b.b.last_level = base->last_level;
+ tex->b.b.b.nr_samples = base->nr_samples;
+ tex->tex.width0 = base->width0;
+ tex->tex.height0 = base->height0;
+ tex->tex.depth0 = base->depth0;
+
+ r300_setup_flags(tex);
/* Align a 3D NPOT texture to POT. */
- if (base->target == PIPE_TEXTURE_3D && desc->is_npot) {
- desc->width0 = util_next_power_of_two(desc->width0);
- desc->height0 = util_next_power_of_two(desc->height0);
- desc->depth0 = util_next_power_of_two(desc->depth0);
+ if (base->target == PIPE_TEXTURE_3D && tex->tex.is_npot) {
+ tex->tex.width0 = util_next_power_of_two(tex->tex.width0);
+ tex->tex.height0 = util_next_power_of_two(tex->tex.height0);
+ tex->tex.depth0 = util_next_power_of_two(tex->tex.depth0);
}
/* Setup tiling. */
- if (microtile == R300_BUFFER_SELECT_LAYOUT ||
- macrotile == R300_BUFFER_SELECT_LAYOUT) {
- r300_setup_tiling(rscreen, desc);
- } else {
- desc->microtile = microtile;
- desc->macrotile[0] = macrotile;
- assert(desc->b.b.last_level == 0);
+ if (tex->tex.microtile == R300_BUFFER_SELECT_LAYOUT) {
+ r300_setup_tiling(rscreen, tex);
}
- r300_setup_cbzb_flags(rscreen, desc);
+ r300_setup_cbzb_flags(rscreen, tex);
/* Setup the miptree description. */
- r300_setup_miptree(rscreen, desc, TRUE);
+ r300_setup_miptree(rscreen, tex, TRUE);
/* If the required buffer size is larger the given max size,
* try again without the alignment for the CBZB clear. */
- if (max_buffer_size && desc->size_in_bytes > max_buffer_size) {
- r300_setup_miptree(rscreen, desc, FALSE);
+ if (tex->buf_size && tex->tex.size_in_bytes > tex->buf_size) {
+ r300_setup_miptree(rscreen, tex, FALSE);
}
- r300_texture_3d_fix_mipmapping(rscreen, desc);
+ r300_setup_hyperz_properties(rscreen, tex);
- if (max_buffer_size) {
+ if (tex->buf_size) {
/* Make sure the buffer we got is large enough. */
- if (desc->size_in_bytes > max_buffer_size) {
+ if (tex->tex.size_in_bytes > tex->buf_size) {
fprintf(stderr, "r300: texture_desc_init: The buffer is not "
"large enough. Got: %i, Need: %i, Info:\n",
- max_buffer_size, desc->size_in_bytes);
- r300_tex_print_info(rscreen, desc, "texture_desc_init");
+ tex->buf_size, tex->tex.size_in_bytes);
+ r300_tex_print_info(tex, "texture_desc_init");
return FALSE;
}
- desc->buffer_size_in_bytes = max_buffer_size;
+ tex->tex.buffer_size_in_bytes = tex->buf_size;
} else {
- desc->buffer_size_in_bytes = desc->size_in_bytes;
+ tex->tex.buffer_size_in_bytes = tex->tex.size_in_bytes;
}
if (SCREEN_DBG_ON(rscreen, DBG_TEX))
- r300_tex_print_info(rscreen, desc, "texture_desc_init");
+ r300_tex_print_info(tex, "texture_desc_init");
return TRUE;
}
-unsigned r300_texture_get_offset(struct r300_texture_desc *desc,
+unsigned r300_texture_get_offset(struct r300_resource *tex,
unsigned level, unsigned layer)
{
- unsigned offset = desc->offset_in_bytes[level];
+ unsigned offset = tex->tex.offset_in_bytes[level];
- switch (desc->b.b.target) {
+ switch (tex->b.b.b.target) {
case PIPE_TEXTURE_3D:
case PIPE_TEXTURE_CUBE:
- return offset + layer * desc->layer_size_in_bytes[level];
+ return offset + layer * tex->tex.layer_size_in_bytes[level];
default:
assert(layer == 0);
diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h
index 44d88794a1..ce6e9643ec 100644
--- a/src/gallium/drivers/r300/r300_texture_desc.h
+++ b/src/gallium/drivers/r300/r300_texture_desc.h
@@ -30,7 +30,7 @@
struct pipe_resource;
struct r300_screen;
struct r300_texture_desc;
-struct r300_texture;
+struct r300_resource;
enum r300_dim {
DIM_WIDTH = 0,
@@ -41,17 +41,13 @@ unsigned r300_get_pixel_alignment(enum pipe_format format,
unsigned num_samples,
enum r300_buffer_tiling microtile,
enum r300_buffer_tiling macrotile,
- enum r300_dim dim);
+ enum r300_dim dim, boolean is_rs690);
boolean r300_texture_desc_init(struct r300_screen *rscreen,
- struct r300_texture_desc *desc,
- const struct pipe_resource *base,
- enum r300_buffer_tiling microtile,
- enum r300_buffer_tiling macrotile,
- unsigned stride_in_bytes_override,
- unsigned max_buffer_size);
-
-unsigned r300_texture_get_offset(struct r300_texture_desc *desc,
+ struct r300_resource *tex,
+ const struct pipe_resource *base);
+
+unsigned r300_texture_get_offset(struct r300_resource *tex,
unsigned level, unsigned layer);
#endif
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 15a323989b..97ec0a1a1f 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -191,7 +191,12 @@ static void transform_dstreg(
dst->File = translate_register_file(src->Register.File);
dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index);
dst->WriteMask = src->Register.WriteMask;
- dst->RelAddr = src->Register.Indirect;
+
+ if (src->Register.Indirect) {
+ ttr->error = TRUE;
+ fprintf(stderr, "r300: Relative addressing of destination operands "
+ "is unsupported.\n");
+ }
}
static void transform_srcreg(
@@ -332,6 +337,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr,
unsigned imm_index = 0;
int i;
+ ttr->error = FALSE;
+
/* Allocate constants placeholders.
*
* Note: What if declared constants are not contiguous? */
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h
index 97641a954b..adb044cfe5 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.h
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h
@@ -47,6 +47,9 @@ struct tgsi_to_rc {
/* Vertex shaders have no half swizzles, and no way to handle them, so
* until rc grows proper support, indicate if they're safe to use. */
boolean use_half_swizzles;
+
+ /* If an error occured. */
+ boolean error;
};
void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens);
diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c
index 3b95af79bc..65c5095be6 100644
--- a/src/gallium/drivers/r300/r300_transfer.c
+++ b/src/gallium/drivers/r300/r300_transfer.c
@@ -37,7 +37,7 @@ struct r300_transfer {
unsigned offset;
/* Linear texture. */
- struct r300_texture *linear_texture;
+ struct r300_resource *linear_texture;
};
/* Convenience cast wrapper. */
@@ -54,7 +54,7 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx,
struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer;
struct pipe_resource *tex = transfer->resource;
- ctx->resource_copy_region(ctx, &r300transfer->linear_texture->desc.b.b, 0,
+ ctx->resource_copy_region(ctx, &r300transfer->linear_texture->b.b.b, 0,
0, 0, 0,
tex, transfer->level, &transfer->box);
}
@@ -70,9 +70,10 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx,
ctx->resource_copy_region(ctx, tex, transfer->level,
transfer->box.x, transfer->box.y, transfer->box.z,
- &r300transfer->linear_texture->desc.b.b, 0, &src_box);
+ &r300transfer->linear_texture->b.b.b, 0, &src_box);
- ctx->flush(ctx, 0, NULL);
+ /* XXX remove this. */
+ r300_flush(ctx, 0, NULL);
}
struct pipe_transfer*
@@ -83,25 +84,25 @@ r300_texture_get_transfer(struct pipe_context *ctx,
const struct pipe_box *box)
{
struct r300_context *r300 = r300_context(ctx);
- struct r300_texture *tex = r300_texture(texture);
+ struct r300_resource *tex = r300_resource(texture);
struct r300_transfer *trans;
struct pipe_resource base;
boolean referenced_cs, referenced_hw, blittable;
+ const struct util_format_description *desc =
+ util_format_description(texture->format);
referenced_cs =
- r300->rws->cs_is_buffer_referenced(r300->cs,
- tex->cs_buffer, R300_REF_CS);
+ r300->rws->cs_is_buffer_referenced(r300->cs, tex->cs_buf);
if (referenced_cs) {
referenced_hw = TRUE;
} else {
referenced_hw =
- r300->rws->cs_is_buffer_referenced(r300->cs,
- tex->cs_buffer, R300_REF_HW);
+ r300->rws->buffer_is_busy(tex->buf);
}
- blittable = ctx->screen->is_format_supported(
- ctx->screen, texture->format, texture->target, 0,
- PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET, 0);
+ blittable = desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ||
+ desc->layout == UTIL_FORMAT_LAYOUT_S3TC ||
+ desc->layout == UTIL_FORMAT_LAYOUT_RGTC;
trans = CALLOC_STRUCT(r300_transfer);
if (trans) {
@@ -114,13 +115,17 @@ r300_texture_get_transfer(struct pipe_context *ctx,
/* If the texture is tiled, we must create a temporary detiled texture
* for this transfer.
* Also make write transfers pipelined. */
- if (tex->desc.microtile || tex->desc.macrotile[level] ||
- ((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) {
+ if (tex->tex.microtile || tex->tex.macrotile[level] ||
+ (referenced_hw && blittable && !(usage & PIPE_TRANSFER_READ))) {
+ if (r300->blitter->running) {
+ fprintf(stderr, "r300: ERROR: Blitter recursion in texture_get_transfer.\n");
+ os_break();
+ }
+
base.target = PIPE_TEXTURE_2D;
base.format = texture->format;
base.width0 = box->width;
base.height0 = box->height;
- /* XXX: was depth0 = 0 */
base.depth0 = 1;
base.array_size = 1;
base.last_level = 0;
@@ -141,23 +146,23 @@ r300_texture_get_transfer(struct pipe_context *ctx,
}
/* Create the temporary texture. */
- trans->linear_texture = r300_texture(
+ trans->linear_texture = r300_resource(
ctx->screen->resource_create(ctx->screen,
&base));
if (!trans->linear_texture) {
/* Oh crap, the thing can't create the texture.
* Let's flush and try again. */
- ctx->flush(ctx, 0, NULL);
+ r300_flush(ctx, 0, NULL);
- trans->linear_texture = r300_texture(
+ trans->linear_texture = r300_resource(
ctx->screen->resource_create(ctx->screen,
&base));
if (!trans->linear_texture) {
/* For linear textures, it's safe to fallback to
* an unpipelined transfer. */
- if (!tex->desc.microtile && !tex->desc.macrotile[level]) {
+ if (!tex->tex.microtile && !tex->tex.macrotile[level]) {
goto unpipelined;
}
@@ -169,18 +174,12 @@ r300_texture_get_transfer(struct pipe_context *ctx,
}
}
- assert(!trans->linear_texture->desc.microtile &&
- !trans->linear_texture->desc.macrotile[0]);
+ assert(!trans->linear_texture->tex.microtile &&
+ !trans->linear_texture->tex.macrotile[0]);
- /* Set the stride.
- *
- * Even though we are using an internal texture for this,
- * the transfer level, box and usage parameters still reflect
- * the arguments received to get_transfer. We just do the
- * right thing internally.
- */
+ /* Set the stride. */
trans->transfer.stride =
- trans->linear_texture->desc.stride_in_bytes[0];
+ trans->linear_texture->tex.stride_in_bytes[0];
if (usage & PIPE_TRANSFER_READ) {
/* We cannot map a tiled texture directly because the data is
@@ -188,18 +187,19 @@ r300_texture_get_transfer(struct pipe_context *ctx,
r300_copy_from_tiled_texture(ctx, trans);
/* Always referenced in the blit. */
- ctx->flush(ctx, 0, NULL);
+ r300_flush(ctx, 0, NULL);
}
return &trans->transfer;
}
unpipelined:
/* Unpipelined transfer. */
- trans->transfer.stride = tex->desc.stride_in_bytes[level];
- trans->offset = r300_texture_get_offset(&tex->desc, level, box->z);
+ trans->transfer.stride = tex->tex.stride_in_bytes[level];
+ trans->offset = r300_texture_get_offset(tex, level, box->z);
- if (referenced_cs)
- ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL);
+ if (referenced_cs &&
+ !(usage & PIPE_TRANSFER_UNSYNCHRONIZED))
+ r300_flush(ctx, 0, NULL);
return &trans->transfer;
}
return NULL;
@@ -228,20 +228,19 @@ void* r300_texture_transfer_map(struct pipe_context *ctx,
struct r300_context *r300 = r300_context(ctx);
struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys;
struct r300_transfer *r300transfer = r300_transfer(transfer);
- struct r300_texture *tex = r300_texture(transfer->resource);
+ struct r300_resource *tex = r300_resource(transfer->resource);
char *map;
- enum pipe_format format = tex->desc.b.b.format;
+ enum pipe_format format = tex->b.b.b.format;
if (r300transfer->linear_texture) {
/* The detiled texture is of the same size as the region being mapped
* (no offset needed). */
- return rws->buffer_map(rws,
- r300transfer->linear_texture->buffer,
+ return rws->buffer_map(r300transfer->linear_texture->buf,
r300->cs,
transfer->usage);
} else {
/* Tiling is disabled. */
- map = rws->buffer_map(rws, tex->buffer, r300->cs,
+ map = rws->buffer_map(tex->buf, r300->cs,
transfer->usage);
if (!map) {
@@ -259,11 +258,11 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx,
{
struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys;
struct r300_transfer *r300transfer = r300_transfer(transfer);
- struct r300_texture *tex = r300_texture(transfer->resource);
+ struct r300_resource *tex = r300_resource(transfer->resource);
if (r300transfer->linear_texture) {
- rws->buffer_unmap(rws, r300transfer->linear_texture->buffer);
+ rws->buffer_unmap(r300transfer->linear_texture->buf);
} else {
- rws->buffer_unmap(rws, tex->buffer);
+ rws->buffer_unmap(tex->buf);
}
}
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 78021e2c5d..b319890157 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -226,6 +226,13 @@ void r300_translate_vertex_shader(struct r300_context *r300,
r300_tgsi_to_rc(&ttr, vs->state.tokens);
+ if (ttr.error) {
+ fprintf(stderr, "r300 VP: Cannot translate a shader. "
+ "Using a dummy shader instead.\n");
+ r300_dummy_vertex_shader(r300, vs);
+ return;
+ }
+
if (compiler.Base.Program.Constants.Count > 200) {
compiler.Base.remove_unused_constants = TRUE;
}
diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h
index 0dd330d101..3a6798a542 100644
--- a/src/gallium/drivers/r300/r300_winsys.h
+++ b/src/gallium/drivers/r300/r300_winsys.h
@@ -28,37 +28,44 @@
* Any winsys hosting this pipe needs to implement r300_winsys_screen and then
* call r300_screen_create to start things. */
+#include "r300_defines.h"
+
+#include "pipebuffer/pb_bufmgr.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
-#include "r300_defines.h"
-
#define R300_MAX_CMDBUF_DWORDS (16 * 1024)
+#define R300_FLUSH_ASYNC (1 << 0)
struct winsys_handle;
struct r300_winsys_screen;
-struct r300_winsys_buffer; /* for map/unmap etc. */
-struct r300_winsys_cs_buffer; /* for write_reloc etc. */
+#define r300_winsys_bo pb_buffer
+#define r300_winsys_bo_reference(pdst, src) pb_reference(pdst, src)
+
+struct r300_winsys_cs_handle; /* for write_reloc etc. */
struct r300_winsys_cs {
- unsigned cdw; /* Number of used dwords. */
- uint32_t *buf; /* The command buffer. */
+ unsigned cdw; /* Number of used dwords. */
+ uint32_t *buf; /* The command buffer. */
};
enum r300_value_id {
R300_VID_PCI_ID,
R300_VID_GB_PIPES,
R300_VID_Z_PIPES,
- R300_VID_SQUARE_TILING_SUPPORT,
- R300_VID_DRM_2_3_0,
- R300_VID_DRM_2_6_0,
- R300_CAN_HYPERZ,
-};
-
-enum r300_reference_domain { /* bitfield */
- R300_REF_CS = 1,
- R300_REF_HW = 2
+ R300_VID_GART_SIZE,
+ R300_VID_VRAM_SIZE,
+ R300_VID_DRM_MAJOR,
+ R300_VID_DRM_MINOR,
+ R300_VID_DRM_PATCHLEVEL,
+
+ /* These should probably go away: */
+ R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */
+ R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer, CMask, R16F/RG16F */
+
+ R300_CAN_HYPERZ, /* ZMask + HiZ */
+ R300_CAN_AACOMPRESS, /* CMask */
};
struct r300_winsys_screen {
@@ -97,91 +104,78 @@ struct r300_winsys_screen {
* \param domain A bitmask of the R300_DOMAIN_* flags.
* \return The created buffer object.
*/
- struct r300_winsys_buffer *(*buffer_create)(struct r300_winsys_screen *ws,
+ struct r300_winsys_bo *(*buffer_create)(struct r300_winsys_screen *ws,
unsigned size,
unsigned alignment,
unsigned bind,
unsigned usage,
enum r300_buffer_domain domain);
- struct r300_winsys_cs_buffer *(*buffer_get_cs_handle)(
- struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf);
-
- /**
- * Reference a buffer object (assign with reference counting).
- *
- * \param ws The winsys this function is called from.
- * \param pdst A destination pointer to set the source buffer to.
- * \param src A source buffer object.
- */
- void (*buffer_reference)(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer **pdst,
- struct r300_winsys_buffer *src);
+ struct r300_winsys_cs_handle *(*buffer_get_cs_handle)(
+ struct r300_winsys_bo *buf);
/**
* Map the entire data store of a buffer object into the client's address
* space.
*
- * \param ws The winsys this function is called from.
* \param buf A winsys buffer object to map.
* \param cs A command stream to flush if the buffer is referenced by it.
* \param usage A bitmask of the PIPE_TRANSFER_* flags.
* \return The pointer at the beginning of the buffer.
*/
- void *(*buffer_map)(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf,
+ void *(*buffer_map)(struct r300_winsys_bo *buf,
struct r300_winsys_cs *cs,
enum pipe_transfer_usage usage);
/**
* Unmap a buffer object from the client's address space.
*
- * \param ws The winsys this function is called from.
* \param buf A winsys buffer object to unmap.
*/
- void (*buffer_unmap)(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf);
+ void (*buffer_unmap)(struct r300_winsys_bo *buf);
+
+ /**
+ * Return TRUE if a buffer object is being used by the GPU.
+ *
+ * \param buf A winsys buffer object.
+ */
+ boolean (*buffer_is_busy)(struct r300_winsys_bo *buf);
/**
* Wait for a buffer object until it is not used by a GPU. This is
* equivalent to a fence placed after the last command using the buffer,
* and synchronizing to the fence.
*
- * \param ws The winsys this function is called from.
* \param buf A winsys buffer object to wait for.
*/
- void (*buffer_wait)(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf);
+ void (*buffer_wait)(struct r300_winsys_bo *buf);
/**
* Return tiling flags describing a memory layout of a buffer object.
*
- * \param ws The winsys this function is called from.
* \param buf A winsys buffer object to get the flags from.
* \param macrotile A pointer to the return value of the microtile flag.
* \param microtile A pointer to the return value of the macrotile flag.
*
* \note microtile and macrotile are not bitmasks!
*/
- void (*buffer_get_tiling)(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf,
+ void (*buffer_get_tiling)(struct r300_winsys_bo *buf,
enum r300_buffer_tiling *microtile,
enum r300_buffer_tiling *macrotile);
/**
* Set tiling flags describing a memory layout of a buffer object.
*
- * \param ws The winsys this function is called from.
* \param buf A winsys buffer object to set the flags for.
+ * \param cs A command stream to flush if the buffer is referenced by it.
* \param macrotile A macrotile flag.
* \param microtile A microtile flag.
* \param stride A stride of the buffer in bytes, for texturing.
*
* \note microtile and macrotile are not bitmasks!
*/
- void (*buffer_set_tiling)(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf,
+ void (*buffer_set_tiling)(struct r300_winsys_bo *buf,
+ struct r300_winsys_cs *cs,
enum r300_buffer_tiling microtile,
enum r300_buffer_tiling macrotile,
unsigned stride);
@@ -196,7 +190,7 @@ struct r300_winsys_screen {
* \param stride The returned buffer stride in bytes.
* \param size The returned buffer size.
*/
- struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *ws,
+ struct r300_winsys_bo *(*buffer_from_handle)(struct r300_winsys_screen *ws,
struct winsys_handle *whandle,
unsigned *stride,
unsigned *size);
@@ -205,14 +199,12 @@ struct r300_winsys_screen {
* Get a winsys handle from a winsys buffer. The internal structure
* of the handle is platform-specific and only a winsys should access it.
*
- * \param ws The winsys this function is called from.
* \param buf A winsys buffer object to get the handle from.
* \param whandle A winsys handle pointer.
* \param stride A stride of the buffer in bytes, for texturing.
* \return TRUE on success.
*/
- boolean (*buffer_get_handle)(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf,
+ boolean (*buffer_get_handle)(struct r300_winsys_bo *buf,
unsigned stride,
struct winsys_handle *whandle);
@@ -238,23 +230,22 @@ struct r300_winsys_screen {
void (*cs_destroy)(struct r300_winsys_cs *cs);
/**
- * Add a buffer object to the list of buffers to validate.
+ * Add a new buffer relocation. Every relocation must first be added
+ * before it can be written.
*
- * \param cs A command stream to add buffer for validation against.
- * \param buf A winsys buffer to validate.
- * \param rd A read domain containing a bitmask
- * of the R300_DOMAIN_* flags.
- * \param wd A write domain containing a bitmask
- * of the R300_DOMAIN_* flags.
+ * \param cs A command stream to add buffer for validation against.
+ * \param buf A winsys buffer to validate.
+ * \param rd A read domain containing a bitmask of the R300_DOMAIN_* flags.
+ * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags.
*/
- void (*cs_add_buffer)(struct r300_winsys_cs *cs,
- struct r300_winsys_cs_buffer *buf,
- enum r300_buffer_domain rd,
- enum r300_buffer_domain wd);
+ void (*cs_add_reloc)(struct r300_winsys_cs *cs,
+ struct r300_winsys_cs_handle *buf,
+ enum r300_buffer_domain rd,
+ enum r300_buffer_domain wd);
/**
- * Revalidate all currently set up winsys buffers.
- * Returns TRUE if a flush is required.
+ * Return TRUE if there is enough memory in VRAM and GTT for the relocs
+ * added so far.
*
* \param cs A command stream to validate.
*/
@@ -269,16 +260,15 @@ struct r300_winsys_screen {
* \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags.
*/
void (*cs_write_reloc)(struct r300_winsys_cs *cs,
- struct r300_winsys_cs_buffer *buf,
- enum r300_buffer_domain rd,
- enum r300_buffer_domain wd);
+ struct r300_winsys_cs_handle *buf);
/**
* Flush a command stream.
*
* \param cs A command stream to flush.
+ * \param flags, R300_FLUSH_ASYNC or 0.
*/
- void (*cs_flush)(struct r300_winsys_cs *cs);
+ void (*cs_flush)(struct r300_winsys_cs *cs, unsigned flags);
/**
* Set a flush callback which is called from winsys when flush is
@@ -289,28 +279,17 @@ struct r300_winsys_screen {
* \param user A user pointer that will be passed to the flush callback.
*/
void (*cs_set_flush)(struct r300_winsys_cs *cs,
- void (*flush)(void *),
+ void (*flush)(void *ctx, unsigned flags),
void *user);
/**
- * Reset the list of buffer objects to validate, usually called
- * prior to adding buffer objects for validation.
- *
- * \param cs A command stream to reset buffers for.
- */
- void (*cs_reset_buffers)(struct r300_winsys_cs *cs);
-
- /**
- * Return TRUE if a buffer is referenced by a command stream or by hardware
- * (i.e. is busy), based on the domain parameter.
+ * Return TRUE if a buffer is referenced by a command stream.
*
* \param cs A command stream.
* \param buf A winsys buffer.
- * \param domain A bitmask of the R300_REF_* enums.
*/
boolean (*cs_is_buffer_referenced)(struct r300_winsys_cs *cs,
- struct r300_winsys_cs_buffer *buf,
- enum r300_reference_domain domain);
+ struct r300_winsys_cs_handle *buf);
};
#endif /* R300_WINSYS_H */
diff --git a/src/gallium/drivers/r600/Android.mk b/src/gallium/drivers/r600/Android.mk
new file mode 100644
index 0000000000..b76a78810f
--- /dev/null
+++ b/src/gallium/drivers/r600/Android.mk
@@ -0,0 +1,43 @@
+ifeq ($(strip $(MESA_BUILD_R600G)),true)
+
+LOCAL_PATH := $(call my-dir)
+
+# from Makefile
+C_SOURCES = \
+ r600_asm.c \
+ r600_blit.c \
+ r600_buffer.c \
+ r600_helper.c \
+ r600_pipe.c \
+ r600_query.c \
+ r600_resource.c \
+ r600_shader.c \
+ r600_state.c \
+ r600_texture.c \
+ r700_asm.c \
+ evergreen_state.c \
+ eg_asm.c \
+ r600_translate.c \
+ r600_state_common.c
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+ $(C_SOURCES)
+
+LOCAL_CFLAGS := \
+ -std=c99 \
+ -fvisibility=hidden \
+ -Wno-sign-compare
+
+LOCAL_C_INCLUDES := \
+ external/mesa/src/gallium/include \
+ external/mesa/src/gallium/auxiliary \
+ external/drm \
+ external/drm/include/drm
+
+LOCAL_MODULE := libmesa_pipe_r600
+
+include $(BUILD_STATIC_LIBRARY)
+
+endif # MESA_BUILD_R600G
diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript
index 3fc1fa94c2..5a5fa6d65f 100644
--- a/src/gallium/drivers/r600/SConscript
+++ b/src/gallium/drivers/r600/SConscript
@@ -9,7 +9,7 @@ except OSError:
Return()
env.Append(CPPPATH = [
- '#/include',
+ '#/include',
'#/src/mesa',
])
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index b79875c7c7..3793b919dd 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -35,15 +35,17 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
switch (cf->inst) {
case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
- S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) |
- S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) |
- S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank);
+ S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);
bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
- S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) |
- S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) |
- S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
S_SQ_CF_ALU_WORD1_BARRIER(1) |
S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
break;
@@ -60,7 +62,8 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
- bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
+ bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
@@ -90,37 +93,3 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
}
return 0;
}
-
-void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
-{
- struct r600_pipe_state *rstate;
- unsigned i = 0;
-
- if (count > 8) {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(8 - 1);
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(count - 8 - 1);
- } else {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(count - 1);
- }
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
- S_SQ_CF_WORD1_BARRIER(1);
-
- rstate = &ve->rstate;
- rstate->id = R600_PIPE_STATE_FETCH_SHADER;
- rstate->nregs = 0;
- r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS,
- (r600_bo_offset(ve->fetch_shader)) >> 8,
- 0xFFFFFFFF, ve->fetch_shader);
-}
diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h
index ecea1db4f1..cae3888051 100644
--- a/src/gallium/drivers/r600/eg_state_inlines.h
+++ b/src/gallium/drivers/r600/eg_state_inlines.h
@@ -253,9 +253,13 @@ static inline unsigned r600_tex_dim(unsigned dim)
default:
case PIPE_TEXTURE_1D:
return V_030000_SQ_TEX_DIM_1D;
+ case PIPE_TEXTURE_1D_ARRAY:
+ return V_030000_SQ_TEX_DIM_1D_ARRAY;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
return V_030000_SQ_TEX_DIM_2D;
+ case PIPE_TEXTURE_2D_ARRAY:
+ return V_030000_SQ_TEX_DIM_2D_ARRAY;
case PIPE_TEXTURE_3D:
return V_030000_SQ_TEX_DIM_3D;
case PIPE_TEXTURE_CUBE:
@@ -289,10 +293,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
{
switch (format) {
/* 8-bit buffers. */
+ case PIPE_FORMAT_L4A4_UNORM:
+ return V_028C70_SWAP_ALT;
+
case PIPE_FORMAT_A8_UNORM:
return V_028C70_SWAP_ALT_REV;
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_L8_SRGB:
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return V_028C70_SWAP_STD;
@@ -313,6 +321,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
return V_028C70_SWAP_STD;
case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_L8A8_SRGB:
return V_028C70_SWAP_ALT;
case PIPE_FORMAT_R8G8_UNORM:
return V_028C70_SWAP_STD;
@@ -352,9 +361,11 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_R10G10B10A2_UNORM:
case PIPE_FORMAT_R10G10B10X2_SNORM:
- case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
- return V_028C70_SWAP_STD_REV;
+ return V_028C70_SWAP_STD;
+
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ return V_028C70_SWAP_ALT;
case PIPE_FORMAT_R16G16_UNORM:
return V_028C70_SWAP_STD;
@@ -362,14 +373,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
/* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
- // return V_028C70_COLOR_16_16_16_16;
case PIPE_FORMAT_R16G16B16A16_FLOAT:
- // return V_028C70_COLOR_16_16_16_16_FLOAT;
/* 128-bit buffers. */
case PIPE_FORMAT_R32G32B32A32_FLOAT:
- // return V_028C70_COLOR_32_32_32_32_FLOAT;
- return 0;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return V_028C70_SWAP_STD;
default:
R600_ERR("unsupported colorswap format %d\n", format);
return ~0;
@@ -381,9 +391,13 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
{
switch (format) {
/* 8-bit buffers. */
+ case PIPE_FORMAT_L4A4_UNORM:
+ return V_028C70_COLOR_4_4;
+
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_L8_SRGB:
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return V_028C70_COLOR_8;
@@ -404,6 +418,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
return V_028C70_COLOR_16;
case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_L8A8_SRGB:
case PIPE_FORMAT_R8G8_UNORM:
return V_028C70_COLOR_8_8;
@@ -430,7 +445,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_R10G10B10X2_SNORM:
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
- return V_028C70_COLOR_10_10_10_2;
+ return V_028C70_COLOR_2_10_10_10;
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
@@ -471,6 +486,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
return V_028C70_COLOR_32_32;
/* 128-bit buffers. */
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return V_028C70_COLOR_32_32_32_32;
case PIPE_FORMAT_R32G32B32_FLOAT:
return V_028C70_COLOR_32_32_32_FLOAT;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
@@ -485,9 +503,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
}
}
-static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format)
+static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
{
- return r600_translate_texformat(format, NULL, NULL, NULL) != ~0;
+ return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0;
}
static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format)
@@ -501,144 +519,4 @@ static INLINE boolean r600_is_zs_format_supported(enum pipe_format format)
return r600_translate_dbformat(format) != ~0;
}
-static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format)
-{
- return r600_translate_colorformat(format) != ~0;
-}
-
-static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format)
-{
- uint32_t result = 0;
- const struct util_format_description *desc;
- unsigned i;
-
- desc = util_format_description(format);
- if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
- goto out_unknown;
- }
-
- /* Find the first non-VOID channel. */
- for (i = 0; i < 4; i++) {
- if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
- break;
- }
- }
-
- switch (desc->channel[i].type) {
- /* Half-floats, floats, doubles */
- case UTIL_FORMAT_TYPE_FLOAT:
- switch (desc->channel[i].size) {
- case 16:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_16_FLOAT;
- break;
- case 2:
- result = FMT_16_16_FLOAT;
- break;
- case 3:
- result = FMT_16_16_16_FLOAT;
- break;
- case 4:
- result = FMT_16_16_16_16_FLOAT;
- break;
- }
- break;
- case 32:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_32_FLOAT;
- break;
- case 2:
- result = FMT_32_32_FLOAT;
- break;
- case 3:
- result = FMT_32_32_32_FLOAT;
- break;
- case 4:
- result = FMT_32_32_32_32_FLOAT;
- break;
- }
- break;
- default:
- goto out_unknown;
- }
- break;
- /* Unsigned ints */
- case UTIL_FORMAT_TYPE_UNSIGNED:
- /* Signed ints */
- case UTIL_FORMAT_TYPE_SIGNED:
- switch (desc->channel[i].size) {
- case 8:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_8;
- break;
- case 2:
- result = FMT_8_8;
- break;
- case 3:
-// result = V_038008_FMT_8_8_8; /* fails piglit draw-vertices test */
-// break;
- case 4:
- result = FMT_8_8_8_8;
- break;
- }
- break;
- case 16:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_16;
- break;
- case 2:
- result = FMT_16_16;
- break;
- case 3:
-// result = V_038008_FMT_16_16_16; /* fails piglit draw-vertices test */
-// break;
- case 4:
- result = FMT_16_16_16_16;
- break;
- }
- break;
- case 32:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_32;
- break;
- case 2:
- result = FMT_32_32;
- break;
- case 3:
- result = FMT_32_32_32;
- break;
- case 4:
- result = FMT_32_32_32_32;
- break;
- }
- break;
- default:
- goto out_unknown;
- }
- break;
- default:
- goto out_unknown;
- }
-
- result = S_030008_DATA_FORMAT(result);
-
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
- result |= S_030008_FORMAT_COMP_ALL(1);
- }
- if (desc->channel[i].normalized) {
- result |= S_030008_NUM_FORMAT_ALL(0);
- } else {
- result |= S_030008_NUM_FORMAT_ALL(2);
- }
- return result;
-out_unknown:
- R600_ERR("unsupported vertex format %s\n", util_format_name(format));
- return ~0;
-}
-
#endif
diff --git a/src/gallium/drivers/r600/eg_states_inc.h b/src/gallium/drivers/r600/eg_states_inc.h
deleted file mode 100644
index 1379c11291..0000000000
--- a/src/gallium/drivers/r600/eg_states_inc.h
+++ /dev/null
@@ -1,458 +0,0 @@
-/* This file is autogenerated from eg_states.h - do not edit directly */
-/* autogenerating script is gen_eg_states.py */
-
-/* EG_CONFIG */
-#define EG_CONFIG__SQ_CONFIG 0
-#define EG_CONFIG__SPI_CONFIG_CNTL 1
-#define EG_CONFIG__SPI_CONFIG_CNTL_1 2
-#define EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1 3
-#define EG_CONFIG__SQ_GPR_RESOURCE_MGMT_2 4
-#define EG_CONFIG__SQ_GPR_RESOURCE_MGMT_3 5
-#define EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1 6
-#define EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_2 7
-#define EG_CONFIG__SQ_STACK_RESOURCE_MGMT_1 8
-#define EG_CONFIG__SQ_STACK_RESOURCE_MGMT_2 9
-#define EG_CONFIG__SQ_STACK_RESOURCE_MGMT_3 10
-#define EG_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 11
-#define EG_CONFIG__PA_CL_ENHANCE 12
-#define EG_CONFIG__SQ_DYN_GPR_RESOURCE_LIMIT_1 13
-#define EG_CONFIG__SQ_LDS_ALLOC_PS 14
-#define EG_CONFIG__SX_MISC 15
-#define EG_CONFIG__SQ_ESGS_RING_ITEMSIZE 16
-#define EG_CONFIG__SQ_GSVS_RING_ITEMSIZE 17
-#define EG_CONFIG__SQ_ESTMP_RING_ITEMSIZE 18
-#define EG_CONFIG__SQ_GSTMP_RING_ITEMSIZE 19
-#define EG_CONFIG__SQ_VSTMP_RING_ITEMSIZE 20
-#define EG_CONFIG__SQ_PSTMP_RING_ITEMSIZE 21
-#define EG_CONFIG__SQ_GS_VERT_ITEMSIZE 22
-#define EG_CONFIG__SQ_GS_VERT_ITEMSIZE_1 23
-#define EG_CONFIG__SQ_GS_VERT_ITEMSIZE_2 24
-#define EG_CONFIG__SQ_GS_VERT_ITEMSIZE_3 25
-#define EG_CONFIG__VGT_OUTPUT_PATH_CNTL 26
-#define EG_CONFIG__VGT_HOS_CNTL 27
-#define EG_CONFIG__VGT_HOS_MAX_TESS_LEVEL 28
-#define EG_CONFIG__VGT_HOS_MIN_TESS_LEVEL 29
-#define EG_CONFIG__VGT_HOS_REUSE_DEPTH 30
-#define EG_CONFIG__VGT_GROUP_PRIM_TYPE 31
-#define EG_CONFIG__VGT_GROUP_FIRST_DECR 32
-#define EG_CONFIG__VGT_GROUP_DECR 33
-#define EG_CONFIG__VGT_GROUP_VECT_0_CNTL 34
-#define EG_CONFIG__VGT_GROUP_VECT_1_CNTL 35
-#define EG_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL 36
-#define EG_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL 37
-#define EG_CONFIG__VGT_GS_MODE 38
-#define EG_CONFIG__PA_SC_MODE_CNTL_0 39
-#define EG_CONFIG__PA_SC_MODE_CNTL_1 40
-#define EG_CONFIG__VGT_REUSE_OFF 41
-#define EG_CONFIG__VGT_VTX_CNT_EN 42
-#define EG_CONFIG__VGT_SHADER_STAGES_EN 43
-#define EG_CONFIG__VGT_STRMOUT_CONFIG 44
-#define EG_CONFIG__VGT_STRMOUT_BUFFER_CONFIG 45
-#define EG_CONFIG_SIZE 46
-#define EG_CONFIG_PM4 128
-
-/* EG_CB_CNTL */
-#define EG_CB_CNTL__CB_TARGET_MASK 0
-#define EG_CB_CNTL__CB_SHADER_MASK 1
-#define EG_CB_CNTL__CB_COLOR_CONTROL 2
-#define EG_CB_CNTL__PA_SC_AA_CONFIG 3
-#define EG_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX 4
-#define EG_CB_CNTL__PA_SC_AA_MASK 5
-#define EG_CB_CNTL_SIZE 6
-#define EG_CB_CNTL_PM4 128
-
-/* EG_RASTERIZER */
-#define EG_RASTERIZER__SPI_INTERP_CONTROL_0 0
-#define EG_RASTERIZER__PA_CL_CLIP_CNTL 1
-#define EG_RASTERIZER__PA_SU_SC_MODE_CNTL 2
-#define EG_RASTERIZER__PA_CL_VS_OUT_CNTL 3
-#define EG_RASTERIZER__PA_CL_NANINF_CNTL 4
-#define EG_RASTERIZER__PA_SU_POINT_SIZE 5
-#define EG_RASTERIZER__PA_SU_POINT_MINMAX 6
-#define EG_RASTERIZER__PA_SU_LINE_CNTL 7
-#define EG_RASTERIZER__PA_SC_MPASS_PS_CNTL 8
-#define EG_RASTERIZER__PA_SC_LINE_CNTL 9
-#define EG_RASTERIZER__PA_SU_VTX_CNTL 10
-#define EG_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ 11
-#define EG_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ 12
-#define EG_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ 13
-#define EG_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ 14
-#define EG_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL 15
-#define EG_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP 16
-#define EG_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE 17
-#define EG_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET 18
-#define EG_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE 19
-#define EG_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET 20
-#define EG_RASTERIZER_SIZE 21
-#define EG_RASTERIZER_PM4 128
-
-/* EG_VIEWPORT */
-#define EG_VIEWPORT__PA_SC_VPORT_ZMIN_0 0
-#define EG_VIEWPORT__PA_SC_VPORT_ZMAX_0 1
-#define EG_VIEWPORT__PA_CL_VPORT_XSCALE_0 2
-#define EG_VIEWPORT__PA_CL_VPORT_YSCALE_0 3
-#define EG_VIEWPORT__PA_CL_VPORT_ZSCALE_0 4
-#define EG_VIEWPORT__PA_CL_VPORT_XOFFSET_0 5
-#define EG_VIEWPORT__PA_CL_VPORT_YOFFSET_0 6
-#define EG_VIEWPORT__PA_CL_VPORT_ZOFFSET_0 7
-#define EG_VIEWPORT__PA_CL_VTE_CNTL 8
-#define EG_VIEWPORT_SIZE 9
-#define EG_VIEWPORT_PM4 128
-
-/* EG_SCISSOR */
-#define EG_SCISSOR__PA_SC_SCREEN_SCISSOR_TL 0
-#define EG_SCISSOR__PA_SC_SCREEN_SCISSOR_BR 1
-#define EG_SCISSOR__PA_SC_WINDOW_OFFSET 2
-#define EG_SCISSOR__PA_SC_WINDOW_SCISSOR_TL 3
-#define EG_SCISSOR__PA_SC_WINDOW_SCISSOR_BR 4
-#define EG_SCISSOR__PA_SC_CLIPRECT_RULE 5
-#define EG_SCISSOR__PA_SC_CLIPRECT_0_TL 6
-#define EG_SCISSOR__PA_SC_CLIPRECT_0_BR 7
-#define EG_SCISSOR__PA_SC_CLIPRECT_1_TL 8
-#define EG_SCISSOR__PA_SC_CLIPRECT_1_BR 9
-#define EG_SCISSOR__PA_SC_CLIPRECT_2_TL 10
-#define EG_SCISSOR__PA_SC_CLIPRECT_2_BR 11
-#define EG_SCISSOR__PA_SC_CLIPRECT_3_TL 12
-#define EG_SCISSOR__PA_SC_CLIPRECT_3_BR 13
-#define EG_SCISSOR__PA_SC_EDGERULE 14
-#define EG_SCISSOR__PA_SC_GENERIC_SCISSOR_TL 15
-#define EG_SCISSOR__PA_SC_GENERIC_SCISSOR_BR 16
-#define EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL 17
-#define EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR 18
-#define EG_SCISSOR__PA_SU_HARDWARE_SCREEN_OFFSET 19
-#define EG_SCISSOR_SIZE 20
-#define EG_SCISSOR_PM4 128
-
-/* EG_BLEND */
-#define EG_BLEND__CB_BLEND_RED 0
-#define EG_BLEND__CB_BLEND_GREEN 1
-#define EG_BLEND__CB_BLEND_BLUE 2
-#define EG_BLEND__CB_BLEND_ALPHA 3
-#define EG_BLEND__CB_BLEND0_CONTROL 4
-#define EG_BLEND__CB_BLEND1_CONTROL 5
-#define EG_BLEND__CB_BLEND2_CONTROL 6
-#define EG_BLEND__CB_BLEND3_CONTROL 7
-#define EG_BLEND__CB_BLEND4_CONTROL 8
-#define EG_BLEND__CB_BLEND5_CONTROL 9
-#define EG_BLEND__CB_BLEND6_CONTROL 10
-#define EG_BLEND__CB_BLEND7_CONTROL 11
-#define EG_BLEND_SIZE 12
-#define EG_BLEND_PM4 128
-
-/* EG_DSA */
-#define EG_DSA__DB_STENCIL_CLEAR 0
-#define EG_DSA__DB_DEPTH_CLEAR 1
-#define EG_DSA__SX_ALPHA_TEST_CONTROL 2
-#define EG_DSA__DB_STENCILREFMASK 3
-#define EG_DSA__DB_STENCILREFMASK_BF 4
-#define EG_DSA__SX_ALPHA_REF 5
-#define EG_DSA__SPI_FOG_CNTL 6
-#define EG_DSA__DB_DEPTH_CONTROL 7
-#define EG_DSA__DB_SHADER_CONTROL 8
-#define EG_DSA__DB_RENDER_CONTROL 9
-#define EG_DSA__DB_COUNT_CONTROL 10
-#define EG_DSA__DB_RENDER_OVERRIDE 11
-#define EG_DSA__DB_RENDER_OVERRIDE2 12
-#define EG_DSA__DB_SRESULTS_COMPARE_STATE0 13
-#define EG_DSA__DB_SRESULTS_COMPARE_STATE1 14
-#define EG_DSA__DB_PRELOAD_CONTROL 15
-#define EG_DSA__DB_ALPHA_TO_MASK 16
-#define EG_DSA_SIZE 17
-#define EG_DSA_PM4 128
-
-/* EG_VS_SHADER */
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_0 0
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_1 1
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_2 2
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_3 3
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_4 4
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_5 5
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_6 6
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_7 7
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_8 8
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_9 9
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_10 10
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_11 11
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_12 12
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_13 13
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_14 14
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_15 15
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_16 16
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_17 17
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_18 18
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_19 19
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_20 20
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_21 21
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_22 22
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_23 23
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_24 24
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_25 25
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_26 26
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_27 27
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_28 28
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_29 29
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_30 30
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_31 31
-#define EG_VS_SHADER__SPI_VS_OUT_ID_0 32
-#define EG_VS_SHADER__SPI_VS_OUT_ID_1 33
-#define EG_VS_SHADER__SPI_VS_OUT_ID_2 34
-#define EG_VS_SHADER__SPI_VS_OUT_ID_3 35
-#define EG_VS_SHADER__SPI_VS_OUT_ID_4 36
-#define EG_VS_SHADER__SPI_VS_OUT_ID_5 37
-#define EG_VS_SHADER__SPI_VS_OUT_ID_6 38
-#define EG_VS_SHADER__SPI_VS_OUT_ID_7 39
-#define EG_VS_SHADER__SPI_VS_OUT_ID_8 40
-#define EG_VS_SHADER__SPI_VS_OUT_ID_9 41
-#define EG_VS_SHADER__SPI_VS_OUT_CONFIG 42
-#define EG_VS_SHADER__SQ_PGM_START_VS 43
-#define EG_VS_SHADER__SQ_PGM_RESOURCES_VS 44
-#define EG_VS_SHADER__SQ_PGM_RESOURCES_2_VS 45
-#define EG_VS_SHADER__SQ_PGM_START_FS 46
-#define EG_VS_SHADER__SQ_PGM_RESOURCES_FS 47
-#define EG_VS_SHADER_SIZE 48
-#define EG_VS_SHADER_PM4 128
-
-/* EG_PS_SHADER */
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_0 0
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_1 1
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_2 2
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_3 3
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_4 4
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_5 5
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_6 6
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_7 7
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_8 8
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_9 9
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_10 10
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_11 11
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_12 12
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_13 13
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_14 14
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_15 15
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_16 16
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_17 17
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_18 18
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_19 19
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_20 20
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_21 21
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_22 22
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_23 23
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_24 24
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_25 25
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_26 26
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_27 27
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_28 28
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_29 29
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_30 30
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_31 31
-#define EG_PS_SHADER__SPI_THREAD_GROUPING 32
-#define EG_PS_SHADER__SPI_PS_IN_CONTROL_0 33
-#define EG_PS_SHADER__SPI_PS_IN_CONTROL_1 34
-#define EG_PS_SHADER__SPI_INPUT_Z 35
-#define EG_PS_SHADER__SPI_BARYC_CNTL 36
-#define EG_PS_SHADER__SPI_PS_IN_CONTROL_2 37
-#define EG_PS_SHADER__SPI_COMPUTE_INPUT_CNTL 38
-#define EG_PS_SHADER__SQ_PGM_START_PS 39
-#define EG_PS_SHADER__SQ_PGM_RESOURCES_PS 40
-#define EG_PS_SHADER__SQ_PGM_RESOURCES_2_PS 41
-#define EG_PS_SHADER__SQ_PGM_EXPORTS_PS 42
-#define EG_PS_SHADER_SIZE 43
-#define EG_PS_SHADER_PM4 128
-
-/* EG_UCP */
-#define EG_UCP__PA_CL_UCP0_X 0
-#define EG_UCP__PA_CL_UCP0_Y 1
-#define EG_UCP__PA_CL_UCP0_Z 2
-#define EG_UCP__PA_CL_UCP0_W 3
-#define EG_UCP__PA_CL_UCP1_X 4
-#define EG_UCP__PA_CL_UCP1_Y 5
-#define EG_UCP__PA_CL_UCP1_Z 6
-#define EG_UCP__PA_CL_UCP1_W 7
-#define EG_UCP__PA_CL_UCP2_X 8
-#define EG_UCP__PA_CL_UCP2_Y 9
-#define EG_UCP__PA_CL_UCP2_Z 10
-#define EG_UCP__PA_CL_UCP2_W 11
-#define EG_UCP__PA_CL_UCP3_X 12
-#define EG_UCP__PA_CL_UCP3_Y 13
-#define EG_UCP__PA_CL_UCP3_Z 14
-#define EG_UCP__PA_CL_UCP3_W 15
-#define EG_UCP__PA_CL_UCP4_X 16
-#define EG_UCP__PA_CL_UCP4_Y 17
-#define EG_UCP__PA_CL_UCP4_Z 18
-#define EG_UCP__PA_CL_UCP4_W 19
-#define EG_UCP__PA_CL_UCP5_X 20
-#define EG_UCP__PA_CL_UCP5_Y 21
-#define EG_UCP__PA_CL_UCP5_Z 22
-#define EG_UCP__PA_CL_UCP5_W 23
-#define EG_UCP_SIZE 24
-#define EG_UCP_PM4 128
-
-/* EG_VS_CBUF */
-#define EG_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0 0
-#define EG_VS_CBUF__ALU_CONST_CACHE_VS_0 1
-#define EG_VS_CBUF_SIZE 2
-#define EG_VS_CBUF_PM4 128
-
-/* EG_PS_CBUF */
-#define EG_PS_CBUF__ALU_CONST_BUFFER_SIZE_PS_0 0
-#define EG_PS_CBUF__ALU_CONST_CACHE_PS_0 1
-#define EG_PS_CBUF_SIZE 2
-#define EG_PS_CBUF_PM4 128
-
-/* EG_PS_RESOURCE */
-#define EG_PS_RESOURCE__RESOURCE0_WORD0 0
-#define EG_PS_RESOURCE__RESOURCE0_WORD1 1
-#define EG_PS_RESOURCE__RESOURCE0_WORD2 2
-#define EG_PS_RESOURCE__RESOURCE0_WORD3 3
-#define EG_PS_RESOURCE__RESOURCE0_WORD4 4
-#define EG_PS_RESOURCE__RESOURCE0_WORD5 5
-#define EG_PS_RESOURCE__RESOURCE0_WORD6 6
-#define EG_PS_RESOURCE__RESOURCE0_WORD7 7
-#define EG_PS_RESOURCE_SIZE 8
-#define EG_PS_RESOURCE_PM4 128
-
-/* EG_VS_RESOURCE */
-#define EG_VS_RESOURCE__RESOURCE160_WORD0 0
-#define EG_VS_RESOURCE__RESOURCE160_WORD1 1
-#define EG_VS_RESOURCE__RESOURCE160_WORD2 2
-#define EG_VS_RESOURCE__RESOURCE160_WORD3 3
-#define EG_VS_RESOURCE__RESOURCE160_WORD4 4
-#define EG_VS_RESOURCE__RESOURCE160_WORD5 5
-#define EG_VS_RESOURCE__RESOURCE160_WORD6 6
-#define EG_VS_RESOURCE__RESOURCE160_WORD7 7
-#define EG_VS_RESOURCE_SIZE 8
-#define EG_VS_RESOURCE_PM4 128
-
-/* EG_FS_RESOURCE */
-#define EG_FS_RESOURCE__RESOURCE320_WORD0 0
-#define EG_FS_RESOURCE__RESOURCE320_WORD1 1
-#define EG_FS_RESOURCE__RESOURCE320_WORD2 2
-#define EG_FS_RESOURCE__RESOURCE320_WORD3 3
-#define EG_FS_RESOURCE__RESOURCE320_WORD4 4
-#define EG_FS_RESOURCE__RESOURCE320_WORD5 5
-#define EG_FS_RESOURCE__RESOURCE320_WORD6 6
-#define EG_FS_RESOURCE__RESOURCE320_WORD7 7
-#define EG_FS_RESOURCE_SIZE 8
-#define EG_FS_RESOURCE_PM4 128
-
-/* EG_GS_RESOURCE */
-#define EG_GS_RESOURCE__RESOURCE336_WORD0 0
-#define EG_GS_RESOURCE__RESOURCE336_WORD1 1
-#define EG_GS_RESOURCE__RESOURCE336_WORD2 2
-#define EG_GS_RESOURCE__RESOURCE336_WORD3 3
-#define EG_GS_RESOURCE__RESOURCE336_WORD4 4
-#define EG_GS_RESOURCE__RESOURCE336_WORD5 5
-#define EG_GS_RESOURCE__RESOURCE336_WORD6 6
-#define EG_GS_RESOURCE__RESOURCE336_WORD7 7
-#define EG_GS_RESOURCE_SIZE 8
-#define EG_GS_RESOURCE_PM4 128
-
-/* EG_PS_SAMPLER */
-#define EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0 0
-#define EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0 1
-#define EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0 2
-#define EG_PS_SAMPLER_SIZE 3
-#define EG_PS_SAMPLER_PM4 128
-
-/* EG_VS_SAMPLER */
-#define EG_VS_SAMPLER__SQ_TEX_SAMPLER_WORD0_18 0
-#define EG_VS_SAMPLER__SQ_TEX_SAMPLER_WORD1_18 1
-#define EG_VS_SAMPLER__SQ_TEX_SAMPLER_WORD2_18 2
-#define EG_VS_SAMPLER_SIZE 3
-#define EG_VS_SAMPLER_PM4 128
-
-/* EG_GS_SAMPLER */
-#define EG_GS_SAMPLER__SQ_TEX_SAMPLER_WORD0_36 0
-#define EG_GS_SAMPLER__SQ_TEX_SAMPLER_WORD1_36 1
-#define EG_GS_SAMPLER__SQ_TEX_SAMPLER_WORD2_36 2
-#define EG_GS_SAMPLER_SIZE 3
-#define EG_GS_SAMPLER_PM4 128
-
-/* EG_PS_SAMPLER_BORDER */
-#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_INDEX 0
-#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED 1
-#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN 2
-#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE 3
-#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA 4
-#define EG_PS_SAMPLER_BORDER_SIZE 5
-#define EG_PS_SAMPLER_BORDER_PM4 128
-
-/* EG_VS_SAMPLER_BORDER */
-#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_INDEX 0
-#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_RED 1
-#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_GREEN 2
-#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_BLUE 3
-#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_ALPHA 4
-#define EG_VS_SAMPLER_BORDER_SIZE 5
-#define EG_VS_SAMPLER_BORDER_PM4 128
-
-/* EG_GS_SAMPLER_BORDER */
-#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_INDEX 0
-#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_RED 1
-#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_GREEN 2
-#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_BLUE 3
-#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_ALPHA 4
-#define EG_GS_SAMPLER_BORDER_SIZE 5
-#define EG_GS_SAMPLER_BORDER_PM4 128
-
-/* EG_CB */
-#define EG_CB__CB_COLOR0_BASE 0
-#define EG_CB__CB_COLOR0_PITCH 1
-#define EG_CB__CB_COLOR0_SLICE 2
-#define EG_CB__CB_COLOR0_VIEW 3
-#define EG_CB__CB_COLOR0_INFO 4
-#define EG_CB__CB_COLOR0_ATTRIB 5
-#define EG_CB__CB_COLOR0_DIM 6
-#define EG_CB_SIZE 7
-#define EG_CB_PM4 128
-
-/* EG_DB */
-#define EG_DB__DB_HTILE_DATA_BASE 0
-#define EG_DB__DB_Z_INFO 1
-#define EG_DB__DB_STENCIL_INFO 2
-#define EG_DB__DB_DEPTH_SIZE 3
-#define EG_DB__DB_DEPTH_SLICE 4
-#define EG_DB__DB_DEPTH_VIEW 5
-#define EG_DB__DB_HTILE_SURFACE 6
-#define EG_DB__DB_Z_READ_BASE 7
-#define EG_DB__DB_STENCIL_READ_BASE 8
-#define EG_DB__DB_Z_WRITE_BASE 9
-#define EG_DB__DB_STENCIL_WRITE_BASE 10
-#define EG_DB_SIZE 11
-#define EG_DB_PM4 128
-
-/* EG_VGT */
-#define EG_VGT__VGT_PRIMITIVE_TYPE 0
-#define EG_VGT__VGT_MAX_VTX_INDX 1
-#define EG_VGT__VGT_MIN_VTX_INDX 2
-#define EG_VGT__VGT_INDX_OFFSET 3
-#define EG_VGT__VGT_DMA_INDEX_TYPE 4
-#define EG_VGT__VGT_PRIMITIVEID_EN 5
-#define EG_VGT__VGT_DMA_NUM_INSTANCES 6
-#define EG_VGT__VGT_MULTI_PRIM_IB_RESET_EN 7
-#define EG_VGT__VGT_INSTANCE_STEP_RATE_0 8
-#define EG_VGT__VGT_INSTANCE_STEP_RATE_1 9
-#define EG_VGT_SIZE 10
-#define EG_VGT_PM4 128
-
-/* EG_DRAW */
-#define EG_DRAW__VGT_NUM_INDICES 0
-#define EG_DRAW__VGT_DMA_BASE_HI 1
-#define EG_DRAW__VGT_DMA_BASE 2
-#define EG_DRAW__VGT_DRAW_INITIATOR 3
-#define EG_DRAW_SIZE 4
-#define EG_DRAW_PM4 128
-
-/* EG_VGT_EVENT */
-#define EG_VGT_EVENT__VGT_EVENT_INITIATOR 0
-#define EG_VGT_EVENT_SIZE 1
-#define EG_VGT_EVENT_PM4 128
-
-/* EG_CB_FLUSH */
-#define EG_CB_FLUSH_SIZE 0
-#define EG_CB_FLUSH_PM4 128
-
-/* EG_DB_FLUSH */
-#define EG_DB_FLUSH_SIZE 0
-#define EG_DB_FLUSH_PM4 128
-
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 9e1a5e1f98..77432661b6 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -103,7 +103,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx,
}
blend->cb_target_mask = target_mask;
r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
- color_control, 0xFFFFFFFF, NULL);
+ color_control, 0xFFFFFFFD, NULL);
r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
for (int i = 0; i < 8; i++) {
@@ -150,10 +150,6 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
rstate->id = R600_PIPE_STATE_DSA;
/* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */
- /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be
- * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will
- * be set if shader use texkill instruction
- */
db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
stencil_ref_mask = 0;
stencil_ref_mask_bf = 0;
@@ -210,7 +206,10 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
r600_pipe_state_add_reg(rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL);
+ /* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE,
+ * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by
+ * evergreen_pipe_shader_ps().*/
+ r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL);
r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL);
@@ -305,11 +304,16 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
{
struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
union util_color uc;
+ uint32_t coord_trunc = 0;
if (rstate == NULL) {
return NULL;
}
+ if ((state->mag_img_filter == PIPE_TEX_FILTER_NEAREST) ||
+ (state->min_img_filter == PIPE_TEX_FILTER_NEAREST))
+ coord_trunc = 1;
+
rstate->id = R600_PIPE_STATE_SAMPLER;
util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0,
@@ -328,6 +332,7 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0,
S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
+ S_03C008_MC_COORD_TRUNCATE(coord_trunc) |
S_03C008_TYPE(1),
0xFFFFFFFF, NULL);
@@ -351,7 +356,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
struct r600_resource *rbuffer;
unsigned format;
uint32_t word4 = 0, yuv_format = 0, pitch = 0;
- unsigned char swizzle[4];
+ unsigned char swizzle[4], array_mode = 0, tile_type = 0;
struct r600_bo *bo[2];
if (resource == NULL)
@@ -370,7 +375,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
swizzle[1] = state->swizzle_g;
swizzle[2] = state->swizzle_b;
swizzle[3] = state->swizzle_a;
- format = r600_translate_texformat(state->format,
+ format = r600_translate_texformat(ctx->screen, state->format,
swizzle,
&word4, &yuv_format);
if (format == ~0) {
@@ -380,36 +385,43 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
if (desc == NULL) {
R600_ERR("unknow format %d\n", state->format);
}
- tmp = (struct r600_resource_texture*)texture;
+ tmp = (struct r600_resource_texture *)texture;
+ if (tmp->depth && !tmp->is_flushing_texture) {
+ r600_texture_depth_flush(ctx, texture, TRUE);
+ tmp = tmp->flushed_depth_texture;
+ }
+
+ if (tmp->force_int_type) {
+ word4 &= C_030010_NUM_FORMAT_ALL;
+ word4 |= S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_INT);
+ }
+
rbuffer = &tmp->resource;
bo[0] = rbuffer->bo;
bo[1] = rbuffer->bo;
- /* FIXME depth texture decompression */
- if (tmp->depth) {
- r600_texture_depth_flush(ctx, texture);
- tmp = (struct r600_resource_texture*)texture;
- rbuffer = &tmp->flushed_depth_texture->resource;
- bo[0] = rbuffer->bo;
- bo[1] = rbuffer->bo;
- }
- pitch = align(tmp->pitch_in_pixels[0], 8);
+
+ pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8);
+ array_mode = tmp->array_mode[0];
+ tile_type = tmp->tile_type;
/* FIXME properly handle first level != 0 */
r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
S_030000_DIM(r600_tex_dim(texture->target)) |
S_030000_PITCH((pitch / 8) - 1) |
+ S_030000_NON_DISP_TILING_ORDER(tile_type) |
S_030000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1,
S_030004_TEX_HEIGHT(texture->height0 - 1) |
- S_030004_TEX_DEPTH(texture->depth0 - 1),
+ S_030004_TEX_DEPTH(texture->depth0 - 1) |
+ S_030004_ARRAY_MODE(array_mode),
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2,
(tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3,
(tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]);
r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
- word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) |
- S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) |
+ word4 |
+ S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_NO_ZERO) |
S_030010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
S_030014_LAST_LEVEL(state->u.tex.last_level) |
@@ -431,7 +443,8 @@ static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned cou
for (int i = 0; i < count; i++) {
if (resource[i]) {
- evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i);
+ evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state,
+ i + R600_MAX_CONST_BUFFERS);
}
}
}
@@ -446,9 +459,11 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou
for (i = 0; i < count; i++) {
if (&rctx->ps_samplers.views[i]->base != views[i]) {
if (resource[i])
- evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
+ evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state,
+ i + R600_MAX_CONST_BUFFERS);
else
- evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+ evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+ i + R600_MAX_CONST_BUFFERS);
pipe_sampler_view_reference(
(struct pipe_sampler_view **)&rctx->ps_samplers.views[i],
@@ -457,7 +472,8 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou
}
for (i = count; i < NUM_TEX_UNITS; i++) {
if (rctx->ps_samplers.views[i]) {
- evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+ evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+ i + R600_MAX_CONST_BUFFERS);
pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL);
}
}
@@ -638,11 +654,19 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
unsigned color_info;
unsigned format, swap, ntype;
unsigned offset;
+ unsigned tile_type;
const struct util_format_description *desc;
struct r600_bo *bo[3];
+ int i;
surf = (struct r600_surface *)state->cbufs[cb];
rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
+
+ if (rtex->depth && !rtex->is_flushing_texture) {
+ r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE);
+ rtex = rtex->flushed_depth_texture;
+ }
+
rbuffer = &rtex->resource;
bo[0] = rbuffer->bo;
bo[1] = rbuffer->bo;
@@ -651,21 +675,43 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
/* XXX quite sure for dx10+ hw don't need any offset hacks */
offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture,
level, state->cbufs[cb]->u.tex.first_layer);
- pitch = rtex->pitch_in_pixels[level] / 8 - 1;
- slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
+ pitch = rtex->pitch_in_blocks[level] / 8 - 1;
+ slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
ntype = 0;
- desc = util_format_description(rtex->resource.base.b.format);
+ desc = util_format_description(surf->base.format);
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
ntype = V_028C70_NUMBER_SRGB;
- format = r600_translate_colorformat(rtex->resource.base.b.format);
- swap = r600_translate_colorswap(rtex->resource.base.b.format);
+ format = r600_translate_colorformat(surf->base.format);
+ swap = r600_translate_colorswap(surf->base.format);
+
+ /* disable when gallium grows int textures */
+ if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type)
+ ntype = 4;
+
color_info = S_028C70_FORMAT(format) |
S_028C70_COMP_SWAP(swap) |
+ S_028C70_ARRAY_MODE(rtex->array_mode[level]) |
S_028C70_BLEND_CLAMP(1) |
S_028C70_NUMBER_TYPE(ntype);
- if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
- color_info |= S_028C70_SOURCE_FORMAT(1);
+
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ /* we can only set the export size if any thing is snorm/unorm component is > 11 bits,
+ if we aren't a float, sint or uint */
+ if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS &&
+ desc->channel[i].size < 12 && desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT &&
+ ntype != 4 && ntype != 5)
+ color_info |= S_028C70_SOURCE_FORMAT(V_028C70_EXPORT_4C_16BPC);
+
+ if (rtex->array_mode[level] > V_028C70_ARRAY_LINEAR_ALIGNED) {
+ tile_type = rtex->tile_type;
+ } else /* workaround for linear buffers */
+ tile_type = 1;
/* FIXME handle enabling of CB beyond BASE8 which has different offset */
r600_pipe_state_add_reg(rstate,
@@ -690,7 +736,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C,
- S_028C74_NON_DISP_TILING_ORDER(1),
+ S_028C74_NON_DISP_TILING_ORDER(tile_type),
0xFFFFFFFF, bo[0]);
}
@@ -711,17 +757,14 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
surf = (struct r600_surface *)state->zsbuf;
rtex = (struct r600_resource_texture*)state->zsbuf->texture;
- rtex->tiled = 1;
- rtex->array_mode[level] = 2;
- rtex->tile_type = 1;
- rtex->depth = 1;
+
rbuffer = &rtex->resource;
/* XXX quite sure for dx10+ hw don't need any offset hacks */
offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture,
level, state->zsbuf->u.tex.first_layer);
- pitch = rtex->pitch_in_pixels[level] / 8 - 1;
- slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
+ pitch = rtex->pitch_in_blocks[level] / 8 - 1;
+ slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
format = r600_translate_dbformat(state->zsbuf->texture->format);
stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format);
@@ -770,8 +813,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
util_copy_framebuffer_state(&rctx->framebuffer, state);
- rctx->pframebuffer = &rctx->framebuffer;
-
/* build states */
for (int i = 0; i < state->nr_cbufs; i++) {
evergreen_cb(rctx, rstate, state, i);
@@ -839,48 +880,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
}
}
-static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
- struct pipe_resource *buffer)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_resource *rbuffer = (struct r600_resource*)buffer;
-
- /* Note that the state tracker can unbind constant buffers by
- * passing NULL here.
- */
- if (buffer == NULL) {
- return;
- }
-
- switch (shader) {
- case PIPE_SHADER_VERTEX:
- rctx->vs_const_buffer.nregs = 0;
- r600_pipe_state_add_reg(&rctx->vs_const_buffer,
- R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
- ALIGN_DIVUP(buffer->width0 >> 4, 16),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&rctx->vs_const_buffer,
- R_028980_ALU_CONST_CACHE_VS_0,
- (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
- r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
- break;
- case PIPE_SHADER_FRAGMENT:
- rctx->ps_const_buffer.nregs = 0;
- r600_pipe_state_add_reg(&rctx->ps_const_buffer,
- R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
- ALIGN_DIVUP(buffer->width0 >> 4, 16),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&rctx->ps_const_buffer,
- R_028940_ALU_CONST_CACHE_PS_0,
- (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
- r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
- break;
- default:
- R600_ERR("unsupported %d\n", shader);
- return;
- }
-}
-
void evergreen_init_state_functions(struct r600_pipe_context *rctx)
{
rctx->context.create_blend_state = evergreen_create_blend_state;
@@ -908,7 +907,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx)
rctx->context.delete_vs_state = r600_delete_vs_shader;
rctx->context.set_blend_color = evergreen_set_blend_color;
rctx->context.set_clip_state = evergreen_set_clip_state;
- rctx->context.set_constant_buffer = evergreen_set_constant_buffer;
+ rctx->context.set_constant_buffer = r600_set_constant_buffer;
rctx->context.set_fragment_sampler_views = evergreen_set_ps_sampler_view;
rctx->context.set_framebuffer_state = evergreen_set_framebuffer_state;
rctx->context.set_polygon_stipple = evergreen_set_polygon_stipple;
@@ -920,6 +919,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx)
rctx->context.set_vertex_sampler_views = evergreen_set_vs_sampler_view;
rctx->context.set_viewport_state = evergreen_set_viewport_state;
rctx->context.sampler_view_destroy = r600_sampler_view_destroy;
+ rctx->context.redefine_user_buffer = u_default_redefine_user_buffer;
}
void evergreen_init_config(struct r600_pipe_context *rctx)
@@ -1069,12 +1069,76 @@ void evergreen_init_config(struct r600_pipe_context *rctx)
num_hs_stack_entries = 42;
num_ls_stack_entries = 42;
break;
+ case CHIP_BARTS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 128;
+ num_vs_threads = 20;
+ num_gs_threads = 20;
+ num_es_threads = 20;
+ num_hs_threads = 20;
+ num_ls_threads = 20;
+ num_ps_stack_entries = 85;
+ num_vs_stack_entries = 85;
+ num_gs_stack_entries = 85;
+ num_es_stack_entries = 85;
+ num_hs_stack_entries = 85;
+ num_ls_stack_entries = 85;
+ break;
+ case CHIP_TURKS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 128;
+ num_vs_threads = 20;
+ num_gs_threads = 20;
+ num_es_threads = 20;
+ num_hs_threads = 20;
+ num_ls_threads = 20;
+ num_ps_stack_entries = 42;
+ num_vs_stack_entries = 42;
+ num_gs_stack_entries = 42;
+ num_es_stack_entries = 42;
+ num_hs_stack_entries = 42;
+ num_ls_stack_entries = 42;
+ break;
+ case CHIP_CAICOS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 128;
+ num_vs_threads = 10;
+ num_gs_threads = 10;
+ num_es_threads = 10;
+ num_hs_threads = 10;
+ num_ls_threads = 10;
+ num_ps_stack_entries = 42;
+ num_vs_stack_entries = 42;
+ num_gs_stack_entries = 42;
+ num_es_stack_entries = 42;
+ num_hs_stack_entries = 42;
+ num_ls_stack_entries = 42;
+ break;
}
tmp = 0x00000000;
switch (family) {
case CHIP_CEDAR:
case CHIP_PALM:
+ case CHIP_CAICOS:
break;
default:
tmp |= S_008C00_VC_ENABLE(1);
@@ -1260,226 +1324,11 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx)
}
}
-static void evergreen_spi_update(struct r600_pipe_context *rctx)
-{
- struct r600_pipe_shader *shader = rctx->ps_shader;
- struct r600_pipe_state rstate;
- struct r600_shader *rshader = &shader->shader;
- unsigned i, tmp;
-
- rstate.nregs = 0;
- for (i = 0; i < rshader->ninput; i++) {
- tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
- if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
- tmp |= S_028644_FLAT_SHADE(rctx->flatshade);
- }
- if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
- rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
- tmp |= S_028644_PT_SPRITE_TEX(1);
- }
- r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
- }
- r600_context_pipe_state_set(&rctx->ctx, &rstate);
-}
-
-void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx)
-{
- struct r600_pipe_state *rstate;
- struct r600_resource *rbuffer;
- struct pipe_vertex_buffer *vertex_buffer;
- unsigned i, offset;
-
- /* we don't update until we know vertex elements */
- if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer)
- return;
-
- /* delete previous translated vertex elements */
- if (rctx->tran.new_velems) {
- r600_end_vertex_translate(rctx);
- }
-
- if (rctx->vertex_elements->incompatible_layout) {
- /* translate rebind new vertex elements so
- * return once translated
- */
- r600_begin_vertex_translate(rctx);
- return;
- }
-
- if (rctx->any_user_vbs) {
- r600_upload_user_buffers(rctx);
- rctx->any_user_vbs = FALSE;
- }
-
- if (rctx->vertex_elements->vbuffer_need_offset) {
- /* one resource per vertex elements */
- rctx->nvs_resource = rctx->vertex_elements->count;
- } else {
- /* bind vertex buffer once */
- rctx->nvs_resource = rctx->nvertex_buffer;
- }
-
- for (i = 0 ; i < rctx->nvs_resource; i++) {
- rstate = &rctx->vs_resource[i];
- rstate->id = R600_PIPE_STATE_RESOURCE;
- rstate->nregs = 0;
-
- if (rctx->vertex_elements->vbuffer_need_offset) {
- /* one resource per vertex elements */
- unsigned vbuffer_index;
- vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index;
- vertex_buffer = &rctx->vertex_buffer[vbuffer_index];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = rctx->vertex_elements->vbuffer_offset[i] +
- vertex_buffer->buffer_offset +
- r600_bo_offset(rbuffer->bo);
- } else {
- /* bind vertex buffer once */
- vertex_buffer = &rctx->vertex_buffer[i];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = vertex_buffer->buffer_offset +
- r600_bo_offset(rbuffer->bo);
- }
-
- r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
- offset, 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1,
- rbuffer->size - offset - 1, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2,
- S_030008_STRIDE(vertex_buffer->stride),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3,
- S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
- S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
- S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
- S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7,
- 0xC0000000, 0xFFFFFFFF, NULL);
- evergreen_fs_resource_set(&rctx->ctx, rstate, i);
- }
-}
-
-int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
-void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_resource *rbuffer;
- u32 vgt_dma_index_type, vgt_draw_initiator, mask;
- struct r600_draw rdraw;
- struct r600_pipe_state vgt;
- struct r600_drawl draw;
- unsigned prim;
-
- memset(&draw, 0, sizeof(struct r600_drawl));
- draw.ctx = ctx;
- draw.mode = info->mode;
- draw.start = info->start;
- draw.count = info->count;
- if (info->indexed && rctx->index_buffer.buffer) {
- draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size;
- draw.min_index = info->min_index;
- draw.max_index = info->max_index;
- draw.index_bias = info->index_bias;
-
- r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer,
- &rctx->index_buffer.index_size,
- &draw.start,
- info->count);
-
- draw.index_size = rctx->index_buffer.index_size;
- pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer);
- draw.index_buffer_offset = draw.start * draw.index_size;
- draw.start = 0;
- r600_upload_index_buffer(rctx, &draw);
- } else {
- draw.index_size = 0;
- draw.index_buffer = NULL;
- draw.min_index = info->min_index;
- draw.max_index = info->max_index;
- draw.index_bias = info->start;
- }
-
- switch (draw.index_size) {
- case 2:
- vgt_draw_initiator = 0;
- vgt_dma_index_type = 0;
- break;
- case 4:
- vgt_draw_initiator = 0;
- vgt_dma_index_type = 1;
- break;
- case 0:
- vgt_draw_initiator = 2;
- vgt_dma_index_type = 0;
- break;
- default:
- R600_ERR("unsupported index size %d\n", draw.index_size);
- return;
- }
- if (r600_conv_pipe_prim(draw.mode, &prim))
- return;
- if (unlikely(rctx->ps_shader == NULL)) {
- R600_ERR("missing vertex shader\n");
- return;
- }
- if (unlikely(rctx->vs_shader == NULL)) {
- R600_ERR("missing vertex shader\n");
- return;
- }
- /* there should be enough input */
- if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) {
- R600_ERR("%d resources provided, expecting %d\n",
- rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource);
- return;
- }
-
- evergreen_spi_update(rctx);
-
- mask = 0;
- for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
- mask |= (0xF << (i * 4));
- }
-
- vgt.id = R600_PIPE_STATE_VGT;
- vgt.nregs = 0;
- r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.index_bias, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.max_index, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.min_index, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL);
- r600_context_pipe_state_set(&rctx->ctx, &vgt);
-
- rdraw.vgt_num_indices = draw.count;
- rdraw.vgt_num_instances = 1;
- rdraw.vgt_index_type = vgt_dma_index_type;
- rdraw.vgt_draw_initiator = vgt_draw_initiator;
- rdraw.indices = NULL;
- if (draw.index_buffer) {
- rbuffer = (struct r600_resource*)draw.index_buffer;
- rdraw.indices = rbuffer->bo;
- rdraw.indices_bo_offset = draw.index_buffer_offset;
- }
- evergreen_context_draw(&rctx->ctx, &rdraw);
-
- pipe_resource_reference(&draw.index_buffer, NULL);
-}
-
void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_pipe_state *rstate = &shader->rstate;
struct r600_shader *rshader = &shader->shader;
- unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
+ unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control;
int pos_index = -1, face_index = -1;
int ninterp = 0;
boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE;
@@ -1487,6 +1336,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
rstate->nregs = 0;
+ db_shader_control = 0;
for (i = 0; i < rshader->ninput; i++) {
/* evergreen NUM_INTERP only contains values interpolated into the LDS,
POSITION goes via GPRs from the SC so isn't counted */
@@ -1508,16 +1358,12 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
}
for (i = 0; i < rshader->noutput; i++) {
if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
- r600_pipe_state_add_reg(rstate,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_Z_EXPORT_ENABLE(1),
- S_02880C_Z_EXPORT_ENABLE(1), NULL);
+ db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1);
if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
- r600_pipe_state_add_reg(rstate,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_STENCIL_EXPORT_ENABLE(1),
- S_02880C_STENCIL_EXPORT_ENABLE(1), NULL);
+ db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(1);
}
+ if (rshader->uses_kill)
+ db_shader_control |= S_02880C_KILL_ENABLE(1);
exports_ps = 0;
num_cout = 0;
@@ -1592,15 +1438,15 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
r600_pipe_state_add_reg(rstate,
R_02884C_SQ_PGM_EXPORTS_PS,
exports_ps, 0xFFFFFFFF, NULL);
-
- if (rshader->uses_kill) {
- /* only set some bits here, the other bits are set in the dsa state */
- r600_pipe_state_add_reg(rstate,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_KILL_ENABLE(1),
- S_02880C_KILL_ENABLE(1), NULL);
- }
-
+ /* FIXME: Evergreen doesn't seem to support MULTIWRITE_ENABLE. */
+ /* only set some bits here, the other bits are set in the dsa state */
+ r600_pipe_state_add_reg(rstate,
+ R_02880C_DB_SHADER_CONTROL,
+ db_shader_control,
+ S_02880C_Z_EXPORT_ENABLE(1) |
+ S_02880C_STENCIL_EXPORT_ENABLE(1) |
+ S_02880C_KILL_ENABLE(1),
+ NULL);
r600_pipe_state_add_reg(rstate,
R_03A200_SQ_LOOP_CONST_0, 0x01000FFF,
0xFFFFFFFF, NULL);
@@ -1651,6 +1497,18 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
0xFFFFFFFF, NULL);
}
+void evergreen_fetch_shader(struct r600_vertex_element *ve)
+{
+ struct r600_pipe_state *rstate = &ve->rstate;
+ rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+ rstate->nregs = 0;
+ r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS,
+ (r600_bo_offset(ve->fetch_shader)) >> 8,
+ 0xFFFFFFFF, ve->fetch_shader);
+}
+
void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx)
{
struct pipe_depth_stencil_alpha_state dsa;
@@ -1673,3 +1531,31 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx)
S_028000_COPY_CENTROID(1), NULL);
return rstate;
}
+
+void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
+ struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride)
+{
+ r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
+ offset, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1,
+ rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2,
+ S_030008_STRIDE(stride),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3,
+ S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
+ S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
+ S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
+ S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7,
+ 0xC0000000, 0xFFFFFFFF, NULL);
+}
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index e67254b256..c51a163bd0 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -108,8 +108,9 @@
#define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8)
#define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF)
#define PKT3_IT_OPCODE_C 0xFFFF00FF
+#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1)
#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
-#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count))
+#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate))
/* Registers */
#define R_008C00_SQ_CONFIG 0x00008C00
@@ -327,6 +328,9 @@
#define S_028C70_SOURCE_FORMAT(x) (((x) & 0x3) << 24)
#define G_028C70_SOURCE_FORMAT(x) (((x) >> 24) & 0x3)
#define C_028C70_SOURCE_FORMAT 0xFCFFFFFF
+#define V_028C70_EXPORT_4C_32BPC 0x0
+#define V_028C70_EXPORT_4C_16BPC 0x1
+#define V_028C70_EXPORT_2C_32BPC 0x2 /* Do not use */
#define S_028C70_RAT(x) (((x) & 0x1) << 26)
#define G_028C70_RAT(x) (((x) >> 26) & 0x1)
#define C_028C70_RAT 0xFBFFFFFF
@@ -427,15 +431,6 @@
#define C_028800_STENCILZFAIL_BF 0x1FFFFFFF
#define R_028808_CB_COLOR_CONTROL 0x028808
-#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0)
-#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1)
-#define C_028808_FOG_ENABLE 0xFFFFFFFE
-#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1)
-#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1)
-#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD
-#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2)
-#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1)
-#define C_028808_DITHER_ENABLE 0xFFFFFFFB
#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3)
#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1)
#define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7
@@ -939,6 +934,9 @@
#define V_030000_SQ_TEX_DIM_2D_ARRAY 0x00000005
#define V_030000_SQ_TEX_DIM_2D_MSAA 0x00000006
#define V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA 0x00000007
+#define S_030000_NON_DISP_TILING_ORDER(x) (((x) & 0x1) << 5)
+#define G_030000_NON_DISP_TILING_ORDER(x) (((x) >> 5) & 0x1)
+#define C_030000_NON_DISP_TILING_ORDER 0xFFFFFFDF
#define S_030000_PITCH(x) (((x) & 0xFFF) << 6)
#define G_030000_PITCH(x) (((x) >> 6) & 0xFFF)
#define C_030000_PITCH 0xFFFC003F
@@ -988,8 +986,8 @@
#define S_030010_SRF_MODE_ALL(x) (((x) & 0x1) << 10)
#define G_030010_SRF_MODE_ALL(x) (((x) >> 10) & 0x1)
#define C_030010_SRF_MODE_ALL 0xFFFFFBFF
-#define V_030010_SFR_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000
-#define V_030010_SFR_MODE_NO_ZERO 0x00000001
+#define V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000
+#define V_030010_SRF_MODE_NO_ZERO 0x00000001
#define S_030010_FORCE_DEGAMMA(x) (((x) & 0x1) << 11)
#define G_030010_FORCE_DEGAMMA(x) (((x) >> 11) & 0x1)
#define C_030010_FORCE_DEGAMMA 0xFFFFF7FF
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index aa456d493f..0b7d6f7096 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -35,7 +35,7 @@
#define RADEON_CTX_MAX_PM4 (64 * 1024 / 4)
#define R600_ERR(fmt, args...) \
- fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args)
+ fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args)
typedef uint64_t u64;
typedef uint32_t u32;
@@ -92,6 +92,9 @@ enum radeon_family {
CHIP_CYPRESS,
CHIP_HEMLOCK,
CHIP_PALM,
+ CHIP_BARTS,
+ CHIP_TURKS,
+ CHIP_CAICOS,
CHIP_LAST,
};
@@ -110,14 +113,17 @@ struct r600_tiling_info {
enum radeon_family r600_get_family(struct radeon *rw);
enum chip_class r600_get_family_class(struct radeon *radeon);
struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon);
+unsigned r600_get_clock_crystal_freq(struct radeon *radeon);
+unsigned r600_get_minor_version(struct radeon *radeon);
+unsigned r600_get_num_backends(struct radeon *radeon);
/* r600_bo.c */
struct r600_bo;
struct r600_bo *r600_bo(struct radeon *radeon,
- unsigned size, unsigned alignment,
- unsigned binding, unsigned usage);
+ unsigned size, unsigned alignment,
+ unsigned binding, unsigned usage);
struct r600_bo *r600_bo_handle(struct radeon *radeon,
- unsigned handle, unsigned *array_mode);
+ unsigned handle, unsigned *array_mode);
void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx);
void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo);
void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst,
@@ -245,10 +251,9 @@ struct r600_context {
u32 *pm4;
struct list_head query_list;
unsigned num_query_running;
- unsigned fence;
struct list_head fenced_bo;
- unsigned *cfence;
- struct r600_bo *fence_bo;
+ unsigned max_db; /* for OQ */
+ boolean predicate_drawing;
};
struct r600_draw {
@@ -281,13 +286,11 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query);
void r600_query_end(struct r600_context *ctx, struct r600_query *query);
void r600_context_queries_suspend(struct r600_context *ctx);
void r600_context_queries_resume(struct r600_context *ctx);
+void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
+ int flag_wait);
int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon);
void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw);
-void evergreen_ps_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
-void evergreen_vs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
-void evergreen_fs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
-
void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 1f41269534..240093f9b9 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -32,52 +32,118 @@
#include "r600_formats.h"
#include "r600d.h"
-static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu)
+#define NUM_OF_CYCLES 3
+#define NUM_OF_COMPONENTS 4
+
+static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r600_bc_alu *alu)
{
if(alu->is_op3)
return 3;
- switch (alu->inst) {
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
- return 0;
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
- return 2;
-
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
- return 1;
- default: R600_ERR(
- "Need instruction operand number for 0x%x.\n", alu->inst);
- };
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ switch (alu->inst) {
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
+ return 0;
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
+ return 2;
+
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
+ return 1;
+ default: R600_ERR(
+ "Need instruction operand number for 0x%x.\n", alu->inst);
+ }
+ break;
+ case CHIPREV_EVERGREEN:
+ switch (alu->inst) {
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
+ return 0;
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW:
+ return 2;
+
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
+ return 1;
+ default: R600_ERR(
+ "Need instruction operand number for 0x%x.\n", alu->inst);
+ }
+ break;
+ }
return 3;
}
@@ -104,7 +170,6 @@ static struct r600_bc_alu *r600_bc_alu(void)
if (alu == NULL)
return NULL;
LIST_INITHEAD(&alu->list);
- LIST_INITHEAD(&alu->bs_list);
return alu;
}
@@ -155,6 +220,9 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family)
case CHIP_CYPRESS:
case CHIP_HEMLOCK:
case CHIP_PALM:
+ case CHIP_BARTS:
+ case CHIP_TURKS:
+ case CHIP_CAICOS:
bc->chiprev = CHIPREV_EVERGREEN;
break;
default:
@@ -184,6 +252,37 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
{
int r;
+ if (bc->cf_last && (bc->cf_last->inst == output->inst ||
+ (bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT) &&
+ output->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE))) &&
+ output->type == bc->cf_last->output.type &&
+ output->elem_size == bc->cf_last->output.elem_size &&
+ output->swizzle_x == bc->cf_last->output.swizzle_x &&
+ output->swizzle_y == bc->cf_last->output.swizzle_y &&
+ output->swizzle_z == bc->cf_last->output.swizzle_z &&
+ output->swizzle_w == bc->cf_last->output.swizzle_w &&
+ (output->burst_count + bc->cf_last->output.burst_count) <= 16) {
+
+ if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
+ (output->array_base + output->burst_count) == bc->cf_last->output.array_base) {
+
+ bc->cf_last->output.end_of_program |= output->end_of_program;
+ bc->cf_last->output.inst = output->inst;
+ bc->cf_last->output.gpr = output->gpr;
+ bc->cf_last->output.array_base = output->array_base;
+ bc->cf_last->output.burst_count += output->burst_count;
+ return 0;
+
+ } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) &&
+ output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) {
+
+ bc->cf_last->output.end_of_program |= output->end_of_program;
+ bc->cf_last->output.inst = output->inst;
+ bc->cf_last->output.burst_count += output->burst_count;
+ return 0;
+ }
+ }
+
r = r600_bc_add_cf(bc);
if (r)
return r;
@@ -192,221 +291,849 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
return 0;
}
-const unsigned bank_swizzle_vec[8] = {SQ_ALU_VEC_210, //000
- SQ_ALU_VEC_120, //001
- SQ_ALU_VEC_102, //010
+/* alu instructions that can ony exits once per group */
+static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ return !alu->is_op3 && (
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
+ case CHIPREV_EVERGREEN:
+ default:
+ return !alu->is_op3 && (
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
+ }
+}
+
+static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ return !alu->is_op3 && (
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4);
+ case CHIPREV_EVERGREEN:
+ default:
+ return !alu->is_op3 && (
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4);
+ }
+}
- SQ_ALU_VEC_201, //011
- SQ_ALU_VEC_012, //100
- SQ_ALU_VEC_021, //101
+static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ return !alu->is_op3 &&
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
+ case CHIPREV_EVERGREEN:
+ default:
+ return !alu->is_op3 &&
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
+ }
+}
- SQ_ALU_VEC_012, //110
- SQ_ALU_VEC_012}; //111
+static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ return !alu->is_op3 && (
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
+ case CHIPREV_EVERGREEN:
+ default:
+ return !alu->is_op3 && (
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
+ }
+}
-const unsigned bank_swizzle_scl[8] = {SQ_ALU_SCL_210, //000
- SQ_ALU_SCL_122, //001
- SQ_ALU_SCL_122, //010
+/* alu instructions that can only execute on the vector unit */
+static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ return is_alu_reduction_inst(bc, alu) ||
+ is_alu_mova_inst(bc, alu) ||
+ (bc->chiprev == CHIPREV_EVERGREEN &&
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR);
+}
- SQ_ALU_SCL_221, //011
- SQ_ALU_SCL_212, //100
- SQ_ALU_SCL_122, //101
+/* alu instructions that can only execute on the trans unit */
+static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ if (!alu->is_op3)
+ return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE;
+ else
+ return alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT ||
+ alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2 ||
+ alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 ||
+ alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4;
+ case CHIPREV_EVERGREEN:
+ default:
+ if (!alu->is_op3)
+ /* Note that FLT_TO_INT_* instructions are vector-only instructions
+ * on Evergreen, despite what the documentation says. FLT_TO_INT
+ * can do both vector and scalar. */
+ return alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE;
+ else
+ return alu->inst == EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
+ }
+}
- SQ_ALU_SCL_122, //110
- SQ_ALU_SCL_122}; //111
+/* alu instructions that can execute on any unit */
+static int is_alu_any_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ return !is_alu_vec_unit_inst(bc, alu) &&
+ !is_alu_trans_unit_inst(bc, alu);
+}
-static int init_gpr(struct r600_bc_alu *alu)
+static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first,
+ struct r600_bc_alu *assignment[5])
{
- int cycle, component;
+ struct r600_bc_alu *alu;
+ unsigned i, chan, trans;
+
+ for (i = 0; i < 5; i++)
+ assignment[i] = NULL;
+
+ for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
+ chan = alu->dst.chan;
+ if (is_alu_trans_unit_inst(bc, alu))
+ trans = 1;
+ else if (is_alu_vec_unit_inst(bc, alu))
+ trans = 0;
+ else if (assignment[chan])
+ trans = 1; // assume ALU_INST_PREFER_VECTOR
+ else
+ trans = 0;
+
+ if (trans) {
+ if (assignment[4]) {
+ assert(0); //ALU.Trans has already been allocated
+ return -1;
+ }
+ assignment[4] = alu;
+ } else {
+ if (assignment[chan]) {
+ assert(0); //ALU.chan has already been allocated
+ return -1;
+ }
+ assignment[chan] = alu;
+ }
+
+ if (alu->last)
+ break;
+ }
+ return 0;
+}
+
+struct alu_bank_swizzle {
+ int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
+ int hw_cfile_addr[4];
+ int hw_cfile_elem[4];
+};
+
+static const unsigned cycle_for_bank_swizzle_vec[][3] = {
+ [SQ_ALU_VEC_012] = { 0, 1, 2 },
+ [SQ_ALU_VEC_021] = { 0, 2, 1 },
+ [SQ_ALU_VEC_120] = { 1, 2, 0 },
+ [SQ_ALU_VEC_102] = { 1, 0, 2 },
+ [SQ_ALU_VEC_201] = { 2, 0, 1 },
+ [SQ_ALU_VEC_210] = { 2, 1, 0 }
+};
+
+static const unsigned cycle_for_bank_swizzle_scl[][3] = {
+ [SQ_ALU_SCL_210] = { 2, 1, 0 },
+ [SQ_ALU_SCL_122] = { 1, 2, 2 },
+ [SQ_ALU_SCL_212] = { 2, 1, 2 },
+ [SQ_ALU_SCL_221] = { 2, 2, 1 }
+};
+
+static void init_bank_swizzle(struct alu_bank_swizzle *bs)
+{
+ int i, cycle, component;
/* set up gpr use */
for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++)
for (component = 0; component < NUM_OF_COMPONENTS; component++)
- alu->hw_gpr[cycle][component] = -1;
- return 0;
+ bs->hw_gpr[cycle][component] = -1;
+ for (i = 0; i < 4; i++)
+ bs->hw_cfile_addr[i] = -1;
+ for (i = 0; i < 4; i++)
+ bs->hw_cfile_elem[i] = -1;
}
-#if 0
-static int reserve_gpr(struct r600_bc_alu *alu, unsigned sel, unsigned chan, unsigned cycle)
+static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle)
{
- if (alu->hw_gpr[cycle][chan] < 0)
- alu->hw_gpr[cycle][chan] = sel;
- else if (alu->hw_gpr[cycle][chan] != (int)sel) {
- R600_ERR("Another scalar operation has already used GPR read port for channel\n");
+ if (bs->hw_gpr[cycle][chan] == -1)
+ bs->hw_gpr[cycle][chan] = sel;
+ else if (bs->hw_gpr[cycle][chan] != (int)sel) {
+ // Another scalar operation has already used GPR read port for channel
return -1;
}
return 0;
}
-static int cycle_for_scalar_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
+static int reserve_cfile(struct r600_bc *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
{
- int table[3];
- int ret = 0;
- switch (swiz) {
- case SQ_ALU_SCL_210:
- table[0] = 2; table[1] = 1; table[2] = 0;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_SCL_122:
- table[0] = 1; table[1] = 2; table[2] = 2;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_SCL_212:
- table[0] = 2; table[1] = 1; table[2] = 2;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_SCL_221:
- table[0] = 2; table[1] = 2; table[2] = 1;
- *p_cycle = table[sel];
- break;
- break;
- default:
- R600_ERR("bad scalar bank swizzle value\n");
- ret = -1;
- break;
+ int res, num_res = 4;
+ if (bc->chiprev >= CHIPREV_R700) {
+ num_res = 2;
+ chan /= 2;
+ }
+ for (res = 0; res < num_res; ++res) {
+ if (bs->hw_cfile_addr[res] == -1) {
+ bs->hw_cfile_addr[res] = sel;
+ bs->hw_cfile_elem[res] = chan;
+ return 0;
+ } else if (bs->hw_cfile_addr[res] == sel &&
+ bs->hw_cfile_elem[res] == chan)
+ return 0; // Read for this scalar element already reserved, nothing to do here.
}
- return ret;
+ // All cfile read ports are used, cannot reference vector element
+ return -1;
}
-static int cycle_for_vector_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
+static int is_gpr(unsigned sel)
{
- int table[3];
- int ret;
-
- switch (swiz) {
- case SQ_ALU_VEC_012:
- table[0] = 0; table[1] = 1; table[2] = 2;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_021:
- table[0] = 0; table[1] = 2; table[2] = 1;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_120:
- table[0] = 1; table[1] = 2; table[2] = 0;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_102:
- table[0] = 1; table[1] = 0; table[2] = 2;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_201:
- table[0] = 2; table[1] = 0; table[2] = 1;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_210:
- table[0] = 2; table[1] = 1; table[2] = 0;
- *p_cycle = table[sel];
- break;
- default:
- R600_ERR("bad vector bank swizzle value\n");
- ret = -1;
- break;
- }
- return ret;
+ return (sel >= 0 && sel <= 127);
}
+/* CB constants start at 512, and get translated to a kcache index when ALU
+ * clauses are constructed. Note that we handle kcache constants the same way
+ * as (the now gone) cfile constants, is that really required? */
+static int is_cfile(unsigned sel)
+{
+ return (sel > 255 && sel < 512) ||
+ (sel > 511 && sel < 4607) || // Kcache before translate
+ (sel > 127 && sel < 192); // Kcache after translate
+}
+static int is_const(int sel)
+{
+ return is_cfile(sel) ||
+ (sel >= V_SQ_ALU_SRC_0 &&
+ sel <= V_SQ_ALU_SRC_LITERAL);
+}
-static void update_chan_counter(struct r600_bc_alu *alu, int *chan_counter)
+static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu,
+ struct alu_bank_swizzle *bs, int bank_swizzle)
{
- int num_src;
- int i;
- int channel_swizzle;
+ int r, src, num_src, sel, elem, cycle;
+
+ num_src = r600_bc_get_num_operands(bc, alu);
+ for (src = 0; src < num_src; src++) {
+ sel = alu->src[src].sel;
+ elem = alu->src[src].chan;
+ if (is_gpr(sel)) {
+ cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src];
+ if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan)
+ // Nothing to do; special-case optimization,
+ // second source uses first source’s reservation
+ continue;
+ else {
+ r = reserve_gpr(bs, sel, elem, cycle);
+ if (r)
+ return r;
+ }
+ } else if (is_cfile(sel)) {
+ r = reserve_cfile(bc, bs, sel, elem);
+ if (r)
+ return r;
+ }
+ // No restrictions on PV, PS, literal or special constants
+ }
+ return 0;
+}
- num_src = r600_bc_get_num_operands(alu);
+static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu,
+ struct alu_bank_swizzle *bs, int bank_swizzle)
+{
+ int r, src, num_src, const_count, sel, elem, cycle;
+
+ num_src = r600_bc_get_num_operands(bc, alu);
+ for (const_count = 0, src = 0; src < num_src; ++src) {
+ sel = alu->src[src].sel;
+ elem = alu->src[src].chan;
+ if (is_const(sel)) { // Any constant, including literal and inline constants
+ if (const_count >= 2)
+ // More than two references to a constant in
+ // transcendental operation.
+ return -1;
+ else
+ const_count++;
+ }
+ if (is_cfile(sel)) {
+ r = reserve_cfile(bc, bs, sel, elem);
+ if (r)
+ return r;
+ }
+ }
+ for (src = 0; src < num_src; ++src) {
+ sel = alu->src[src].sel;
+ elem = alu->src[src].chan;
+ if (is_gpr(sel)) {
+ cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
+ if (cycle < const_count)
+ // Cycle for GPR load conflicts with
+ // constant load in transcendental operation.
+ return -1;
+ r = reserve_gpr(bs, sel, elem, cycle);
+ if (r)
+ return r;
+ }
+ // Constants already processed
+ // No restrictions on PV, PS
+ }
+ return 0;
+}
- for (i = 0; i < num_src; i++) {
- channel_swizzle = alu->src[i].chan;
- if ((alu->src[i].sel > 0 && alu->src[i].sel < 128) && channel_swizzle <= 3)
- chan_counter[channel_swizzle]++;
+static int check_and_set_bank_swizzle(struct r600_bc *bc,
+ struct r600_bc_alu *slots[5])
+{
+ struct alu_bank_swizzle bs;
+ int bank_swizzle[5];
+ int i, r = 0, forced = 0;
+
+ for (i = 0; i < 5; i++)
+ if (slots[i] && slots[i]->bank_swizzle_force) {
+ slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
+ forced = 1;
+ }
+
+ if (forced)
+ return 0;
+
+ // just check every possible combination of bank swizzle
+ // not very efficent, but works on the first try in most of the cases
+ for (i = 0; i < 4; i++)
+ bank_swizzle[i] = SQ_ALU_VEC_012;
+ bank_swizzle[4] = SQ_ALU_SCL_210;
+ while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
+ init_bank_swizzle(&bs);
+ for (i = 0; i < 4; i++) {
+ if (slots[i]) {
+ r = check_vector(bc, slots[i], &bs, bank_swizzle[i]);
+ if (r)
+ break;
+ }
+ }
+ if (!r && slots[4]) {
+ r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]);
+ }
+ if (!r) {
+ for (i = 0; i < 5; i++) {
+ if (slots[i])
+ slots[i]->bank_swizzle = bank_swizzle[i];
+ }
+ return 0;
+ }
+
+ for (i = 0; i < 5; i++) {
+ bank_swizzle[i]++;
+ if (bank_swizzle[i] <= SQ_ALU_VEC_210)
+ break;
+ else
+ bank_swizzle[i] = SQ_ALU_VEC_012;
+ }
}
+
+ // couldn't find a working swizzle
+ return -1;
}
-/* we need something like this I think - but this is bogus */
-int check_read_slots(struct r600_bc *bc, struct r600_bc_alu *alu_first)
+static int replace_gpr_with_pv_ps(struct r600_bc *bc,
+ struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev)
{
- struct r600_bc_alu *alu;
- int chan_counter[4] = { 0 };
+ struct r600_bc_alu *prev[5];
+ int gpr[5], chan[5];
+ int i, j, r, src, num_src;
- update_chan_counter(alu_first, chan_counter);
+ r = assign_alu_units(bc, alu_prev, prev);
+ if (r)
+ return r;
- LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
- update_chan_counter(alu, chan_counter);
+ for (i = 0; i < 5; ++i) {
+ if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) {
+ gpr[i] = prev[i]->dst.sel;
+ /* cube writes more than PV.X */
+ if (!is_alu_cube_inst(bc, prev[i]) && is_alu_reduction_inst(bc, prev[i]))
+ chan[i] = 0;
+ else
+ chan[i] = prev[i]->dst.chan;
+ } else
+ gpr[i] = -1;
}
- if (chan_counter[0] > 3 ||
- chan_counter[1] > 3 ||
- chan_counter[2] > 3 ||
- chan_counter[3] > 3) {
- R600_ERR("needed to split instruction for input ran out of banks %x %d %d %d %d\n",
- alu_first->inst, chan_counter[0], chan_counter[1], chan_counter[2], chan_counter[3]);
- return -1;
+ for (i = 0; i < 5; ++i) {
+ struct r600_bc_alu *alu = slots[i];
+ if(!alu)
+ continue;
+
+ num_src = r600_bc_get_num_operands(bc, alu);
+ for (src = 0; src < num_src; ++src) {
+ if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
+ continue;
+
+ if (alu->src[src].sel == gpr[4] &&
+ alu->src[src].chan == chan[4]) {
+ alu->src[src].sel = V_SQ_ALU_SRC_PS;
+ alu->src[src].chan = 0;
+ continue;
+ }
+
+ for (j = 0; j < 4; ++j) {
+ if (alu->src[src].sel == gpr[j] &&
+ alu->src[src].chan == j) {
+ alu->src[src].sel = V_SQ_ALU_SRC_PV;
+ alu->src[src].chan = chan[j];
+ break;
+ }
+ }
+ }
}
+
return 0;
}
-#endif
-static int is_const(int sel)
+void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg)
{
- if (sel > 255 && sel < 512)
- return 1;
- if (sel >= V_SQ_ALU_SRC_0 && sel <= V_SQ_ALU_SRC_LITERAL)
- return 1;
- return 0;
+ switch(value) {
+ case 0:
+ *sel = V_SQ_ALU_SRC_0;
+ break;
+ case 1:
+ *sel = V_SQ_ALU_SRC_1_INT;
+ break;
+ case -1:
+ *sel = V_SQ_ALU_SRC_M_1_INT;
+ break;
+ case 0x3F800000: // 1.0f
+ *sel = V_SQ_ALU_SRC_1;
+ break;
+ case 0x3F000000: // 0.5f
+ *sel = V_SQ_ALU_SRC_0_5;
+ break;
+ case 0xBF800000: // -1.0f
+ *sel = V_SQ_ALU_SRC_1;
+ *neg ^= 1;
+ break;
+ case 0xBF000000: // -0.5f
+ *sel = V_SQ_ALU_SRC_0_5;
+ *neg ^= 1;
+ break;
+ default:
+ *sel = V_SQ_ALU_SRC_LITERAL;
+ break;
+ }
}
-static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu)
+/* compute how many literal are needed */
+static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu,
+ uint32_t literal[4], unsigned *nliteral)
{
- unsigned swizzle_key;
-
- if (alu->bank_swizzle_force) {
- alu->bank_swizzle = alu->bank_swizzle_force;
- return 0;
+ unsigned num_src = r600_bc_get_num_operands(bc, alu);
+ unsigned i, j;
+
+ for (i = 0; i < num_src; ++i) {
+ if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+ uint32_t value = alu->src[i].value;
+ unsigned found = 0;
+ for (j = 0; j < *nliteral; ++j) {
+ if (literal[j] == value) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ if (*nliteral >= 4)
+ return -EINVAL;
+ literal[(*nliteral)++] = value;
+ }
+ }
}
- swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) +
- (is_const(alu->src[1].sel) ? 2 : 0 ) +
- (is_const(alu->src[2].sel) ? 1 : 0 );
-
- alu->bank_swizzle = bank_swizzle_scl[swizzle_key];
return 0;
}
-static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu)
+static void r600_bc_alu_adjust_literals(struct r600_bc *bc,
+ struct r600_bc_alu *alu,
+ uint32_t literal[4], unsigned nliteral)
+{
+ unsigned num_src = r600_bc_get_num_operands(bc, alu);
+ unsigned i, j;
+
+ for (i = 0; i < num_src; ++i) {
+ if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+ uint32_t value = alu->src[i].value;
+ for (j = 0; j < nliteral; ++j) {
+ if (literal[j] == value) {
+ alu->src[i].chan = j;
+ break;
+ }
+ }
+ }
+ }
+}
+
+static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
+ struct r600_bc_alu *alu_prev)
{
- unsigned swizzle_key;
+ struct r600_bc_alu *prev[5];
+ struct r600_bc_alu *result[5] = { NULL };
+
+ uint32_t literal[4], prev_literal[4];
+ unsigned nliteral = 0, prev_nliteral = 0;
- if (alu->bank_swizzle_force) {
- alu->bank_swizzle = alu->bank_swizzle_force;
+ int i, j, r, src, num_src;
+ int num_once_inst = 0;
+ int have_mova = 0, have_rel = 0;
+
+ r = assign_alu_units(bc, alu_prev, prev);
+ if (r)
+ return r;
+
+ for (i = 0; i < 5; ++i) {
+ struct r600_bc_alu *alu;
+
+ /* check number of literals */
+ if (prev[i]) {
+ if (r600_bc_alu_nliterals(bc, prev[i], literal, &nliteral))
+ return 0;
+ if (r600_bc_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral))
+ return 0;
+ if (is_alu_mova_inst(bc, prev[i])) {
+ if (have_rel)
+ return 0;
+ have_mova = 1;
+ }
+ num_once_inst += is_alu_once_inst(bc, prev[i]);
+ }
+ if (slots[i] && r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral))
+ return 0;
+
+ // let's check used slots
+ if (prev[i] && !slots[i]) {
+ result[i] = prev[i];
+ continue;
+ } else if (prev[i] && slots[i]) {
+ if (result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
+ // trans unit is still free try to use it
+ if (is_alu_any_unit_inst(bc, slots[i])) {
+ result[i] = prev[i];
+ result[4] = slots[i];
+ } else if (is_alu_any_unit_inst(bc, prev[i])) {
+ result[i] = slots[i];
+ result[4] = prev[i];
+ } else
+ return 0;
+ } else
+ return 0;
+ } else if(!slots[i]) {
+ continue;
+ } else
+ result[i] = slots[i];
+
+ // let's check source gprs
+ alu = slots[i];
+ num_once_inst += is_alu_once_inst(bc, alu);
+
+ num_src = r600_bc_get_num_operands(bc, alu);
+ for (src = 0; src < num_src; ++src) {
+ if (alu->src[src].rel) {
+ if (have_mova)
+ return 0;
+ have_rel = 1;
+ }
+
+ // constants doesn't matter
+ if (!is_gpr(alu->src[src].sel))
+ continue;
+
+ for (j = 0; j < 5; ++j) {
+ if (!prev[j] || !prev[j]->dst.write)
+ continue;
+
+ // if it's relative then we can't determin which gpr is really used
+ if (prev[j]->dst.chan == alu->src[src].chan &&
+ (prev[j]->dst.sel == alu->src[src].sel ||
+ prev[j]->dst.rel || alu->src[src].rel))
+ return 0;
+ }
+ }
+ }
+
+ /* more than one PRED_ or KILL_ ? */
+ if (num_once_inst > 1)
return 0;
+
+ /* check if the result can still be swizzlet */
+ r = check_and_set_bank_swizzle(bc, result);
+ if (r)
+ return 0;
+
+ /* looks like everything worked out right, apply the changes */
+
+ /* undo adding previus literals */
+ bc->cf_last->ndw -= align(prev_nliteral, 2);
+
+ /* sort instructions */
+ for (i = 0; i < 5; ++i) {
+ slots[i] = result[i];
+ if (result[i]) {
+ LIST_DEL(&result[i]->list);
+ result[i]->last = 0;
+ LIST_ADDTAIL(&result[i]->list, &bc->cf_last->alu);
+ }
+ }
+
+ /* determine new last instruction */
+ LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1;
+
+ /* determine new first instruction */
+ for (i = 0; i < 5; ++i) {
+ if (result[i]) {
+ bc->cf_last->curr_bs_head = result[i];
+ break;
+ }
}
- swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) +
- (is_const(alu->src[1].sel) ? 2 : 0 ) +
- (is_const(alu->src[2].sel) ? 1 : 0 );
- alu->bank_swizzle = bank_swizzle_vec[swizzle_key];
+ bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head;
+ bc->cf_last->prev2_bs_head = NULL;
+
return 0;
}
-static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *alu_first)
+/* This code handles kcache lines as single blocks of 32 constants. We could
+ * probably do slightly better by recognizing that we actually have two
+ * consecutive lines of 16 constants, but the resulting code would also be
+ * somewhat more complicated. */
+static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *alu, int type)
{
- struct r600_bc_alu *alu = NULL;
- int num_instr = 1;
+ struct r600_bc_kcache *kcache = bc->cf_last->kcache;
+ unsigned int required_lines;
+ unsigned int free_lines = 0;
+ unsigned int cache_line[3];
+ unsigned int count = 0;
+ unsigned int i, j;
+ int r;
+
+ /* Collect required cache lines. */
+ for (i = 0; i < 3; ++i) {
+ bool found = false;
+ unsigned int line;
+
+ if (alu->src[i].sel < 512)
+ continue;
- init_gpr(alu_first);
+ line = ((alu->src[i].sel - 512) / 32) * 2;
- LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
- num_instr++;
+ for (j = 0; j < count; ++j) {
+ if (cache_line[j] == line) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ cache_line[count++] = line;
}
- if (num_instr == 1) {
- check_scalar(bc, alu_first);
-
- } else {
-/* check_read_slots(bc, bc->cf_last->curr_bs_head);*/
- check_vector(bc, alu_first);
- LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
- check_vector(bc, alu);
+ /* This should never actually happen. */
+ if (count >= 3) return -ENOMEM;
+
+ for (i = 0; i < 2; ++i) {
+ if (kcache[i].mode == V_SQ_CF_KCACHE_NOP) {
+ ++free_lines;
+ }
+ }
+
+ /* Filter lines pulled in by previous intructions. Note that this is
+ * only for the required_lines count, we can't remove these from the
+ * cache_line array since we may have to start a new ALU clause. */
+ for (i = 0, required_lines = count; i < count; ++i) {
+ for (j = 0; j < 2; ++j) {
+ if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 &&
+ kcache[j].addr == cache_line[i]) {
+ --required_lines;
+ break;
+ }
+ }
+ }
+
+ /* Start a new ALU clause if needed. */
+ if (required_lines > free_lines) {
+ if ((r = r600_bc_add_cf(bc))) {
+ return r;
+ }
+ bc->cf_last->inst = (type << 3);
+ kcache = bc->cf_last->kcache;
+ }
+
+ /* Setup the kcache lines. */
+ for (i = 0; i < count; ++i) {
+ bool found = false;
+
+ for (j = 0; j < 2; ++j) {
+ if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 &&
+ kcache[j].addr == cache_line[i]) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found) continue;
+
+ for (j = 0; j < 2; ++j) {
+ if (kcache[j].mode == V_SQ_CF_KCACHE_NOP) {
+ kcache[j].bank = 0;
+ kcache[j].addr = cache_line[i];
+ kcache[j].mode = V_SQ_CF_KCACHE_LOCK_2;
+ break;
+ }
+ }
+ }
+
+ /* Alter the src operands to refer to the kcache. */
+ for (i = 0; i < 3; ++i) {
+ static const unsigned int base[] = {128, 160, 256, 288};
+ unsigned int line;
+
+ if (alu->src[i].sel < 512)
+ continue;
+
+ alu->src[i].sel -= 512;
+ line = (alu->src[i].sel / 32) * 2;
+
+ for (j = 0; j < 2; ++j) {
+ if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 &&
+ kcache[j].addr == line) {
+ alu->src[i].sel &= 0x1f;
+ alu->src[i].sel += base[j];
+ break;
+ }
}
}
+
return 0;
}
@@ -419,62 +1146,100 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
if (nalu == NULL)
return -ENOMEM;
memcpy(nalu, alu, sizeof(struct r600_bc_alu));
- nalu->nliteral = 0;
+
+ if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) {
+ /* check if we could add it anyway */
+ if (bc->cf_last->inst == (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) &&
+ type == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE) {
+ LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) {
+ if (lalu->predicate) {
+ bc->force_add_cf = 1;
+ break;
+ }
+ }
+ } else
+ bc->force_add_cf = 1;
+ }
/* cf can contains only alu or only vtx or only tex */
- if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) ||
- bc->force_add_cf) {
+ if (bc->cf_last == NULL || bc->force_add_cf) {
r = r600_bc_add_cf(bc);
if (r) {
free(nalu);
return r;
}
- bc->cf_last->inst = (type << 3);
}
+ bc->cf_last->inst = (type << 3);
+
+ /* Setup the kcache for this ALU instruction. This will start a new
+ * ALU clause if needed. */
+ if ((r = r600_bc_alloc_kcache_lines(bc, nalu, type))) {
+ free(nalu);
+ return r;
+ }
+
if (!bc->cf_last->curr_bs_head) {
bc->cf_last->curr_bs_head = nalu;
- LIST_INITHEAD(&nalu->bs_list);
- } else {
- LIST_ADDTAIL(&nalu->bs_list, &bc->cf_last->curr_bs_head->bs_list);
- }
- /* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots)
- * worst case */
- if (alu->last && (bc->cf_last->ndw >> 1) >= 120) {
- bc->force_add_cf = 1;
}
/* number of gpr == the last gpr used in any alu */
for (i = 0; i < 3; i++) {
- if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) {
- bc->ngpr = alu->src[i].sel + 1;
- }
- /* compute how many literal are needed
- * either 2 or 4 literals
- */
- if (alu->src[i].sel == 253) {
- if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) {
- nalu->nliteral = (alu->src[i].chan + 2) & 0x6;
- }
+ if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) {
+ bc->ngpr = nalu->src[i].sel + 1;
}
+ if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
+ r600_bc_special_constants(nalu->src[i].value,
+ &nalu->src[i].sel, &nalu->src[i].neg);
}
- if (!LIST_IS_EMPTY(&bc->cf_last->alu)) {
- lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
- if (!lalu->last && lalu->nliteral > nalu->nliteral) {
- nalu->nliteral = lalu->nliteral;
- }
- }
- if (alu->dst.sel >= bc->ngpr) {
- bc->ngpr = alu->dst.sel + 1;
+ if (nalu->dst.sel >= bc->ngpr) {
+ bc->ngpr = nalu->dst.sel + 1;
}
LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu);
/* each alu use 2 dwords */
bc->cf_last->ndw += 2;
bc->ndw += 2;
- bc->cf_last->kcache0_mode = 2;
-
/* process cur ALU instructions for bank swizzle */
- if (alu->last) {
- check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head);
+ if (nalu->last) {
+ uint32_t literal[4];
+ unsigned nliteral;
+ struct r600_bc_alu *slots[5];
+ r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots);
+ if (r)
+ return r;
+
+ if (bc->cf_last->prev_bs_head) {
+ r = merge_inst_groups(bc, slots, bc->cf_last->prev_bs_head);
+ if (r)
+ return r;
+ }
+
+ if (bc->cf_last->prev_bs_head) {
+ r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head);
+ if (r)
+ return r;
+ }
+
+ r = check_and_set_bank_swizzle(bc, slots);
+ if (r)
+ return r;
+
+ for (i = 0, nliteral = 0; i < 5; i++) {
+ if (slots[i]) {
+ r = r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral);
+ if (r)
+ return r;
+ }
+ }
+ bc->cf_last->ndw += align(nliteral, 2);
+
+ /* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots)
+ * worst case */
+ if ((bc->cf_last->ndw >> 1) >= 120) {
+ bc->force_add_cf = 1;
+ }
+
+ bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head;
+ bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head;
bc->cf_last->curr_bs_head = NULL;
}
return 0;
@@ -485,42 +1250,22 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
}
-int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
+static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc)
{
- struct r600_bc_alu *alu;
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ return 8;
- if (bc->cf_last == NULL) {
- return 0;
- }
- if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
- return 0;
- }
- /* all same on EG */
- if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
- return 0;
- }
- /* same on EG */
- if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) &&
- (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) ||
- LIST_IS_EMPTY(&bc->cf_last->alu)) {
- R600_ERR("last CF is not ALU (%p)\n", bc->cf_last);
- return -EINVAL;
- }
- alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
- if (!alu->last || !alu->nliteral || alu->literal_added) {
- return 0;
+ case CHIPREV_R700:
+ return 16;
+
+ case CHIPREV_EVERGREEN:
+ return 64;
+
+ default:
+ R600_ERR("Unknown chiprev %d.\n", bc->chiprev);
+ return 8;
}
- memcpy(alu->value, value, 4 * 4);
- bc->cf_last->ndw += alu->nliteral;
- bc->ndw += alu->nliteral;
- alu->literal_added = 1;
- return 0;
}
int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
@@ -548,7 +1293,7 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
/* each fetch use 4 dwords */
bc->cf_last->ndw += 4;
bc->ndw += 4;
- if ((bc->ndw / 4) > 7)
+ if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc))
bc->force_add_cf = 1;
return 0;
}
@@ -562,6 +1307,18 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
return -ENOMEM;
memcpy(ntex, tex, sizeof(struct r600_bc_tex));
+ /* we can't fetch data und use it as texture lookup address in the same TEX clause */
+ if (bc->cf_last != NULL &&
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
+ struct r600_bc_tex *ttex;
+ LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) {
+ if (ttex->dst_gpr == ntex->src_gpr) {
+ bc->force_add_cf = 1;
+ break;
+ }
+ }
+ }
+
/* cf can contains only alu or only vtx or only tex */
if (bc->cf_last == NULL ||
bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX ||
@@ -573,11 +1330,17 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
}
bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_TEX;
}
+ if (ntex->src_gpr >= bc->ngpr) {
+ bc->ngpr = ntex->src_gpr + 1;
+ }
+ if (ntex->dst_gpr >= bc->ngpr) {
+ bc->ngpr = ntex->dst_gpr + 1;
+ }
LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex);
/* each texture fetch use 4 dwords */
bc->cf_last->ndw += 4;
bc->ndw += 4;
- if ((bc->ndw / 4) > 7)
+ if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc))
bc->force_add_cf = 1;
return 0;
}
@@ -597,31 +1360,8 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst)
/* common to all 3 families */
static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id)
{
- unsigned fetch_resource_start = 0;
-
- /* check if we are fetch shader */
- /* fetch shader can also access vertex resource,
- * first fetch shader resource is at 160
- */
- if (bc->type == -1) {
- switch (bc->chiprev) {
- /* r600 */
- case CHIPREV_R600:
- /* r700 */
- case CHIPREV_R700:
- fetch_resource_start = 160;
- break;
- /* evergreen */
- case CHIPREV_EVERGREEN:
- fetch_resource_start = 0;
- break;
- default:
- fprintf(stderr, "%s:%s:%d unknown chiprev %d\n",
- __FILE__, __func__, __LINE__, bc->chiprev);
- break;
- }
- }
- bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id + fetch_resource_start) |
+ bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
+ S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) |
S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count);
@@ -635,7 +1375,8 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign
S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) |
S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) |
S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr);
- bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1);
+ bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) |
+ S_SQ_VTX_WORD2_MEGA_FETCH(1);
bc->bytecode[id++] = 0;
return 0;
}
@@ -673,8 +1414,6 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign
/* r600 only, r700/eg bits in r700_asm.c */
static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
{
- unsigned i;
-
/* don't replace gpr by pv or ps for destination register */
bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
@@ -705,22 +1444,23 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign
S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
+ S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
}
- if (alu->last) {
- if (alu->nliteral && !alu->literal_added) {
- R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst);
- }
- for (i = 0; i < alu->nliteral; i++) {
- bc->bytecode[id++] = alu->value[i];
- }
- }
return 0;
}
+static void r600_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf)
+{
+ *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
+ *bytecode++ = S_SQ_CF_WORD1_CF_INST(cf->inst) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
+}
+
/* common for r600/r700 - eg in eg_asm.c */
static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
{
@@ -729,15 +1469,17 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
- S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) |
- S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) |
- S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank);
+ S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);
bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
- S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) |
- S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) |
- S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
S_SQ_CF_ALU_WORD1_BARRIER(1) |
S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) |
S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
@@ -745,10 +1487,10 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
- bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
- bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
+ if (bc->chiprev == CHIPREV_R700)
+ r700_bc_cf_vtx_build(&bc->bytecode[id], cf);
+ else
+ r600_bc_cf_vtx_build(&bc->bytecode[id], cf);
break;
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
@@ -756,7 +1498,8 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
- bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
+ bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
@@ -793,8 +1536,10 @@ int r600_bc_build(struct r600_bc *bc)
struct r600_bc_alu *alu;
struct r600_bc_vtx *vtx;
struct r600_bc_tex *tex;
+ uint32_t literal[4];
+ unsigned nliteral;
unsigned addr;
- int r;
+ int i, r;
if (bc->callstack[0].max > 0)
bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
@@ -808,6 +1553,8 @@ int r600_bc_build(struct r600_bc *bc)
LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
break;
case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
@@ -854,8 +1601,16 @@ int r600_bc_build(struct r600_bc *bc)
return r;
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+ nliteral = 0;
+ memset(literal, 0, sizeof(literal));
LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+ r = r600_bc_alu_nliterals(bc, alu, literal, &nliteral);
+ if (r)
+ return r;
+ r600_bc_alu_adjust_literals(bc, alu, literal, nliteral);
switch(bc->chiprev) {
case CHIPREV_R600:
r = r600_bc_alu_build(bc, alu, addr);
@@ -872,7 +1627,11 @@ int r600_bc_build(struct r600_bc *bc)
return r;
addr += 2;
if (alu->last) {
- addr += alu->nliteral;
+ for (i = 0; i < align(nliteral, 2); ++i) {
+ bc->bytecode[addr++] = literal[i];
+ }
+ nliteral = 0;
+ memset(literal, 0, sizeof(literal));
}
}
break;
@@ -953,7 +1712,14 @@ void r600_bc_clear(struct r600_bc *bc)
void r600_bc_dump(struct r600_bc *bc)
{
- unsigned i;
+ struct r600_bc_cf *cf = NULL;
+ struct r600_bc_alu *alu = NULL;
+ struct r600_bc_vtx *vtx = NULL;
+ struct r600_bc_tex *tex = NULL;
+
+ unsigned i, id;
+ uint32_t literal[4];
+ unsigned nliteral;
char chip = '6';
switch (bc->chiprev) {
@@ -968,84 +1734,191 @@ void r600_bc_dump(struct r600_bc *bc)
chip = '6';
break;
}
- fprintf(stderr, "bytecode %d dw -----------------------\n", bc->ndw);
+ fprintf(stderr, "bytecode %d dw -- %d gprs ---------------------\n", bc->ndw, bc->ngpr);
fprintf(stderr, " %c\n", chip);
- for (i = 0; i < bc->ndw; i++) {
- fprintf(stderr, "0x%08X\n", bc->bytecode[i]);
- }
- fprintf(stderr, "--------------------------------------\n");
-}
-void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
-{
- struct r600_pipe_state *rstate;
- unsigned i = 0;
-
- if (count > 8) {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(8 - 1);
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(count - 8 - 1);
- } else {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(count - 1);
- }
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
- S_SQ_CF_WORD1_BARRIER(1);
-
- rstate = &ve->rstate;
- rstate->id = R600_PIPE_STATE_FETCH_SHADER;
- rstate->nregs = 0;
- r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
- r600_bo_offset(ve->fetch_shader) >> 8,
- 0xFFFFFFFF, ve->fetch_shader);
-}
+ LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
+ id = cf->id;
-void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
-{
- struct r600_pipe_state *rstate;
- unsigned i = 0;
-
- if (count > 8) {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(8 - 1);
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT((count - 8) - 1);
- } else {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(count - 1);
+ switch (cf->inst) {
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+ fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
+ fprintf(stderr, "ADDR:%d ", cf->addr);
+ fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache[0].mode);
+ fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache[0].bank);
+ fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache[1].bank);
+ id++;
+ fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", cf->inst);
+ fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache[1].mode);
+ fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache[0].addr);
+ fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache[1].addr);
+ fprintf(stderr, "COUNT:%d\n", cf->ndw / 2);
+ break;
+ case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+ case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+ case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
+ fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
+ fprintf(stderr, "ADDR:%d\n", cf->addr);
+ id++;
+ fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", cf->inst);
+ fprintf(stderr, "COUNT:%d\n", cf->ndw / 4);
+ break;
+ case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+ case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+ fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
+ fprintf(stderr, "GPR:%X ", cf->output.gpr);
+ fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size);
+ fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base);
+ fprintf(stderr, "TYPE:%X\n", cf->output.type);
+ id++;
+ fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
+ fprintf(stderr, "SWIZ_X:%X ", cf->output.swizzle_x);
+ fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y);
+ fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z);
+ fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
+ fprintf(stderr, "BARRIER:%X ", cf->output.barrier);
+ fprintf(stderr, "INST:%d ", cf->output.inst);
+ fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count);
+ fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
+ break;
+ case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+ case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+ case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+ fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
+ fprintf(stderr, "ADDR:%d\n", cf->cf_addr);
+ id++;
+ fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", cf->inst);
+ fprintf(stderr, "COND:%X ", cf->cond);
+ fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count);
+ break;
+ }
+
+ id = cf->addr;
+ nliteral = 0;
+ LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+ r600_bc_alu_nliterals(bc, alu, literal, &nliteral);
+
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel);
+ fprintf(stderr, "REL:%d ", alu->src[0].rel);
+ fprintf(stderr, "CHAN:%d ", alu->src[0].chan);
+ fprintf(stderr, "NEG:%d) ", alu->src[0].neg);
+ fprintf(stderr, "SRC1(SEL:%d ", alu->src[1].sel);
+ fprintf(stderr, "REL:%d ", alu->src[1].rel);
+ fprintf(stderr, "CHAN:%d ", alu->src[1].chan);
+ fprintf(stderr, "NEG:%d) ", alu->src[1].neg);
+ fprintf(stderr, "LAST:%d)\n", alu->last);
+ id++;
+ fprintf(stderr, "%04d %08X %c ", id, bc->bytecode[id], alu->last ? '*' : ' ');
+ fprintf(stderr, "INST:%d ", alu->inst);
+ fprintf(stderr, "DST(SEL:%d ", alu->dst.sel);
+ fprintf(stderr, "CHAN:%d ", alu->dst.chan);
+ fprintf(stderr, "REL:%d ", alu->dst.rel);
+ fprintf(stderr, "CLAMP:%d) ", alu->dst.clamp);
+ fprintf(stderr, "BANK_SWIZZLE:%d ", alu->bank_swizzle);
+ if (alu->is_op3) {
+ fprintf(stderr, "SRC2(SEL:%d ", alu->src[2].sel);
+ fprintf(stderr, "REL:%d ", alu->src[2].rel);
+ fprintf(stderr, "CHAN:%d ", alu->src[2].chan);
+ fprintf(stderr, "NEG:%d)\n", alu->src[2].neg);
+ } else {
+ fprintf(stderr, "SRC0_ABS:%d ", alu->src[0].abs);
+ fprintf(stderr, "SRC1_ABS:%d ", alu->src[1].abs);
+ fprintf(stderr, "WRITE_MASK:%d ", alu->dst.write);
+ fprintf(stderr, "OMOD:%d ", alu->omod);
+ fprintf(stderr, "EXECUTE_MASK:%d ", alu->predicate);
+ fprintf(stderr, "UPDATE_PRED:%d\n", alu->predicate);
+ }
+
+ id++;
+ if (alu->last) {
+ for (i = 0; i < nliteral; i++, id++) {
+ float *f = (float*)(bc->bytecode + id);
+ fprintf(stderr, "%04d %08X\t%f\n", id, bc->bytecode[id], *f);
+ }
+ id += nliteral & 1;
+ nliteral = 0;
+ }
+ }
+
+ LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", tex->inst);
+ fprintf(stderr, "RESOURCE_ID:%d ", tex->resource_id);
+ fprintf(stderr, "SRC(GPR:%d ", tex->src_gpr);
+ fprintf(stderr, "REL:%d)\n", tex->src_rel);
+ id++;
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "DST(GPR:%d ", tex->dst_gpr);
+ fprintf(stderr, "REL:%d ", tex->dst_rel);
+ fprintf(stderr, "SEL_X:%d ", tex->dst_sel_x);
+ fprintf(stderr, "SEL_Y:%d ", tex->dst_sel_y);
+ fprintf(stderr, "SEL_Z:%d ", tex->dst_sel_z);
+ fprintf(stderr, "SEL_W:%d) ", tex->dst_sel_w);
+ fprintf(stderr, "LOD_BIAS:%d ", tex->lod_bias);
+ fprintf(stderr, "COORD_TYPE_X:%d ", tex->coord_type_x);
+ fprintf(stderr, "COORD_TYPE_Y:%d ", tex->coord_type_y);
+ fprintf(stderr, "COORD_TYPE_Z:%d ", tex->coord_type_z);
+ fprintf(stderr, "COORD_TYPE_W:%d\n", tex->coord_type_w);
+ id++;
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "OFFSET_X:%d ", tex->offset_x);
+ fprintf(stderr, "OFFSET_Y:%d ", tex->offset_y);
+ fprintf(stderr, "OFFSET_Z:%d ", tex->offset_z);
+ fprintf(stderr, "SAMPLER_ID:%d ", tex->sampler_id);
+ fprintf(stderr, "SRC(SEL_X:%d ", tex->src_sel_x);
+ fprintf(stderr, "SEL_Y:%d ", tex->src_sel_y);
+ fprintf(stderr, "SEL_Z:%d ", tex->src_sel_z);
+ fprintf(stderr, "SEL_W:%d)\n", tex->src_sel_w);
+ id++;
+ fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]);
+ id++;
+ }
+
+ LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", vtx->inst);
+ fprintf(stderr, "FETCH_TYPE:%d ", vtx->fetch_type);
+ fprintf(stderr, "BUFFER_ID:%d\n", vtx->buffer_id);
+ id++;
+ /* This assumes that no semantic fetches exist */
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr);
+ fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x);
+ fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count);
+ fprintf(stderr, "DST(GPR:%d ", vtx->dst_gpr);
+ fprintf(stderr, "SEL_X:%d ", vtx->dst_sel_x);
+ fprintf(stderr, "SEL_Y:%d ", vtx->dst_sel_y);
+ fprintf(stderr, "SEL_Z:%d ", vtx->dst_sel_z);
+ fprintf(stderr, "SEL_W:%d) ", vtx->dst_sel_w);
+ fprintf(stderr, "USE_CONST_FIELDS:%d ", vtx->use_const_fields);
+ fprintf(stderr, "FORMAT(DATA:%d ", vtx->data_format);
+ fprintf(stderr, "NUM:%d ", vtx->num_format_all);
+ fprintf(stderr, "COMP:%d ", vtx->format_comp_all);
+ fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all);
+ id++;
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "OFFSET:%d\n", vtx->offset);
+ //TODO
+ id++;
+ fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]);
+ id++;
+ }
}
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
- S_SQ_CF_WORD1_BARRIER(1);
-
- rstate = &ve->rstate;
- rstate->id = R600_PIPE_STATE_FETCH_SHADER;
- rstate->nregs = 0;
- r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
- r600_bo_offset(ve->fetch_shader) >> 8,
- 0xFFFFFFFF, ve->fetch_shader);
+
+ fprintf(stderr, "--------------------------------------\n");
}
static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
@@ -1071,7 +1944,7 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
}
switch (desc->channel[i].type) {
- /* Half-floats, floats, doubles */
+ /* Half-floats, floats, ints */
case UTIL_FORMAT_TYPE_FLOAT:
switch (desc->channel[i].size) {
case 16:
@@ -1083,8 +1956,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
*format = FMT_16_16_FLOAT;
break;
case 3:
- *format = FMT_16_16_16_FLOAT;
- break;
case 4:
*format = FMT_16_16_16_16_FLOAT;
break;
@@ -1124,8 +1995,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
*format = FMT_8_8;
break;
case 3:
- // *format = FMT_8_8_8; /* fails piglit draw-vertices test */
- // break;
case 4:
*format = FMT_8_8_8_8;
break;
@@ -1140,8 +2009,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
*format = FMT_16_16;
break;
case 3:
- // *format = FMT_16_16_16; /* fails piglit draw-vertices test */
- // break;
case 4:
*format = FMT_16_16_16_16;
break;
@@ -1184,64 +2051,21 @@ out_unknown:
R600_ERR("unsupported vertex format %s\n", util_format_name(pformat));
}
-static void r600_bc(unsigned ndw, unsigned chiprev, u32 *bytecode)
-{
- unsigned i;
- char chip = '6';
-
- switch (chiprev) {
- case 1:
- chip = '7';
- break;
- case 2:
- chip = 'E';
- break;
- case 0:
- default:
- chip = '6';
- break;
- }
- fprintf(stderr, "bytecode %d dw -----------------------\n", ndw);
- fprintf(stderr, " %c\n", chip);
- for (i = 0; i < ndw; i++) {
- fprintf(stderr, "0x%08X\n", bytecode[i]);
- }
- fprintf(stderr, "--------------------------------------\n");
-}
-
int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve)
{
- unsigned ndw, i;
- u32 *bytecode;
- unsigned fetch_resource_start = 0, format, num_format, format_comp;
+ static int dump_shaders = -1;
+
+ struct r600_bc bc;
+ struct r600_bc_vtx vtx;
struct pipe_vertex_element *elements = ve->elements;
const struct util_format_description *desc;
-
- /* 2 dwords for cf aligned to 4 + 4 dwords per input */
- ndw = 8 + ve->count * 4;
- ve->fs_size = ndw * 4;
-
- /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
- ve->fetch_shader = r600_bo(rctx->radeon, ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0);
- if (ve->fetch_shader == NULL) {
- return -ENOMEM;
- }
-
- bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL);
- if (bytecode == NULL) {
- r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
- return -ENOMEM;
- }
-
- if (rctx->family >= CHIP_CEDAR) {
- eg_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
- } else {
- r600_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
- fetch_resource_start = 160;
- }
+ unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160;
+ unsigned format, num_format, format_comp;
+ u32 *bytecode;
+ int i, r;
/* vertex elements offset need special handling, if offset is bigger
- * than what we can put in fetch instruction then we need to alterate
+ + * than what we can put in fetch instruction then we need to alterate
* the vertex resource offset. In such case in order to simplify code
* we will bound one resource per elements. It's a worst case scenario.
*/
@@ -1252,40 +2076,111 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
}
}
+ memset(&bc, 0, sizeof(bc));
+ r = r600_bc_init(&bc, r600_get_family(rctx->radeon));
+ if (r)
+ return r;
+
+ for (i = 0; i < ve->count; i++) {
+ if (elements[i].instance_divisor > 1) {
+ struct r600_bc_alu alu;
+
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
+ alu.src[0].sel = 0;
+ alu.src[0].chan = 3;
+
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1;
+
+ alu.dst.sel = i + 1;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(&bc, &alu))) {
+ r600_bc_clear(&bc);
+ return r;
+ }
+ }
+ }
+
for (i = 0; i < ve->count; i++) {
unsigned vbuffer_index;
- r600_vertex_data_type(ve->hw_format[i], &format, &num_format, &format_comp);
- desc = util_format_description(ve->hw_format[i]);
+ r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp);
+ desc = util_format_description(ve->elements[i].src_format);
if (desc == NULL) {
- R600_ERR("unknown format %d\n", ve->hw_format[i]);
- r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+ r600_bc_clear(&bc);
+ R600_ERR("unknown format %d\n", ve->elements[i].src_format);
return -EINVAL;
}
/* see above for vbuffer_need_offset explanation */
vbuffer_index = elements[i].vertex_buffer_index;
- if (ve->vbuffer_need_offset) {
- bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i + fetch_resource_start);
- } else {
- bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index + fetch_resource_start);
+ memset(&vtx, 0, sizeof(vtx));
+ vtx.buffer_id = (ve->vbuffer_need_offset ? i : vbuffer_index) + fetch_resource_start;
+ vtx.fetch_type = elements[i].instance_divisor ? 1 : 0;
+ vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0;
+ vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0;
+ vtx.mega_fetch_count = 0x1F;
+ vtx.dst_gpr = i + 1;
+ vtx.dst_sel_x = desc->swizzle[0];
+ vtx.dst_sel_y = desc->swizzle[1];
+ vtx.dst_sel_z = desc->swizzle[2];
+ vtx.dst_sel_w = desc->swizzle[3];
+ vtx.data_format = format;
+ vtx.num_format_all = num_format;
+ vtx.format_comp_all = format_comp;
+ vtx.srf_mode_all = 1;
+ vtx.offset = elements[i].src_offset;
+
+ if ((r = r600_bc_add_vtx(&bc, &vtx))) {
+ r600_bc_clear(&bc);
+ return r;
}
- bytecode[8 + i * 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) |
- S_SQ_VTX_WORD0_SRC_SEL_X(0) |
- S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F);
- bytecode[8 + i * 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]) |
- S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]) |
- S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]) |
- S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]) |
- S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) |
- S_SQ_VTX_WORD1_DATA_FORMAT(format) |
- S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format) |
- S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp) |
- S_SQ_VTX_WORD1_SRF_MODE_ALL(1) |
- S_SQ_VTX_WORD1_GPR_DST_GPR(i + 1);
- bytecode[8 + i * 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements[i].src_offset) |
- S_SQ_VTX_WORD2_MEGA_FETCH(1);
- bytecode[8 + i * 4 + 3] = 0;
}
+
+ r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
+
+ if ((r = r600_bc_build(&bc))) {
+ r600_bc_clear(&bc);
+ return r;
+ }
+
+ if (dump_shaders == -1)
+ dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
+
+ if (dump_shaders) {
+ fprintf(stderr, "--------------------------------------------------------------\n");
+ r600_bc_dump(&bc);
+ fprintf(stderr, "______________________________________________________________\n");
+ }
+
+ ve->fs_size = bc.ndw*4;
+
+ /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
+ ve->fetch_shader = r600_bo(rctx->radeon, ve->fs_size, 256, PIPE_BIND_VERTEX_BUFFER, 0);
+ if (ve->fetch_shader == NULL) {
+ r600_bc_clear(&bc);
+ return -ENOMEM;
+ }
+
+ bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL);
+ if (bytecode == NULL) {
+ r600_bc_clear(&bc);
+ r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+ return -ENOMEM;
+ }
+
+ memcpy(bytecode, bc.bytecode, ve->fs_size);
+
r600_bo_unmap(rctx->radeon, ve->fetch_shader);
+ r600_bc_clear(&bc);
+
+ if (rctx->family >= CHIP_CEDAR)
+ evergreen_fetch_shader(ve);
+ else
+ r600_fetch_shader(ve);
+
return 0;
}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index b147f0f5c8..27ea293ebe 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -25,9 +25,6 @@
#include "util/u_double_list.h"
-#define NUM_OF_CYCLES 3
-#define NUM_OF_COMPONENTS 4
-
struct r600_vertex_element;
struct r600_pipe_context;
@@ -37,6 +34,7 @@ struct r600_bc_alu_src {
unsigned neg;
unsigned abs;
unsigned rel;
+ uint32_t value;
};
struct r600_bc_alu_dst {
@@ -49,19 +47,15 @@ struct r600_bc_alu_dst {
struct r600_bc_alu {
struct list_head list;
- struct list_head bs_list; /* bank swizzle list */
struct r600_bc_alu_src src[3];
struct r600_bc_alu_dst dst;
unsigned inst;
unsigned last;
unsigned is_op3;
unsigned predicate;
- unsigned nliteral;
- unsigned literal_added;
unsigned bank_swizzle;
unsigned bank_swizzle_force;
- u32 value[4];
- int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
+ unsigned omod;
};
struct r600_bc_tex {
@@ -109,6 +103,7 @@ struct r600_bc_vtx {
unsigned num_format_all;
unsigned format_comp_all;
unsigned srf_mode_all;
+ unsigned offset;
};
struct r600_bc_output {
@@ -122,9 +117,16 @@ struct r600_bc_output {
unsigned swizzle_y;
unsigned swizzle_z;
unsigned swizzle_w;
+ unsigned burst_count;
unsigned barrier;
};
+struct r600_bc_kcache {
+ unsigned bank;
+ unsigned mode;
+ unsigned addr;
+};
+
struct r600_bc_cf {
struct list_head list;
unsigned inst;
@@ -134,18 +136,15 @@ struct r600_bc_cf {
unsigned cond;
unsigned pop_count;
unsigned cf_addr; /* control flow addr */
- unsigned kcache0_mode;
- unsigned kcache1_mode;
- unsigned kcache0_addr;
- unsigned kcache1_addr;
- unsigned kcache0_bank;
- unsigned kcache1_bank;
+ struct r600_bc_kcache kcache[2];
unsigned r6xx_uses_waterfall;
struct list_head alu;
struct list_head tex;
struct list_head vtx;
struct r600_bc_output output;
struct r600_bc_alu *curr_bs_head;
+ struct r600_bc_alu *prev_bs_head;
+ struct r600_bc_alu *prev2_bs_head;
};
#define FC_NONE 0
@@ -191,26 +190,24 @@ struct r600_bc {
/* eg_asm.c */
int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf);
-void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
/* r600_asm.c */
int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
void r600_bc_clear(struct r600_bc *bc);
int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu);
-int r600_bc_add_literal(struct r600_bc *bc, const u32 *value);
int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx);
int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex);
int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output);
int r600_bc_build(struct r600_bc *bc);
int r600_bc_add_cfinst(struct r600_bc *bc, int inst);
int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type);
+void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg);
void r600_bc_dump(struct r600_bc *bc);
-void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
-void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve);
/* r700_asm.c */
+void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf);
int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
#endif
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 0f04136fb2..04408a5cc8 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -36,6 +36,7 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ rctx->blit = true;
r600_context_queries_suspend(&rctx->ctx);
util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]);
@@ -53,9 +54,9 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
if (rctx->states[R600_PIPE_STATE_CLIP]) {
util_blitter_save_clip(rctx->blitter, &rctx->clip);
}
- util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, rctx->vertex_buffer);
-
- rctx->vertex_elements = NULL;
+ util_blitter_save_vertex_buffers(rctx->blitter,
+ rctx->vbuf_mgr->nr_vertex_buffers,
+ rctx->vbuf_mgr->vertex_buffer);
if (op & (R600_CLEAR_SURFACE | R600_COPY))
util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer);
@@ -76,21 +77,26 @@ static void r600_blitter_end(struct pipe_context *ctx)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
r600_context_queries_resume(&rctx->ctx);
+ rctx->blit = false;
}
-int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture)
+void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct pipe_surface *zsurf, *cbsurf, surf_tmpl;
int level = 0;
float depth = 1.0f;
- surf_tmpl.format = texture->resource.base.b.format;
+
+ if (!texture->dirty_db)
+ return;
+
+ surf_tmpl.format = texture->resource.b.b.b.format;
surf_tmpl.u.tex.level = level;
surf_tmpl.u.tex.first_layer = 0;
surf_tmpl.u.tex.last_layer = 0;
surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL;
- zsurf = ctx->create_surface(ctx, &texture->resource.base.b, &surf_tmpl);
+ zsurf = ctx->create_surface(ctx, &texture->resource.b.b.b, &surf_tmpl);
surf_tmpl.format = ((struct pipe_resource*)texture->flushed_depth_texture)->format;
surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
@@ -108,8 +114,47 @@ int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te
pipe_surface_reference(&zsurf, NULL);
pipe_surface_reference(&cbsurf, NULL);
+ texture->dirty_db = FALSE;
+}
+
+void r600_flush_depth_textures(struct r600_pipe_context *rctx)
+{
+ unsigned int i;
+
+ if (rctx->blit) return;
+
+ /* FIXME: This handles fragment shader textures only. */
+
+ for (i = 0; i < rctx->ps_samplers.n_views; ++i) {
+ struct r600_pipe_sampler_view *view;
+ struct r600_resource_texture *tex;
+
+ view = rctx->ps_samplers.views[i];
+ if (!view) continue;
+
+ tex = (struct r600_resource_texture *)view->base.texture;
+ if (!tex->depth)
+ continue;
+
+ if (tex->is_flushing_texture)
+ continue;
- return 0;
+ r600_blit_uncompress_depth(&rctx->context, tex);
+ }
+
+ /* also check CB here */
+ for (i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
+ struct r600_resource_texture *tex;
+ tex = (struct r600_resource_texture *)rctx->framebuffer.cbufs[i]->texture;
+
+ if (!tex->depth)
+ continue;
+
+ if (tex->is_flushing_texture)
+ continue;
+
+ r600_blit_uncompress_depth(&rctx->context, tex);
+ }
}
static void r600_clear(struct pipe_context *ctx, unsigned buffers,
@@ -174,6 +219,52 @@ static void r600_hw_copy_region(struct pipe_context *ctx,
r600_blitter_end(ctx);
}
+struct texture_orig_info {
+ unsigned format;
+ unsigned width0;
+ unsigned height0;
+};
+
+static void r600_compressed_to_blittable(struct pipe_resource *tex,
+ unsigned level,
+ struct texture_orig_info *orig)
+{
+ struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex;
+ unsigned pixsize = util_format_get_blocksize(tex->format);
+ int new_format;
+ int new_height, new_width;
+
+ orig->format = tex->format;
+ orig->width0 = tex->width0;
+ orig->height0 = tex->height0;
+
+ if (pixsize == 8)
+ new_format = PIPE_FORMAT_R16G16B16A16_UNORM; /* 64-bit block */
+ else
+ new_format = PIPE_FORMAT_R32G32B32A32_UNORM; /* 128-bit block */
+
+ new_width = util_format_get_nblocksx(tex->format, orig->width0);
+ new_height = util_format_get_nblocksy(tex->format, orig->height0);
+
+ rtex->force_int_type = true;
+ tex->width0 = new_width;
+ tex->height0 = new_height;
+ tex->format = new_format;
+
+}
+
+static void r600_reset_blittable_to_compressed(struct pipe_resource *tex,
+ unsigned level,
+ struct texture_orig_info *orig)
+{
+ struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex;
+ rtex->force_int_type = false;
+
+ tex->format = orig->format;
+ tex->width0 = orig->width0;
+ tex->height0 = orig->height0;
+}
+
static void r600_resource_copy_region(struct pipe_context *ctx,
struct pipe_resource *dst,
unsigned dst_level,
@@ -182,15 +273,36 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
unsigned src_level,
const struct pipe_box *src_box)
{
- boolean is_depth;
- /* there is something wrong with depth resource copies at the moment so avoid them for now */
- is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
- if (is_depth)
- util_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
- src, src_level, src_box);
- else
- r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
- src, src_level, src_box);
+ struct r600_resource_texture *rsrc = (struct r600_resource_texture*)src;
+ struct texture_orig_info orig_info[2];
+ boolean restore_orig[2];
+
+ if (rsrc->depth && !rsrc->is_flushing_texture)
+ r600_texture_depth_flush(ctx, src, FALSE);
+
+ restore_orig[0] = restore_orig[1] = FALSE;
+
+ if (util_format_is_compressed(src->format)) {
+ r600_compressed_to_blittable(src, src_level, &orig_info[0]);
+ restore_orig[0] = TRUE;
+ }
+
+ if (util_format_is_compressed(dst->format)) {
+ r600_compressed_to_blittable(dst, dst_level, &orig_info[1]);
+ restore_orig[1] = TRUE;
+ /* translate the dst box as well */
+ dstx = util_format_get_nblocksx(orig_info[1].format, dstx);
+ dsty = util_format_get_nblocksy(orig_info[1].format, dsty);
+ }
+
+ r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box);
+
+ if (restore_orig[0])
+ r600_reset_blittable_to_compressed(src, src_level, &orig_info[0]);
+
+ if (restore_orig[1])
+ r600_reset_blittable_to_compressed(dst, dst_level, &orig_info[1]);
}
void r600_init_blit_functions(struct r600_pipe_context *rctx)
@@ -200,3 +312,19 @@ void r600_init_blit_functions(struct r600_pipe_context *rctx)
rctx->context.clear_depth_stencil = r600_clear_depth_stencil;
rctx->context.resource_copy_region = r600_resource_copy_region;
}
+
+void r600_blit_push_depth(struct pipe_context *ctx, struct r600_resource_texture *texture)
+{
+ struct pipe_box sbox;
+
+ sbox.x = sbox.y = sbox.z = 0;
+ sbox.width = texture->resource.b.b.b.width0;
+ sbox.height = texture->resource.b.b.b.height0;
+ /* XXX that might be wrong */
+ sbox.depth = 1;
+
+ r600_hw_copy_region(ctx, (struct pipe_resource *)texture, 0,
+ 0, 0, 0,
+ (struct pipe_resource *)texture->flushed_depth_texture, 0,
+ &sbox);
+}
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 03a61a3213..6ced719c8f 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -29,83 +29,50 @@
#include <util/u_math.h>
#include <util/u_inlines.h>
#include <util/u_memory.h>
-#include <util/u_upload_mgr.h>
+#include "util/u_upload_mgr.h"
+
#include "state_tracker/drm_driver.h"
+
#include <xf86drm.h>
#include "radeon_drm.h"
+
#include "r600.h"
#include "r600_pipe.h"
-extern struct u_resource_vtbl r600_buffer_vtbl;
-
-
-struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
- const struct pipe_resource *templ)
-{
- struct r600_resource_buffer *rbuffer;
- struct r600_bo *bo;
- /* XXX We probably want a different alignment for buffers and textures. */
- unsigned alignment = 4096;
-
- rbuffer = CALLOC_STRUCT(r600_resource_buffer);
- if (rbuffer == NULL)
- return NULL;
-
- rbuffer->magic = R600_BUFFER_MAGIC;
- rbuffer->user_buffer = NULL;
- rbuffer->num_ranges = 0;
- rbuffer->r.base.b = *templ;
- pipe_reference_init(&rbuffer->r.base.b.reference, 1);
- rbuffer->r.base.b.screen = screen;
- rbuffer->r.base.vtbl = &r600_buffer_vtbl;
- rbuffer->r.size = rbuffer->r.base.b.width0;
- bo = r600_bo((struct radeon*)screen->winsys, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind, rbuffer->r.base.b.usage);
- if (bo == NULL) {
- FREE(rbuffer);
- return NULL;
- }
- rbuffer->r.bo = bo;
- return &rbuffer->r.base.b;
-}
-
-struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
- void *ptr, unsigned bytes,
- unsigned bind)
-{
- struct r600_resource_buffer *rbuffer;
-
- rbuffer = CALLOC_STRUCT(r600_resource_buffer);
- if (rbuffer == NULL)
- return NULL;
-
- rbuffer->magic = R600_BUFFER_MAGIC;
- pipe_reference_init(&rbuffer->r.base.b.reference, 1);
- rbuffer->r.base.vtbl = &r600_buffer_vtbl;
- rbuffer->r.base.b.screen = screen;
- rbuffer->r.base.b.target = PIPE_BUFFER;
- rbuffer->r.base.b.format = PIPE_FORMAT_R8_UNORM;
- rbuffer->r.base.b.usage = PIPE_USAGE_IMMUTABLE;
- rbuffer->r.base.b.bind = bind;
- rbuffer->r.base.b.width0 = bytes;
- rbuffer->r.base.b.height0 = 1;
- rbuffer->r.base.b.depth0 = 1;
- rbuffer->r.base.b.array_size = 1;
- rbuffer->r.base.b.flags = 0;
- rbuffer->num_ranges = 0;
- rbuffer->r.bo = NULL;
- rbuffer->user_buffer = ptr;
- return &rbuffer->r.base.b;
-}
-
static void r600_buffer_destroy(struct pipe_screen *screen,
struct pipe_resource *buf)
{
+ struct r600_screen *rscreen = (struct r600_screen*)screen;
struct r600_resource_buffer *rbuffer = r600_buffer(buf);
if (rbuffer->r.bo) {
r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL);
}
- FREE(rbuffer);
+ rbuffer->r.bo = NULL;
+ util_slab_free(&rscreen->pool_buffers, rbuffer);
+}
+
+static struct pipe_transfer *r600_get_transfer(struct pipe_context *ctx,
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context*)ctx;
+ struct pipe_transfer *transfer = util_slab_alloc(&rctx->pool_transfers);
+
+ transfer->resource = resource;
+ transfer->level = level;
+ transfer->usage = usage;
+ transfer->box = *box;
+ transfer->stride = 0;
+ transfer->layer_stride = 0;
+ transfer->data = NULL;
+
+ /* Note strides are zero, this is ok for buffers, but not for
+ * textures 2d & higher at least.
+ */
+ return transfer;
}
static void *r600_buffer_transfer_map(struct pipe_context *pipe,
@@ -114,29 +81,10 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
int write = 0;
uint8_t *data;
- int i;
- boolean flush = FALSE;
-
- if (rbuffer->user_buffer)
- return (uint8_t*)rbuffer->user_buffer + transfer->box.x;
-
- if (transfer->usage & PIPE_TRANSFER_DISCARD) {
- for (i = 0; i < rbuffer->num_ranges; i++) {
- if ((transfer->box.x >= rbuffer->ranges[i].start) &&
- (transfer->box.x < rbuffer->ranges[i].end))
- flush = TRUE;
-
- if (flush) {
- r600_bo_reference((struct radeon*)pipe->winsys, &rbuffer->r.bo, NULL);
- rbuffer->num_ranges = 0;
- rbuffer->r.bo = r600_bo((struct radeon*)pipe->winsys,
- rbuffer->r.base.b.width0, 0,
- rbuffer->r.base.b.bind,
- rbuffer->r.base.b.usage);
- break;
- }
- }
- }
+
+ if (rbuffer->r.b.user_ptr)
+ return (uint8_t*)rbuffer->r.b.user_ptr + transfer->box.x;
+
if (transfer->usage & PIPE_TRANSFER_DONTBLOCK) {
/* FIXME */
}
@@ -155,44 +103,122 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
{
struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
+ if (rbuffer->r.b.user_ptr)
+ return;
+
if (rbuffer->r.bo)
r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo);
}
static void r600_buffer_transfer_flush_region(struct pipe_context *pipe,
- struct pipe_transfer *transfer,
- const struct pipe_box *box)
+ struct pipe_transfer *transfer,
+ const struct pipe_box *box)
{
- struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
- unsigned i;
- unsigned offset = transfer->box.x + box->x;
- unsigned length = box->width;
+}
- assert(box->x + box->width <= transfer->box.width);
+static void r600_transfer_destroy(struct pipe_context *ctx,
+ struct pipe_transfer *transfer)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context*)ctx;
+ util_slab_free(&rctx->pool_transfers, transfer);
+}
- if (rbuffer->user_buffer)
- return;
+static void r600_buffer_transfer_inline_write(struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned layer_stride)
+{
+ struct radeon *ws = (struct radeon*)pipe->winsys;
+ struct r600_resource_buffer *rbuffer = r600_buffer(resource);
+ uint8_t *map = NULL;
- /* mark the range as used */
- for(i = 0; i < rbuffer->num_ranges; ++i) {
- if(offset <= rbuffer->ranges[i].end && rbuffer->ranges[i].start <= (offset+box->width)) {
- rbuffer->ranges[i].start = MIN2(rbuffer->ranges[i].start, offset);
- rbuffer->ranges[i].end = MAX2(rbuffer->ranges[i].end, (offset+length));
- return;
- }
- }
+ assert(rbuffer->r.b.user_ptr == NULL);
+
+ map = r600_bo_map(ws, rbuffer->r.bo,
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage,
+ pipe);
+
+ memcpy(map + box->x, data, box->width);
- rbuffer->ranges[rbuffer->num_ranges].start = offset;
- rbuffer->ranges[rbuffer->num_ranges].end = offset+length;
- rbuffer->num_ranges++;
+ if (rbuffer->r.bo)
+ r600_bo_unmap(ws, rbuffer->r.bo);
}
-unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context,
- struct pipe_resource *buf,
- unsigned level, int layer)
+static const struct u_resource_vtbl r600_buffer_vtbl =
{
- /* FIXME */
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+ u_default_resource_get_handle, /* get_handle */
+ r600_buffer_destroy, /* resource_destroy */
+ r600_get_transfer, /* get_transfer */
+ r600_transfer_destroy, /* transfer_destroy */
+ r600_buffer_transfer_map, /* transfer_map */
+ r600_buffer_transfer_flush_region, /* transfer_flush_region */
+ r600_buffer_transfer_unmap, /* transfer_unmap */
+ r600_buffer_transfer_inline_write /* transfer_inline_write */
+};
+
+struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ)
+{
+ struct r600_screen *rscreen = (struct r600_screen*)screen;
+ struct r600_resource_buffer *rbuffer;
+ struct r600_bo *bo;
+ /* XXX We probably want a different alignment for buffers and textures. */
+ unsigned alignment = 4096;
+
+ rbuffer = util_slab_alloc(&rscreen->pool_buffers);
+
+ rbuffer->magic = R600_BUFFER_MAGIC;
+ rbuffer->r.b.b.b = *templ;
+ pipe_reference_init(&rbuffer->r.b.b.b.reference, 1);
+ rbuffer->r.b.b.b.screen = screen;
+ rbuffer->r.b.b.vtbl = &r600_buffer_vtbl;
+ rbuffer->r.b.user_ptr = NULL;
+ rbuffer->r.size = rbuffer->r.b.b.b.width0;
+ rbuffer->r.bo_size = rbuffer->r.size;
+
+ bo = r600_bo((struct radeon*)screen->winsys,
+ rbuffer->r.b.b.b.width0,
+ alignment, rbuffer->r.b.b.b.bind,
+ rbuffer->r.b.b.b.usage);
+
+ if (bo == NULL) {
+ FREE(rbuffer);
+ return NULL;
+ }
+ rbuffer->r.bo = bo;
+ return &rbuffer->r.b.b.b;
+}
+
+struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
+ void *ptr, unsigned bytes,
+ unsigned bind)
+{
+ struct r600_screen *rscreen = (struct r600_screen*)screen;
+ struct r600_resource_buffer *rbuffer;
+
+ rbuffer = util_slab_alloc(&rscreen->pool_buffers);
+
+ rbuffer->magic = R600_BUFFER_MAGIC;
+ pipe_reference_init(&rbuffer->r.b.b.b.reference, 1);
+ rbuffer->r.b.b.vtbl = &r600_buffer_vtbl;
+ rbuffer->r.b.b.b.screen = screen;
+ rbuffer->r.b.b.b.target = PIPE_BUFFER;
+ rbuffer->r.b.b.b.format = PIPE_FORMAT_R8_UNORM;
+ rbuffer->r.b.b.b.usage = PIPE_USAGE_IMMUTABLE;
+ rbuffer->r.b.b.b.bind = bind;
+ rbuffer->r.b.b.b.width0 = bytes;
+ rbuffer->r.b.b.b.height0 = 1;
+ rbuffer->r.b.b.b.depth0 = 1;
+ rbuffer->r.b.b.b.array_size = 1;
+ rbuffer->r.b.b.b.flags = 0;
+ rbuffer->r.b.user_ptr = ptr;
+ rbuffer->r.bo = NULL;
+ rbuffer->r.bo_size = 0;
+ return &rbuffer->r.b.b.b;
}
struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
@@ -213,82 +239,39 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
return NULL;
}
- pipe_reference_init(&rbuffer->base.b.reference, 1);
- rbuffer->base.b.target = PIPE_BUFFER;
- rbuffer->base.b.screen = screen;
- rbuffer->base.vtbl = &r600_buffer_vtbl;
+ pipe_reference_init(&rbuffer->b.b.b.reference, 1);
+ rbuffer->b.b.b.target = PIPE_BUFFER;
+ rbuffer->b.b.b.screen = screen;
+ rbuffer->b.b.vtbl = &r600_buffer_vtbl;
rbuffer->bo = bo;
- return &rbuffer->base.b;
+ return &rbuffer->b.b.b;
}
-struct u_resource_vtbl r600_buffer_vtbl =
+void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw)
{
- u_default_resource_get_handle, /* get_handle */
- r600_buffer_destroy, /* resource_destroy */
- r600_buffer_is_referenced_by_cs, /* is_buffer_referenced */
- u_default_get_transfer, /* get_transfer */
- u_default_transfer_destroy, /* transfer_destroy */
- r600_buffer_transfer_map, /* transfer_map */
- r600_buffer_transfer_flush_region, /* transfer_flush_region */
- r600_buffer_transfer_unmap, /* transfer_unmap */
- u_default_transfer_inline_write /* transfer_inline_write */
-};
+ struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer);
+ boolean flushed;
+
+ u_upload_data(rctx->vbuf_mgr->uploader, 0,
+ draw->info.count * draw->index_size,
+ rbuffer->r.b.user_ptr,
+ &draw->index_buffer_offset,
+ &draw->index_buffer, &flushed);
+}
-int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw)
+void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource_buffer **rbuffer,
+ uint32_t *const_offset)
{
- struct pipe_resource *upload_buffer = NULL;
- unsigned index_offset = draw->index_buffer_offset;
- int ret = 0;
-
- if (r600_buffer_is_user_buffer(draw->index_buffer)) {
- ret = u_upload_buffer(rctx->upload_ib,
- index_offset,
- draw->count * draw->index_size,
- draw->index_buffer,
- &index_offset,
- &upload_buffer);
- if (ret) {
- goto done;
- }
- draw->index_buffer_offset = index_offset;
-
- /* Transfer ownership. */
- pipe_resource_reference(&draw->index_buffer, upload_buffer);
- pipe_resource_reference(&upload_buffer, NULL);
- }
+ if ((*rbuffer)->r.b.user_ptr) {
+ uint8_t *ptr = (*rbuffer)->r.b.user_ptr;
+ unsigned size = (*rbuffer)->r.b.b.b.width0;
+ boolean flushed;
-done:
- return ret;
-}
+ *rbuffer = NULL;
-int r600_upload_user_buffers(struct r600_pipe_context *rctx)
-{
- enum pipe_error ret = PIPE_OK;
- int i, nr;
-
- nr = rctx->vertex_elements->count;
- nr = rctx->nvertex_buffer;
-
- for (i = 0; i < nr; i++) {
-// struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[rctx->vertex_elements->elements[i].vertex_buffer_index];
- struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i];
-
- if (r600_buffer_is_user_buffer(vb->buffer)) {
- struct pipe_resource *upload_buffer = NULL;
- unsigned offset = 0; /*vb->buffer_offset * 4;*/
- unsigned size = vb->buffer->width0;
- unsigned upload_offset;
- ret = u_upload_buffer(rctx->upload_vb,
- offset, size,
- vb->buffer,
- &upload_offset, &upload_buffer);
- if (ret)
- return ret;
-
- pipe_resource_reference(&vb->buffer, NULL);
- vb->buffer = upload_buffer;
- vb->buffer_offset = upload_offset;
- }
+ u_upload_data(rctx->vbuf_mgr->uploader, 0, size, ptr, const_offset,
+ (struct pipe_resource**)rbuffer, &flushed);
+ } else {
+ *const_offset = 0;
}
- return ret;
}
diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h
index 2ee0c83e5d..a85d0bbf1e 100644
--- a/src/gallium/drivers/r600/r600_opcodes.h
+++ b/src/gallium/drivers/r600/r600_opcodes.h
@@ -330,10 +330,14 @@
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED_64 0x00000098
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_64 0x00000099
/* TODO Fill in more ALU */
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT 0x0000009B
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT 0x0000009C
+/* TODO Fill in more ALU */
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR 0x000000B1
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 0x000000BE
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE 0x000000BF
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE 0x000000C0
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4 0x000000C1
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x000000CC
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY 0x000000D6
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 6842571044..0e28bda6eb 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -30,12 +30,13 @@
#include <tgsi/tgsi_util.h>
#include <util/u_blitter.h>
#include <util/u_double_list.h>
+#include <util/u_format_s3tc.h>
#include <util/u_transfer.h>
#include <util/u_surface.h>
#include <util/u_pack_color.h>
#include <util/u_memory.h>
#include <util/u_inlines.h>
-#include <util/u_upload_mgr.h>
+#include "util/u_upload_mgr.h"
#include <pipebuffer/pb_buffer.h>
#include "r600.h"
#include "r600d.h"
@@ -47,7 +48,7 @@
/*
* pipe_context
*/
-static void r600_flush(struct pipe_context *ctx, unsigned flags,
+static void r600_flush(struct pipe_context *ctx,
struct pipe_fence_handle **fence)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
@@ -59,9 +60,6 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags,
if (!rctx->ctx.pm4_cdwords)
return;
- u_upload_flush(rctx->upload_vb);
- u_upload_flush(rctx->upload_ib);
-
#if 0
sprintf(dname, "gallium-%08d.bof", dc);
if (dc < 20) {
@@ -71,6 +69,30 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags,
dc++;
#endif
r600_context_flush(&rctx->ctx);
+
+ /* XXX This shouldn't be really necessary, but removing it breaks some tests.
+ * Needless buffer reallocations may significantly increase memory consumption,
+ * so getting rid of this call is important. */
+ u_upload_flush(rctx->vbuf_mgr->uploader);
+}
+
+static void r600_update_num_contexts(struct r600_screen *rscreen, int diff)
+{
+ pipe_mutex_lock(rscreen->mutex_num_contexts);
+ if (diff > 0) {
+ rscreen->num_contexts++;
+
+ if (rscreen->num_contexts > 1)
+ util_slab_set_thread_safety(&rscreen->pool_buffers,
+ UTIL_SLAB_MULTITHREADED);
+ } else {
+ rscreen->num_contexts--;
+
+ if (rscreen->num_contexts <= 1)
+ util_slab_set_thread_safety(&rscreen->pool_buffers,
+ UTIL_SLAB_SINGLETHREADED);
+ }
+ pipe_mutex_unlock(rscreen->mutex_num_contexts);
}
static void r600_destroy_context(struct pipe_context *context)
@@ -79,8 +101,6 @@ static void r600_destroy_context(struct pipe_context *context)
rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush);
- r600_end_vertex_translate(rctx);
-
r600_context_fini(&rctx->ctx);
util_blitter_destroy(rctx->blitter);
@@ -89,14 +109,11 @@ static void r600_destroy_context(struct pipe_context *context)
free(rctx->states[i]);
}
- u_upload_destroy(rctx->upload_vb);
- u_upload_destroy(rctx->upload_ib);
+ u_vbuf_mgr_destroy(rctx->vbuf_mgr);
+ util_slab_destroy(&rctx->pool_transfers);
- if (rctx->tran.translate_cache)
- translate_cache_destroy(rctx->tran.translate_cache);
+ r600_update_num_contexts(rctx->screen, -1);
- FREE(rctx->ps_resource);
- FREE(rctx->vs_resource);
FREE(rctx);
}
@@ -108,6 +125,9 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
if (rctx == NULL)
return NULL;
+
+ r600_update_num_contexts(rscreen, 1);
+
rctx->context.winsys = rscreen->screen.winsys;
rctx->context.screen = screen;
rctx->context.priv = priv;
@@ -123,6 +143,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
r600_init_query_functions(rctx);
r600_init_context_resource_functions(rctx);
r600_init_surface_functions(rctx);
+ rctx->context.draw_vbo = r600_draw_vbo;
switch (r600_get_family(rctx->radeon)) {
case CHIP_R600:
@@ -137,7 +158,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
case CHIP_RV730:
case CHIP_RV710:
case CHIP_RV740:
- rctx->context.draw_vbo = r600_draw_vbo;
r600_init_state_functions(rctx);
if (r600_context_init(&rctx->ctx, rctx->radeon)) {
r600_destroy_context(&rctx->context);
@@ -151,7 +171,9 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
case CHIP_CYPRESS:
case CHIP_HEMLOCK:
case CHIP_PALM:
- rctx->context.draw_vbo = evergreen_draw;
+ case CHIP_BARTS:
+ case CHIP_TURKS:
+ case CHIP_CAICOS:
evergreen_init_state_functions(rctx);
if (evergreen_context_init(&rctx->ctx, rctx->radeon)) {
r600_destroy_context(&rctx->context);
@@ -165,41 +187,23 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
return NULL;
}
- rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16,
- PIPE_BIND_INDEX_BUFFER);
- if (rctx->upload_ib == NULL) {
- r600_destroy_context(&rctx->context);
- return NULL;
- }
+ util_slab_create(&rctx->pool_transfers,
+ sizeof(struct pipe_transfer), 64,
+ UTIL_SLAB_SINGLETHREADED);
- rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16,
- PIPE_BIND_VERTEX_BUFFER);
- if (rctx->upload_vb == NULL) {
+ rctx->vbuf_mgr = u_vbuf_mgr_create(&rctx->context, 1024 * 1024, 256,
+ PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_INDEX_BUFFER |
+ PIPE_BIND_CONSTANT_BUFFER,
+ U_VERTEX_FETCH_DWORD_ALIGNED);
+ if (!rctx->vbuf_mgr) {
r600_destroy_context(&rctx->context);
return NULL;
}
rctx->blitter = util_blitter_create(&rctx->context);
if (rctx->blitter == NULL) {
- FREE(rctx);
- return NULL;
- }
-
- rctx->tran.translate_cache = translate_cache_create();
- if (rctx->tran.translate_cache == NULL) {
- FREE(rctx);
- return NULL;
- }
-
- rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state));
- if (!rctx->vs_resource) {
- FREE(rctx);
- return NULL;
- }
-
- rctx->ps_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state));
- if (!rctx->ps_resource) {
- FREE(rctx);
+ r600_destroy_context(&rctx->context);
return NULL;
}
@@ -209,8 +213,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
else
rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx);
- r600_blit_uncompress_depth_ptr = r600_blit_uncompress_depth;
-
return &rctx->context;
}
@@ -243,6 +245,9 @@ static const char *r600_get_family_name(enum radeon_family family)
case CHIP_CYPRESS: return "AMD CYPRESS";
case CHIP_HEMLOCK: return "AMD HEMLOCK";
case CHIP_PALM: return "AMD PALM";
+ case CHIP_BARTS: return "AMD BARTS";
+ case CHIP_TURKS: return "AMD TURKS";
+ case CHIP_CAICOS: return "AMD CAICOS";
default: return "AMD unknown";
}
}
@@ -275,18 +280,32 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_SM3:
case PIPE_CAP_TEXTURE_SWIZZLE:
- case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
case PIPE_CAP_DEPTH_CLAMP:
case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
return 1;
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ /* R600 doesn't support per-MRT blends */
+ if (family == CHIP_R600)
+ return 0;
+ else
+ return 1;
/* Unsupported features (boolean caps). */
- case PIPE_CAP_TIMER_QUERY:
case PIPE_CAP_STREAM_OUTPUT:
case PIPE_CAP_PRIMITIVE_RESTART:
case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */
- return 0;
+ /* R600 doesn't support per-MRT blends */
+ if (family == CHIP_R600)
+ return 0;
+ else
+ return 0;
+
+ case PIPE_CAP_ARRAY_TEXTURES:
+ /* fix once the CS checker upstream is fixed */
+ return debug_get_bool_option("R600_ARRAY_TEXTURE", FALSE);
/* Texturing. */
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
@@ -316,6 +335,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
return 0;
+ /* Timer queries, present when the clock frequency is non zero. */
+ case PIPE_CAP_TIMER_QUERY:
+ return r600_get_clock_crystal_freq(rscreen->radeon) != 0;
+
default:
R600_ERR("r600: unknown param %d\n", param);
return 0;
@@ -380,9 +403,9 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
case PIPE_SHADER_CAP_MAX_ADDRS:
return 1; //max native address registers/* FIXME Isn't this equal to TEMPS? */
case PIPE_SHADER_CAP_MAX_CONSTS:
- return 256; //max native parameters
+ return R600_MAX_CONST_BUFFER_SIZE;
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
- return 1;
+ return R600_MAX_CONST_BUFFERS;
case PIPE_SHADER_CAP_MAX_PREDS:
return 0; /* FIXME */
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
@@ -403,8 +426,7 @@ static boolean r600_is_format_supported(struct pipe_screen* screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned usage,
- unsigned geom_flags)
+ unsigned usage)
{
unsigned retval = 0;
if (target >= PIPE_MAX_TEXTURE_TYPES) {
@@ -417,7 +439,7 @@ static boolean r600_is_format_supported(struct pipe_screen* screen,
return FALSE;
if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
- r600_is_sampler_format_supported(format)) {
+ r600_is_sampler_format_supported(screen, format)) {
retval |= PIPE_BIND_SAMPLER_VIEW;
}
@@ -438,9 +460,14 @@ static boolean r600_is_format_supported(struct pipe_screen* screen,
retval |= PIPE_BIND_DEPTH_STENCIL;
}
- if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
- r600_is_vertex_format_supported(format))
- retval |= PIPE_BIND_VERTEX_BUFFER;
+ if (usage & PIPE_BIND_VERTEX_BUFFER) {
+ struct r600_screen *rscreen = (struct r600_screen *)screen;
+ enum radeon_family family = r600_get_family(rscreen->radeon);
+
+ if (r600_is_vertex_format_supported(format, family)) {
+ retval |= PIPE_BIND_VERTEX_BUFFER;
+ }
+ }
if (usage & PIPE_BIND_TRANSFER_READ)
retval |= PIPE_BIND_TRANSFER_READ;
@@ -459,6 +486,8 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
radeon_decref(rscreen->radeon);
+ util_slab_destroy(&rscreen->pool_buffers);
+ pipe_mutex_destroy(rscreen->mutex_num_contexts);
FREE(rscreen);
}
@@ -485,6 +514,13 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon)
r600_init_screen_resource_functions(&rscreen->screen);
rscreen->tiling_info = r600_get_tiling_info(radeon);
+ util_format_s3tc_init();
+
+ util_slab_create(&rscreen->pool_buffers,
+ sizeof(struct r600_resource_buffer), 64,
+ UTIL_SLAB_SINGLETHREADED);
+
+ pipe_mutex_init(rscreen->mutex_num_contexts);
return &rscreen->screen;
}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 485f42166d..396801e4a4 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -30,12 +30,16 @@
#include <pipe/p_screen.h>
#include <pipe/p_context.h>
#include <util/u_math.h>
-#include "translate/translate_cache.h"
+#include "util/u_slab.h"
+#include "util/u_vbuf_mgr.h"
#include "r600.h"
#include "r600_public.h"
#include "r600_shader.h"
#include "r600_resource.h"
+#define R600_MAX_CONST_BUFFERS 1
+#define R600_MAX_CONST_BUFFER_SIZE 4096
+
enum r600_pipe_state_id {
R600_PIPE_STATE_BLEND = 0,
R600_PIPE_STATE_BLEND_COLOR,
@@ -62,6 +66,11 @@ struct r600_screen {
struct pipe_screen screen;
struct radeon *radeon;
struct r600_tiling_info *tiling_info;
+ struct util_slab_mempool pool_buffers;
+ unsigned num_contexts;
+
+ /* for thread-safe write accessing to num_contexts */
+ pipe_mutex mutex_num_contexts;
};
struct r600_pipe_sampler_view {
@@ -86,9 +95,7 @@ struct r600_vertex_element
{
unsigned count;
struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS];
- enum pipe_format hw_format[PIPE_MAX_ATTRIBS];
- unsigned hw_format_size[PIPE_MAX_ATTRIBS];
- boolean incompatible_layout;
+ struct u_vbuf_mgr_elements *vmgr_elements;
struct r600_bo *fetch_shader;
unsigned fs_size;
struct r600_pipe_state rstate;
@@ -111,30 +118,18 @@ struct r600_pipe_shader {
#define NUM_TEX_UNITS 16
struct r600_textures_info {
- struct r600_pipe_sampler_view *views[NUM_TEX_UNITS];
+ struct r600_pipe_sampler_view *views[NUM_TEX_UNITS];
unsigned n_views;
void *samplers[NUM_TEX_UNITS];
unsigned n_samplers;
};
-/* vertex buffer translation context, used to translate vertex input that
- * hw doesn't natively support, so far only FLOAT64 is unsupported.
- */
-struct r600_translate_context {
- /* Translate cache for incompatible vertex offset/stride/format fallback. */
- struct translate_cache *translate_cache;
- /* The vertex buffer slot containing the translated buffer. */
- unsigned vb_slot;
- void *new_velems;
-};
-
#define R600_CONSTANT_ARRAY_SIZE 256
#define R600_RESOURCE_ARRAY_SIZE 160
struct r600_pipe_context {
struct pipe_context context;
struct blitter_context *blitter;
- struct pipe_framebuffer_state *pframebuffer;
unsigned family;
void *custom_dsa_flush;
struct r600_screen *screen;
@@ -142,43 +137,35 @@ struct r600_pipe_context {
struct r600_pipe_state *states[R600_PIPE_NSTATES];
struct r600_context ctx;
struct r600_vertex_element *vertex_elements;
+ struct r600_pipe_state fs_resource[PIPE_MAX_ATTRIBS];
struct pipe_framebuffer_state framebuffer;
struct pipe_index_buffer index_buffer;
- struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
- unsigned nvertex_buffer;
unsigned cb_target_mask;
/* for saving when using blitter */
struct pipe_stencil_ref stencil_ref;
struct pipe_viewport_state viewport;
struct pipe_clip_state clip;
- unsigned nvs_resource;
- struct r600_pipe_state *vs_resource;
- struct r600_pipe_state *ps_resource;
struct r600_pipe_state config;
struct r600_pipe_shader *ps_shader;
struct r600_pipe_shader *vs_shader;
struct r600_pipe_state vs_const_buffer;
+ struct r600_pipe_state vs_const_buffer_resource[R600_MAX_CONST_BUFFERS];
struct r600_pipe_state ps_const_buffer;
+ struct r600_pipe_state ps_const_buffer_resource[R600_MAX_CONST_BUFFERS];
struct r600_pipe_rasterizer *rasterizer;
/* shader information */
unsigned sprite_coord_enable;
bool flatshade;
- struct u_upload_mgr *upload_vb;
- struct u_upload_mgr *upload_ib;
- unsigned any_user_vbs;
struct r600_textures_info ps_samplers;
- unsigned vb_max_index;
- struct r600_translate_context tran;
+
+ struct u_vbuf_mgr *vbuf_mgr;
+ struct util_slab_mempool pool_transfers;
+ bool blit;
};
struct r600_drawl {
+ struct pipe_draw_info info;
struct pipe_context *ctx;
- unsigned mode;
- unsigned min_index;
- unsigned max_index;
- unsigned index_bias;
- unsigned start;
- unsigned count;
unsigned index_size;
unsigned index_buffer_offset;
struct pipe_resource *index_buffer;
@@ -187,16 +174,21 @@ struct r600_drawl {
/* evergreen_state.c */
void evergreen_init_state_functions(struct r600_pipe_context *rctx);
void evergreen_init_config(struct r600_pipe_context *rctx);
-void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info);
void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void evergreen_fetch_shader(struct r600_vertex_element *ve);
void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx);
void evergreen_polygon_offset_update(struct r600_pipe_context *rctx);
-void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx);
+void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
+ struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride);
/* r600_blit.c */
void r600_init_blit_functions(struct r600_pipe_context *rctx);
-int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture);
+void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture);
+void r600_blit_push_depth(struct pipe_context *ctx, struct r600_resource_texture *texture);
+void r600_flush_depth_textures(struct r600_pipe_context *rctx);
/* r600_buffer.c */
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
@@ -204,13 +196,9 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
void *ptr, unsigned bytes,
unsigned bind);
-unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context,
- struct pipe_resource *buf,
- unsigned level, int layer);
struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
struct winsys_handle *whandle);
-int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw);
-int r600_upload_user_buffers(struct r600_pipe_context *rctx);
+void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw);
/* r600_query.c */
void r600_init_query_functions(struct r600_pipe_context *rctx);
@@ -219,7 +207,6 @@ void r600_init_query_functions(struct r600_pipe_context *rctx);
void r600_init_context_resource_functions(struct r600_pipe_context *r600);
/* r600_shader.c */
-int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader);
int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens);
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader);
int r600_find_vs_semantic_index(struct r600_shader *vs,
@@ -227,11 +214,17 @@ int r600_find_vs_semantic_index(struct r600_shader *vs,
/* r600_state.c */
void r600_init_state_functions(struct r600_pipe_context *rctx);
-void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info);
+void r600_spi_update(struct r600_pipe_context *rctx);
void r600_init_config(struct r600_pipe_context *rctx);
+void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void r600_fetch_shader(struct r600_vertex_element *ve);
void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx);
void r600_polygon_offset_update(struct r600_pipe_context *rctx);
-void r600_vertex_buffer_update(struct r600_pipe_context *rctx);
+void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
+ struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride);
/* r600_helper.h */
int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
@@ -239,15 +232,13 @@ int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
/* r600_texture.c */
void r600_init_screen_texture_functions(struct pipe_screen *screen);
void r600_init_surface_functions(struct r600_pipe_context *r600);
-uint32_t r600_translate_texformat(enum pipe_format format,
- const unsigned char *swizzle_view,
+uint32_t r600_translate_texformat(struct pipe_screen *screen, enum pipe_format format,
+ const unsigned char *swizzle_view,
uint32_t *word4_p, uint32_t *yuv_format_p);
unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
unsigned level, unsigned layer);
/* r600_translate.c */
-void r600_begin_vertex_translate(struct r600_pipe_context *rctx);
-void r600_end_vertex_translate(struct r600_pipe_context *rctx);
void r600_translate_index_buffer(struct r600_pipe_context *r600,
struct pipe_resource **index_buffer,
unsigned *index_size,
@@ -270,13 +261,16 @@ void r600_sampler_view_destroy(struct pipe_context *ctx,
void r600_bind_state(struct pipe_context *ctx, void *state);
void r600_delete_state(struct pipe_context *ctx, void *state);
void r600_bind_vertex_elements(struct pipe_context *ctx, void *state);
-
void *r600_create_shader_state(struct pipe_context *ctx,
const struct pipe_shader_state *state);
void r600_bind_ps_shader(struct pipe_context *ctx, void *state);
void r600_bind_vs_shader(struct pipe_context *ctx, void *state);
void r600_delete_ps_shader(struct pipe_context *ctx, void *state);
void r600_delete_vs_shader(struct pipe_context *ctx, void *state);
+void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
+ struct pipe_resource *buffer);
+void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info);
+
/*
* common helpers
*/
diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c
index 726668260c..181ea3f9e4 100644
--- a/src/gallium/drivers/r600/r600_query.c
+++ b/src/gallium/drivers/r600/r600_query.c
@@ -21,6 +21,7 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "r600_pipe.h"
+#include "r600d.h"
static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
{
@@ -61,11 +62,35 @@ static boolean r600_get_query_result(struct pipe_context *ctx,
struct r600_query *rquery = (struct r600_query *)query;
if (rquery->num_results) {
- ctx->flush(ctx, 0, NULL);
+ ctx->flush(ctx, NULL);
}
return r600_context_query_result(&rctx->ctx, (struct r600_query *)query, wait, vresult);
}
+static void r600_render_condition(struct pipe_context *ctx,
+ struct pipe_query *query,
+ uint mode)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_query *rquery = (struct r600_query *)query;
+ int wait_flag = 0;
+
+ if (!query) {
+ rctx->ctx.predicate_drawing = false;
+ r600_query_predication(&rctx->ctx, NULL, PREDICATION_OP_CLEAR, 1);
+ return;
+ }
+
+ if (mode == PIPE_RENDER_COND_WAIT ||
+ mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
+ wait_flag = 1;
+ }
+
+ rctx->ctx.predicate_drawing = true;
+ r600_query_predication(&rctx->ctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
+
+}
+
void r600_init_query_functions(struct r600_pipe_context *rctx)
{
rctx->context.create_query = r600_create_query;
@@ -73,4 +98,7 @@ void r600_init_query_functions(struct r600_pipe_context *rctx)
rctx->context.begin_query = r600_begin_query;
rctx->context.end_query = r600_end_query;
rctx->context.get_query_result = r600_get_query_result;
+
+ if (r600_get_num_backends(rctx->screen->radeon) > 0)
+ rctx->context.render_condition = r600_render_condition;
}
diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c
index 207642ccfa..f3ab3613c8 100644
--- a/src/gallium/drivers/r600/r600_resource.c
+++ b/src/gallium/drivers/r600/r600_resource.c
@@ -61,5 +61,4 @@ void r600_init_context_resource_functions(struct r600_pipe_context *r600)
r600->context.transfer_unmap = u_transfer_unmap_vtbl;
r600->context.transfer_destroy = u_transfer_destroy_vtbl;
r600->context.transfer_inline_write = u_transfer_inline_write_vtbl;
- r600->context.is_resource_referenced = u_is_resource_referenced_vtbl;
}
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index 25aa84682c..836e7491f1 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -24,6 +24,7 @@
#define R600_RESOURCE_H
#include "util/u_transfer.h"
+#include "util/u_vbuf_mgr.h"
/* flag to indicate a resource is to be used as a transfer so should not be tiled */
#define R600_RESOURCE_FLAG_TRANSFER PIPE_RESOURCE_FLAG_DRV_PRIV
@@ -43,25 +44,45 @@ struct r600_transfer {
* underlying implementations.
*/
struct r600_resource {
- struct u_resource base;
+ struct u_vbuf_resource b;
struct r600_bo *bo;
u32 size;
+ unsigned bo_size;
};
struct r600_resource_texture {
struct r600_resource resource;
unsigned offset[PIPE_MAX_TEXTURE_LEVELS];
- unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS];
- unsigned pitch_in_pixels[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; /* transfer */
+ unsigned pitch_in_blocks[PIPE_MAX_TEXTURE_LEVELS]; /* texture resource */
unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS];
unsigned array_mode[PIPE_MAX_TEXTURE_LEVELS];
unsigned pitch_override;
unsigned size;
- unsigned tiled;
unsigned tile_type;
unsigned depth;
- unsigned dirty;
- struct r600_resource_texture *flushed_depth_texture;
+ unsigned dirty_db;
+ struct r600_resource_texture *flushed_depth_texture;
+ boolean is_flushing_texture;
+
+ /* on some cards we have to use integer 64/128-bit types
+ for s3tc blits, do this until gallium grows int formats */
+ boolean force_int_type;
+};
+
+#define R600_TEX_IS_TILED(tex, level) ((tex)->array_mode[level] != V_038000_ARRAY_LINEAR_GENERAL && (tex)->array_mode[level] != V_038000_ARRAY_LINEAR_ALIGNED)
+
+#define R600_BUFFER_MAGIC 0xabcd1600
+
+/* XXX this could be removed */
+struct r600_resource_buffer {
+ struct r600_resource r;
+ uint32_t magic;
+};
+
+struct r600_surface {
+ struct pipe_surface base;
+ unsigned aligned_height;
};
void r600_init_screen_resource_functions(struct pipe_screen *screen);
@@ -73,41 +94,17 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
const struct pipe_resource *base,
struct winsys_handle *whandle);
-#define R600_BUFFER_MAGIC 0xabcd1600
-#define R600_BUFFER_MAX_RANGES 32
-
-struct r600_buffer_range {
- uint32_t start;
- uint32_t end;
-};
-
-struct r600_resource_buffer {
- struct r600_resource r;
- uint32_t magic;
- void *user_buffer;
- struct r600_buffer_range ranges[R600_BUFFER_MAX_RANGES];
- unsigned num_ranges;
-};
-
/* r600_buffer */
static INLINE struct r600_resource_buffer *r600_buffer(struct pipe_resource *buffer)
{
if (buffer) {
assert(((struct r600_resource_buffer *)buffer)->magic == R600_BUFFER_MAGIC);
return (struct r600_resource_buffer *)buffer;
- }
- return NULL;
-}
-
-static INLINE boolean r600_buffer_is_user_buffer(struct pipe_resource *buffer)
-{
- return r600_buffer(buffer)->user_buffer ? TRUE : FALSE;
+ }
+ return NULL;
}
-int r600_texture_depth_flush(struct pipe_context *ctx,
- struct pipe_resource *texture);
-
-extern int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture);
+int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture, boolean just_create);
/* r600_texture.c texture transfer functions. */
struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
@@ -122,9 +119,8 @@ void* r600_texture_transfer_map(struct pipe_context *ctx,
void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer);
-struct r600_surface {
- struct pipe_surface base;
- unsigned aligned_height;
-};
+struct r600_pipe_context;
+
+void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource_buffer **rbuffer, uint32_t *offset);
#endif
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index d6455023a3..e7285d624e 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -28,60 +28,12 @@
#include "r600_pipe.h"
#include "r600_asm.h"
#include "r600_sq.h"
+#include "r600_formats.h"
#include "r600_opcodes.h"
#include "r600d.h"
#include <stdio.h>
#include <errno.h>
-static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
-{
- struct r600_pipe_state *rstate = &shader->rstate;
- struct r600_shader *rshader = &shader->shader;
- unsigned spi_vs_out_id[10];
- unsigned i, tmp;
-
- /* clear previous register */
- rstate->nregs = 0;
-
- /* so far never got proper semantic id from tgsi */
- /* FIXME better to move this in config things so they get emited
- * only one time per cs
- */
- for (i = 0; i < 10; i++) {
- spi_vs_out_id[i] = 0;
- }
- for (i = 0; i < 32; i++) {
- tmp = i << ((i & 3) * 8);
- spi_vs_out_id[i / 4] |= tmp;
- }
- for (i = 0; i < 10; i++) {
- r600_pipe_state_add_reg(rstate,
- R_028614_SPI_VS_OUT_ID_0 + i * 4,
- spi_vs_out_id[i], 0xFFFFFFFF, NULL);
- }
-
- r600_pipe_state_add_reg(rstate,
- R_0286C4_SPI_VS_OUT_CONFIG,
- S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028868_SQ_PGM_RESOURCES_VS,
- S_028868_NUM_GPRS(rshader->bc.ngpr) |
- S_028868_STACK_SIZE(rshader->bc.nstack),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_0288D0_SQ_PGM_CF_OFFSET_VS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028858_SQ_PGM_START_VS,
- r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
-
- r600_pipe_state_add_reg(rstate,
- R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
- 0xFFFFFFFF, NULL);
-
-}
-
int r600_find_vs_semantic_index(struct r600_shader *vs,
struct r600_shader *ps, int id)
{
@@ -96,98 +48,7 @@ int r600_find_vs_semantic_index(struct r600_shader *vs,
return 0;
}
-static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
-{
- struct r600_pipe_state *rstate = &shader->rstate;
- struct r600_shader *rshader = &shader->shader;
- unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
- int pos_index = -1, face_index = -1;
-
- rstate->nregs = 0;
-
- for (i = 0; i < rshader->ninput; i++) {
- if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
- pos_index = i;
- if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
- face_index = i;
- }
-
- for (i = 0; i < rshader->noutput; i++) {
- if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
- r600_pipe_state_add_reg(rstate,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_Z_EXPORT_ENABLE(1),
- S_02880C_Z_EXPORT_ENABLE(1), NULL);
- if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
- r600_pipe_state_add_reg(rstate,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
- S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
- }
-
- exports_ps = 0;
- num_cout = 0;
- for (i = 0; i < rshader->noutput; i++) {
- if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
- exports_ps |= 1;
- else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
- num_cout++;
- }
- }
- exports_ps |= S_028854_EXPORT_COLORS(num_cout);
- if (!exports_ps) {
- /* always at least export 1 component per pixel */
- exports_ps = 2;
- }
-
- spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
- S_0286CC_PERSP_GRADIENT_ENA(1);
- spi_input_z = 0;
- if (pos_index != -1) {
- spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
- S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
- S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
- S_0286CC_BARYC_SAMPLE_CNTL(1));
- spi_input_z |= 1;
- }
-
- spi_ps_in_control_1 = 0;
- if (face_index != -1) {
- spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
- S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
- }
-
- r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028840_SQ_PGM_START_PS,
- r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
- r600_pipe_state_add_reg(rstate,
- R_028850_SQ_PGM_RESOURCES_PS,
- S_028868_NUM_GPRS(rshader->bc.ngpr) |
- S_028868_STACK_SIZE(rshader->bc.nstack),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028854_SQ_PGM_EXPORTS_PS,
- exports_ps, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_0288CC_SQ_PGM_CF_OFFSET_PS,
- 0x00000000, 0xFFFFFFFF, NULL);
-
- if (rshader->uses_kill) {
- /* only set some bits here, the other bits are set in the dsa state */
- r600_pipe_state_add_reg(rstate,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_KILL_ENABLE(1),
- S_02880C_KILL_ENABLE(1), NULL);
- }
- r600_pipe_state_add_reg(rstate,
- R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
- 0xFFFFFFFF, NULL);
-}
-
-int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_shader *rshader = &shader->shader;
@@ -225,14 +86,23 @@ int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
return 0;
}
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
+static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
+
int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
{
+ static int dump_shaders = -1;
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
int r;
-//fprintf(stderr, "--------------------------------------------------------------\n");
-//tgsi_dump(tokens, 0);
+ /* Would like some magic "get_bool_option_once" routine.
+ */
+ if (dump_shaders == -1)
+ dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
+
+ if (dump_shaders) {
+ fprintf(stderr, "--------------------------------------------------------------\n");
+ tgsi_dump(tokens, 0);
+ }
shader->shader.family = r600_get_family(rctx->radeon);
r = r600_shader_from_tgsi(tokens, &shader->shader);
if (r) {
@@ -244,8 +114,10 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s
R600_ERR("building bytecode failed !\n");
return r;
}
-//r600_bc_dump(&shader->shader.bc);
-//fprintf(stderr, "______________________________________________________________\n");
+ if (dump_shaders) {
+ r600_bc_dump(&shader->shader.bc);
+ fprintf(stderr, "______________________________________________________________\n");
+ }
return r600_pipe_shader(ctx, shader);
}
@@ -262,6 +134,15 @@ void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader
*/
struct r600_shader_tgsi_instruction;
+struct r600_shader_src {
+ unsigned sel;
+ unsigned swizzle[4];
+ unsigned neg;
+ unsigned abs;
+ unsigned rel;
+ uint32_t value[4];
+};
+
struct r600_shader_ctx {
struct tgsi_shader_info info;
struct tgsi_parse_context parse;
@@ -269,10 +150,11 @@ struct r600_shader_ctx {
unsigned type;
unsigned file_offset[TGSI_FILE_COUNT];
unsigned temp_reg;
+ unsigned ar_reg;
struct r600_shader_tgsi_instruction *inst_info;
struct r600_bc *bc;
struct r600_shader *shader;
- u32 value[4];
+ struct r600_shader_src src[3];
u32 *literals;
u32 nliterals;
u32 max_driver_temp_used;
@@ -391,6 +273,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
{
struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
unsigned i;
+ int r;
switch (d->Declaration.File) {
case TGSI_FILE_INPUT:
@@ -422,6 +305,26 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
case TGSI_FILE_SAMPLER:
case TGSI_FILE_ADDRESS:
break;
+
+ case TGSI_FILE_SYSTEM_VALUE:
+ if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
+ struct r600_bc_alu alu;
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
+ alu.src[0].sel = 0;
+ alu.src[0].chan = 3;
+
+ alu.dst.sel = 0;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
+ break;
+ }
+
default:
R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
return -EINVAL;
@@ -481,9 +384,187 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
return ctx->num_interp_gpr;
}
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
+static void tgsi_src(struct r600_shader_ctx *ctx,
+ const struct tgsi_full_src_register *tgsi_src,
+ struct r600_shader_src *r600_src)
+{
+ memset(r600_src, 0, sizeof(*r600_src));
+ r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
+ r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
+ r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
+ r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
+ r600_src->neg = tgsi_src->Register.Negate;
+ r600_src->abs = tgsi_src->Register.Absolute;
+
+ if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
+ int index;
+ if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
+ (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
+ (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
+
+ index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
+ r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
+ if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
+ return;
+ }
+ index = tgsi_src->Register.Index;
+ r600_src->sel = V_SQ_ALU_SRC_LITERAL;
+ memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
+ } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
+ /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
+ r600_src->swizzle[0] = 3;
+ r600_src->swizzle[1] = 3;
+ r600_src->swizzle[2] = 3;
+ r600_src->swizzle[3] = 3;
+ r600_src->sel = 0;
+ } else {
+ if (tgsi_src->Register.Indirect)
+ r600_src->rel = V_SQ_REL_RELATIVE;
+ r600_src->sel = tgsi_src->Register.Index;
+ r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
+ }
+}
+
+static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
+{
+ struct r600_bc_vtx vtx;
+ unsigned int ar_reg;
+ int r;
+
+ if (offset) {
+ struct r600_bc_alu alu;
+
+ memset(&alu, 0, sizeof(alu));
+
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
+ alu.src[0].sel = ctx->ar_reg;
+
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = offset;
+
+ alu.dst.sel = dst_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
+
+ ar_reg = dst_reg;
+ } else {
+ ar_reg = ctx->ar_reg;
+ }
+
+ memset(&vtx, 0, sizeof(vtx));
+ vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
+ vtx.src_gpr = ar_reg;
+ vtx.mega_fetch_count = 16;
+ vtx.dst_gpr = dst_reg;
+ vtx.dst_sel_x = 0; /* SEL_X */
+ vtx.dst_sel_y = 1; /* SEL_Y */
+ vtx.dst_sel_z = 2; /* SEL_Z */
+ vtx.dst_sel_w = 3; /* SEL_W */
+ vtx.data_format = FMT_32_32_32_32_FLOAT;
+ vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
+ vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
+ vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
+
+ if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
+ return r;
+
+ return 0;
+}
+
+static int tgsi_split_constant(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, j, k, nconst, r;
+
+ for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
+ nconst++;
+ }
+ tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
+ }
+ for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
+ if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
+ continue;
+ }
+
+ if (ctx->src[i].rel) {
+ int treg = r600_get_temp(ctx);
+ if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
+ return r;
+
+ ctx->src[i].sel = treg;
+ ctx->src[i].rel = 0;
+ j--;
+ } else if (j > 0) {
+ int treg = r600_get_temp(ctx);
+ for (k = 0; k < 4; k++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+ alu.src[0].sel = ctx->src[i].sel;
+ alu.src[0].chan = k;
+ alu.src[0].rel = ctx->src[i].rel;
+ alu.dst.sel = treg;
+ alu.dst.chan = k;
+ alu.dst.write = 1;
+ if (k == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ ctx->src[i].sel = treg;
+ ctx->src[i].rel =0;
+ j--;
+ }
+ }
+ return 0;
+}
+
+/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
+static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, j, k, nliteral, r;
+
+ for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+ nliteral++;
+ }
+ }
+ for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
+ if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+ int treg = r600_get_temp(ctx);
+ for (k = 0; k < 4; k++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+ alu.src[0].sel = ctx->src[i].sel;
+ alu.src[0].chan = k;
+ alu.src[0].value = ctx->src[i].value[k];
+ alu.dst.sel = treg;
+ alu.dst.chan = k;
+ alu.dst.write = 1;
+ if (k == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ ctx->src[i].sel = treg;
+ j--;
+ }
+ }
+ return 0;
+}
+
+static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
{
struct tgsi_full_immediate *immediate;
+ struct tgsi_full_property *property;
struct r600_shader_ctx ctx;
struct r600_bc_output output[32];
unsigned output_done, noutput;
@@ -506,7 +587,9 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
/* Values [0,127] correspond to GPR[0..127].
* Values [128,159] correspond to constant buffer bank 0
* Values [160,191] correspond to constant buffer bank 1
- * Values [256,511] correspond to cfile constants c[0..255].
+ * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
+ * Values [256,287] correspond to constant buffer bank 2 (EG)
+ * Values [288,319] correspond to constant buffer bank 3 (EG)
* Other special values are shown in the list below.
* 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
* 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
@@ -540,15 +623,18 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
ctx.info.file_count[TGSI_FILE_OUTPUT];
- ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
+ /* Outside the GPR range. This will be translated to one of the
+ * kcache banks later. */
+ ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
- ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
- ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
+ ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
+ ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_count[TGSI_FILE_TEMPORARY];
+ ctx.temp_reg = ctx.ar_reg + 1;
ctx.nliterals = 0;
ctx.literals = NULL;
-
+ shader->fs_write_all = FALSE;
while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
tgsi_parse_token(&ctx.parse);
switch (ctx.parse.FullToken.Token.Type) {
@@ -577,7 +663,12 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
ctx.max_driver_temp_used = 0;
/* reserve first tmp for everyone */
r600_get_temp(&ctx);
+
opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
+ if ((r = tgsi_split_constant(&ctx)))
+ goto out_err;
+ if ((r = tgsi_split_literal_constant(&ctx)))
+ goto out_err;
if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
else
@@ -585,9 +676,13 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
r = ctx.inst_info->process(&ctx);
if (r)
goto out_err;
- r = r600_bc_add_literal(ctx.bc, ctx.value);
- if (r)
- goto out_err;
+ break;
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ property = &ctx.parse.FullToken.FullProperty;
+ if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
+ if (property->u[0].Data == 1)
+ shader->fs_write_all = TRUE;
+ }
break;
default:
R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
@@ -605,6 +700,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[i].swizzle_y = 1;
output[i].swizzle_z = 2;
output[i].swizzle_w = 3;
+ output[i].burst_count = 1;
output[i].barrier = 1;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[i].array_base = i - pos0;
@@ -668,6 +764,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[i].swizzle_y = 1;
output[i].swizzle_z = 2;
output[i].swizzle_w = 3;
+ output[i].burst_count = 1;
output[i].barrier = 1;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[i].array_base = 0;
@@ -684,6 +781,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[0].swizzle_y = 7;
output[0].swizzle_z = 7;
output[0].swizzle_w = 7;
+ output[0].burst_count = 1;
output[0].barrier = 1;
output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
output[0].array_base = 0;
@@ -726,33 +824,22 @@ static int tgsi_end(struct r600_shader_ctx *ctx)
return 0;
}
-static int tgsi_src(struct r600_shader_ctx *ctx,
- const struct tgsi_full_src_register *tgsi_src,
- struct r600_bc_alu_src *r600_src)
+static void r600_bc_src(struct r600_bc_alu_src *bc_src,
+ const struct r600_shader_src *shader_src,
+ unsigned chan)
{
- int index;
- memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
- r600_src->sel = tgsi_src->Register.Index;
- if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
- r600_src->sel = 0;
- index = tgsi_src->Register.Index;
- ctx->value[0] = ctx->literals[index * 4 + 0];
- ctx->value[1] = ctx->literals[index * 4 + 1];
- ctx->value[2] = ctx->literals[index * 4 + 2];
- ctx->value[3] = ctx->literals[index * 4 + 3];
- }
- if (tgsi_src->Register.Indirect)
- r600_src->rel = V_SQ_REL_RELATIVE;
- r600_src->neg = tgsi_src->Register.Negate;
- r600_src->abs = tgsi_src->Register.Absolute;
- r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
- return 0;
+ bc_src->sel = shader_src->sel;
+ bc_src->chan = shader_src->swizzle[chan];
+ bc_src->neg = shader_src->neg;
+ bc_src->abs = shader_src->abs;
+ bc_src->rel = shader_src->rel;
+ bc_src->value = shader_src->value[bc_src->chan];
}
-static int tgsi_dst(struct r600_shader_ctx *ctx,
- const struct tgsi_full_dst_register *tgsi_dst,
- unsigned swizzle,
- struct r600_bc_alu_dst *r600_dst)
+static void tgsi_dst(struct r600_shader_ctx *ctx,
+ const struct tgsi_full_dst_register *tgsi_dst,
+ unsigned swizzle,
+ struct r600_bc_alu_dst *r600_dst)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -765,146 +852,42 @@ static int tgsi_dst(struct r600_shader_ctx *ctx,
if (inst->Instruction.Saturate) {
r600_dst->clamp = 1;
}
- return 0;
}
-static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
+static int tgsi_last_instruction(unsigned writemask)
{
- switch (swizzle) {
- case 0:
- return tgsi_src->Register.SwizzleX;
- case 1:
- return tgsi_src->Register.SwizzleY;
- case 2:
- return tgsi_src->Register.SwizzleZ;
- case 3:
- return tgsi_src->Register.SwizzleW;
- default:
- return 0;
- }
-}
+ int i, lasti = 0;
-static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
-{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu alu;
- int i, j, k, nconst, r;
-
- for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
- if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
- nconst++;
- }
- r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
- if (r) {
- return r;
- }
- }
- for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
- if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
- int treg = r600_get_temp(ctx);
- for (k = 0; k < 4; k++) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- alu.src[0].sel = r600_src[i].sel;
- alu.src[0].chan = k;
- alu.src[0].rel = r600_src[i].rel;
- alu.dst.sel = treg;
- alu.dst.chan = k;
- alu.dst.write = 1;
- if (k == 3)
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- r600_src[i].sel = treg;
- r600_src[i].rel =0;
- j--;
- }
- }
- return 0;
-}
-
-/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
-static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
-{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu alu;
- int i, j, k, nliteral, r;
-
- for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
- if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
- nliteral++;
- }
- }
- for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
- if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
- int treg = r600_get_temp(ctx);
- for (k = 0; k < 4; k++) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- alu.src[0].sel = r600_src[i].sel;
- alu.src[0].chan = k;
- alu.dst.sel = treg;
- alu.dst.chan = k;
- alu.dst.write = 1;
- if (k == 3)
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
- if (r)
- return r;
- r600_src[i].sel = treg;
- j--;
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1 << i)) {
+ lasti = i;
}
}
- return 0;
+ return lasti;
}
static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
- int lasti = 0;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- for (i = 0; i < 4; i++) {
- if (inst->Dst[0].Register.WriteMask & (1 << i)) {
- lasti = i;
- }
- }
-
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
memset(&alu, 0, sizeof(struct r600_bc_alu));
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.inst = ctx->inst_info->r600_opcode;
if (!swap) {
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- alu.src[j] = r600_src[j];
- alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
+ r600_bc_src(&alu.src[j], &ctx->src[j], i);
}
} else {
- alu.src[0] = r600_src[1];
- alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
-
- alu.src[1] = r600_src[0];
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[0], &ctx->src[1], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
}
/* handle some special cases */
switch (ctx->inst_info->tgsi_opcode) {
@@ -942,25 +925,15 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
* r700 - normalize by dividing by 2PI
* see fdo bug 27901
*/
-static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
- struct r600_bc_alu_src r600_src[3])
+static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ static float half_inv_pi = 1.0 /(3.1415926535 * 2);
+ static float double_pi = 3.1415926535 * 2;
+ static float neg_pi = -3.1415926535;
+
int r;
- uint32_t lit_vals[4];
struct r600_bc_alu alu;
- memset(lit_vals, 0, 4*4);
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
- lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
- lit_vals[1] = fui(0.5f);
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
@@ -969,20 +942,17 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].chan = 0;
- alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
- alu.src[2].chan = 1;
+ alu.src[1].value = *(uint32_t *)&half_inv_pi;
+ alu.src[2].sel = V_SQ_ALU_SRC_0_5;
+ alu.src[2].chan = 0;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, lit_vals);
- if (r)
- return r;
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
@@ -998,14 +968,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
if (r)
return r;
- if (ctx->bc->chiprev == CHIPREV_R600) {
- lit_vals[0] = fui(3.1415926535897f * 2.0f);
- lit_vals[1] = fui(-3.1415926535897f);
- } else {
- lit_vals[0] = fui(1.0f);
- lit_vals[1] = fui(-0.5f);
- }
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
@@ -1020,26 +982,32 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].chan = 0;
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
- alu.src[2].chan = 1;
+ alu.src[2].chan = 0;
+
+ if (ctx->bc->chiprev == CHIPREV_R600) {
+ alu.src[1].value = *(uint32_t *)&double_pi;
+ alu.src[2].value = *(uint32_t *)&neg_pi;
+ } else {
+ alu.src[1].sel = V_SQ_ALU_SRC_1;
+ alu.src[2].sel = V_SQ_ALU_SRC_0_5;
+ alu.src[2].neg = 1;
+ }
+
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, lit_vals);
- if (r)
- return r;
return 0;
}
static int tgsi_trig(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, r;
- int lasti = 0;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- r = tgsi_setup_trig(ctx, r600_src);
+ r = tgsi_setup_trig(ctx);
if (r)
return r;
@@ -1057,10 +1025,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
return r;
/* replicate result */
- for (i = 0; i < 4; i++) {
- if (inst->Dst[0].Register.WriteMask & (1 << i))
- lasti = i;
- }
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
@@ -1069,9 +1033,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = ctx->temp_reg;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
if (i == lasti)
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
@@ -1084,7 +1046,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
static int tgsi_scs(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int r;
@@ -1092,7 +1053,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
* X or Y components of the destination vector.
*/
if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
- r = tgsi_setup_trig(ctx, r600_src);
+ r = tgsi_setup_trig(ctx);
if (r)
return r;
}
@@ -1101,9 +1062,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
- r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
@@ -1117,9 +1076,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
- r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
@@ -1135,9 +1092,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.src[0].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = 0;
@@ -1147,10 +1102,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* dst.w = 1.0; */
@@ -1159,9 +1110,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
@@ -1171,10 +1120,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
return 0;
@@ -1182,7 +1127,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
static int tgsi_kill(struct r600_shader_ctx *ctx)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int i, r;
@@ -1198,10 +1142,7 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)
alu.src[1].sel = V_SQ_ALU_SRC_1;
alu.src[1].neg = 1;
} else {
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
}
if (i == 3) {
alu.last = 1;
@@ -1210,9 +1151,6 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* kill must be last in ALU */
ctx->bc->force_add_cf = 1;
@@ -1224,24 +1162,14 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
- struct r600_bc_alu_src r600_src[3];
int r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
/* dst.x, <- 1.0 */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
alu.src[0].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
@@ -1250,12 +1178,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
/* dst.y = max(src.x, 0.0) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
- alu.src[0] = r600_src[0];
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
alu.src[1].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
@@ -1266,19 +1192,13 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
if (inst->Dst[0].Register.WriteMask & (1 << 2))
{
int chan;
@@ -1287,33 +1207,24 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
/* dst.z = log(src.y) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
- r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
- if (r)
- return r;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 1);
+ tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
chan = alu.dst.chan;
sel = alu.dst.sel;
/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 3);
alu.src[1].sel = sel;
alu.src[1].chan = chan;
- alu.src[2] = r600_src[0];
- alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[2], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
alu.dst.write = 1;
@@ -1323,17 +1234,12 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* dst.z = exp(tmp.x) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
@@ -1357,10 +1263,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
- if (r)
- return r;
- alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
+ r600_bc_src(&alu.src[i], &ctx->src[i], 0);
alu.src[i].abs = 1;
}
alu.dst.sel = ctx->temp_reg;
@@ -1369,9 +1272,6 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* replicate result */
return tgsi_helper_tempx_replicate(ctx);
}
@@ -1387,9 +1287,7 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
alu.src[0].sel = ctx->temp_reg;
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.dst.chan = i;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
if (i == 3)
alu.last = 1;
@@ -1409,10 +1307,7 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
- if (r)
- return r;
- alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
+ r600_bc_src(&alu.src[i], &ctx->src[i], 0);
}
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -1420,42 +1315,29 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* replicate result */
return tgsi_helper_tempx_replicate(ctx);
}
static int tgsi_pow(struct r600_shader_ctx *ctx)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
/* LOG2(a) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc,ctx->value);
- if (r)
- return r;
/* b * LOG2(a) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
- r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
+ r600_bc_src(&alu.src[0], &ctx->src[1], 0);
alu.src[1].sel = ctx->temp_reg;
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -1463,9 +1345,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc,ctx->value);
- if (r)
- return r;
/* POW(a,b) = EXP2(b * LOG2(a))*/
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
@@ -1476,9 +1355,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc,ctx->value);
- if (r)
- return r;
return tgsi_helper_tempx_replicate(ctx);
}
@@ -1486,16 +1362,8 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
- struct r600_bc_alu_src r600_src[3];
int i, r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
/* tmp = (src > 0 ? 1 : src) */
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
@@ -1505,31 +1373,23 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
-
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
alu.src[1].sel = V_SQ_ALU_SRC_1;
+ r600_bc_src(&alu.src[2], &ctx->src[0], i);
- alu.src[2] = r600_src[0];
- alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
if (i == 3)
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* dst = (-tmp > 0 ? -1 : tmp) */
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
alu.is_op3 = 1;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = i;
@@ -1555,9 +1415,6 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
struct r600_bc_alu alu;
int i, r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
@@ -1565,9 +1422,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
alu.dst.chan = i;
} else {
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = i;
}
@@ -1584,61 +1439,50 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
static int tgsi_op3(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
- /* do it in 2 step as op3 doesn't support writemask */
- for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- alu.src[j] = r600_src[j];
- alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
+ r600_bc_src(&alu.src[j], &ctx->src[j], i);
}
- alu.dst.sel = ctx->temp_reg;
+
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- return tgsi_helper_copy(ctx, inst);
+ return 0;
}
static int tgsi_dp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- alu.src[j] = r600_src[j];
- alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
+ r600_bc_src(&alu.src[j], &ctx->src[j], i);
}
- alu.dst.sel = ctx->temp_reg;
+
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
- alu.dst.write = 1;
+ alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
/* handle some special cases */
switch (ctx->inst_info->tgsi_opcode) {
case TGSI_OPCODE_DP2:
@@ -1670,19 +1514,21 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- return tgsi_helper_copy(ctx, inst);
+ return 0;
}
static int tgsi_tex(struct r600_shader_ctx *ctx)
{
+ static float one_point_five = 1.5f;
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_tex tex;
struct r600_bc_alu alu;
unsigned src_gpr;
int r, i;
int opcode;
- boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
- uint32_t lit_vals[4];
+ boolean src_not_temp =
+ inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
+ inst->Src[0].Register.File != TGSI_FILE_INPUT;
src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
@@ -1690,11 +1536,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
/* Add perspective divide */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 3);
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 3;
alu.last = 1;
@@ -1708,10 +1551,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 3;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
alu.dst.write = 1;
@@ -1735,43 +1575,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
}
if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
- int src_chan, src2_chan;
+ static const unsigned src0_swizzle[] = {2, 2, 0, 1};
+ static const unsigned src1_swizzle[] = {1, 0, 2, 2};
/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
- switch (i) {
- case 0:
- src_chan = 2;
- src2_chan = 1;
- break;
- case 1:
- src_chan = 2;
- src2_chan = 0;
- break;
- case 2:
- src_chan = 0;
- src2_chan = 2;
- break;
- case 3:
- src_chan = 1;
- src2_chan = 2;
- break;
- default:
- assert(0);
- src_chan = 0;
- src2_chan = 0;
- break;
- }
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
+ r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
+ r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
if (i == 3)
@@ -1811,6 +1623,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[2].chan = 0;
+ alu.src[2].value = *(uint32_t *)&one_point_five;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
@@ -1831,6 +1644,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[2].chan = 0;
+ alu.src[2].value = *(uint32_t *)&one_point_five;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 1;
@@ -1841,11 +1655,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
if (r)
return r;
- lit_vals[0] = fui(1.5f);
-
- r = r600_bc_add_literal(ctx->bc, lit_vals);
- if (r)
- return r;
src_not_temp = FALSE;
src_gpr = ctx->temp_reg;
}
@@ -1854,8 +1663,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- alu.src[0].sel = src_gpr;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
if (i == 3)
@@ -1876,7 +1684,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
memset(&tex, 0, sizeof(struct r600_bc_tex));
tex.inst = opcode;
tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
- tex.resource_id = tex.sampler_id;
+ tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
tex.src_gpr = src_gpr;
tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
@@ -1902,6 +1710,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
tex.coord_type_w = 1;
}
+ if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
+ tex.coord_type_z = 0;
+ tex.src_sel_z = 1;
+ } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
+ tex.coord_type_z = 0;
+
if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
tex.src_sel_w = 2;
@@ -1916,29 +1730,48 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
static int tgsi_lrp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
unsigned i;
int r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
+ /* optimize if it's just an equal balance */
+ if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
+ r600_bc_src(&alu.src[0], &ctx->src[1], i);
+ r600_bc_src(&alu.src[1], &ctx->src[2], i);
+ alu.omod = 3;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.chan = i;
+ if (i == lasti) {
+ alu.last = 1;
+ }
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+ }
+
/* 1 - src0 */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
- alu.src[1] = r600_src[0];
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
alu.src[1].neg = 1;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
alu.dst.write = 1;
@@ -1946,21 +1779,20 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* (1 - src0) * src2 */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = i;
- alu.src[1] = r600_src[2];
- alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
+ r600_bc_src(&alu.src[1], &ctx->src[2], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
alu.dst.write = 1;
@@ -1968,88 +1800,66 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* src0 * src1 + (1 - src0) * src2 */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
- alu.src[1] = r600_src[1];
- alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[1], i);
alu.src[2].sel = ctx->temp_reg;
alu.src[2].chan = i;
- alu.dst.sel = ctx->temp_reg;
+
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- return tgsi_helper_copy(ctx, inst);
+ return 0;
}
static int tgsi_cmp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
- int use_temp = 0;
int i, r;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
- if (inst->Dst[0].Register.WriteMask != 0xf)
- use_temp = 1;
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
- for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
-
- alu.src[1] = r600_src[2];
- alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
-
- alu.src[2] = r600_src[1];
- alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
-
- if (use_temp)
- alu.dst.sel = ctx->temp_reg;
- else {
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
- }
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[2], i);
+ r600_bc_src(&alu.src[2], &ctx->src[1], i);
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
- if (i == 3)
+ if (i == lasti)
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- if (use_temp)
- return tgsi_helper_copy(ctx, inst);
return 0;
}
static int tgsi_xpd(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
+ static const unsigned int src0_swizzle[] = {2, 0, 1};
+ static const unsigned int src1_swizzle[] = {1, 2, 0};
struct r600_bc_alu alu;
uint32_t use_temp = 0;
int i, r;
@@ -2057,45 +1867,15 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
if (inst->Dst[0].Register.WriteMask != 0xf)
use_temp = 1;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
-
- alu.src[0] = r600_src[0];
- switch (i) {
- case 0:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
- break;
- case 1:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
- break;
- case 2:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
- break;
- case 3:
+ if (i < 3) {
+ r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
+ r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
+ } else {
alu.src[0].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = i;
- }
-
- alu.src[1] = r600_src[1];
- switch (i) {
- case 0:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
- break;
- case 1:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
- break;
- case 2:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
- break;
- case 3:
alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.src[1].chan = i;
}
@@ -2109,44 +1889,18 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
- alu.src[0] = r600_src[0];
- switch (i) {
- case 0:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
- break;
- case 1:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
- break;
- case 2:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
- break;
- case 3:
+ if (i < 3) {
+ r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
+ r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
+ } else {
alu.src[0].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = i;
- }
-
- alu.src[1] = r600_src[1];
- switch (i) {
- case 0:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
- break;
- case 1:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
- break;
- case 2:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
- break;
- case 3:
alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.src[1].chan = i;
}
@@ -2157,11 +1911,8 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
if (use_temp)
alu.dst.sel = ctx->temp_reg;
- else {
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
- }
+ else
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
@@ -2170,10 +1921,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
if (use_temp)
return tgsi_helper_copy(ctx, inst);
@@ -2183,7 +1930,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
static int tgsi_exp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3] = { { 0 } };
struct r600_bc_alu alu;
int r;
@@ -2192,11 +1938,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
@@ -2206,10 +1948,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
@@ -2221,10 +1959,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.y = tmp - floor(tmp); */
@@ -2232,11 +1966,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
- alu.src[0] = r600_src[0];
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
@@ -2250,19 +1980,13 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.z = RoughApprox2ToX(tmp);*/
if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -2273,9 +1997,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.w = 1.0;*/
@@ -2293,9 +2014,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
return tgsi_helper_copy(ctx, inst);
}
@@ -2311,11 +2029,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
@@ -2325,10 +2039,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
@@ -2341,10 +2051,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.y = src.x / (2 ^ floor(log2(src.x))); */
@@ -2352,11 +2058,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 1;
@@ -2367,10 +2069,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
@@ -2386,10 +2084,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
@@ -2405,10 +2099,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
@@ -2424,19 +2114,11 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = ctx->temp_reg;
alu.src[1].chan = 1;
@@ -2449,10 +2131,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.z = log2(src);*/
@@ -2460,11 +2138,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -2474,10 +2148,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.w = 1.0; */
@@ -2496,10 +2166,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
return tgsi_helper_copy(ctx, inst);
@@ -2510,6 +2176,7 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
switch (inst->Instruction.Opcode) {
@@ -2524,26 +2191,26 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
return -1;
}
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.last = 1;
- alu.dst.chan = 0;
- alu.dst.sel = ctx->temp_reg;
+ alu.dst.sel = ctx->ar_reg;
alu.dst.write = 1;
- r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
+ r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
+
+ /* TODO: Note that the MOVA can be avoided if we never use AR for
+ * indexing non-CB registers in the current ALU clause. Similarly, we
+ * need to load AR from ar_reg again if we started a new clause
+ * between ARL and AR usage. The easy way to do that is to remove
+ * the MOVA here, and load it for the first AR access after ar_reg
+ * has been modified in each clause. */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].sel = ctx->ar_reg;
alu.src[0].chan = 0;
alu.last = 1;
- r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
+ r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
return 0;
@@ -2554,29 +2221,51 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
- memset(&alu, 0, sizeof(struct r600_bc_alu));
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+ alu.dst.sel = ctx->ar_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
+
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
+ alu.src[0].sel = ctx->ar_reg;
+ alu.dst.sel = ctx->ar_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
break;
case TGSI_OPCODE_ARR:
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA;
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+ alu.dst.sel = ctx->ar_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
break;
default:
assert(0);
return -1;
}
-
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
-
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
+ alu.src[0].sel = ctx->ar_reg;
alu.last = 1;
- r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
+ r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
ctx->bc->cf_last->r6xx_uses_waterfall = 1;
@@ -2593,26 +2282,18 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
if (i == 0 || i == 3) {
alu.src[0].sel = V_SQ_ALU_SRC_1;
} else {
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
}
- if (i == 0 || i == 2) {
+ if (i == 0 || i == 2) {
alu.src[1].sel = V_SQ_ALU_SRC_1;
} else {
- r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
+ r600_bc_src(&alu.src[1], &ctx->src[1], i);
}
if (i == 3)
alu.last = 1;
@@ -2625,7 +2306,6 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx)
static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
@@ -2637,10 +2317,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
alu.dst.write = 1;
alu.dst.chan = 0;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.src[1].chan = 0;
@@ -2654,9 +2331,25 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
static int pops(struct r600_shader_ctx *ctx, int pops)
{
- r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
- ctx->bc->cf_last->pop_count = pops;
- ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
+ int alu_pop = 3;
+ if (ctx->bc->cf_last) {
+ if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
+ alu_pop = 0;
+ else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
+ alu_pop = 1;
+ }
+ alu_pop += pops;
+ if (alu_pop == 1) {
+ ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
+ ctx->bc->force_add_cf = 1;
+ } else if (alu_pop == 2) {
+ ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
+ ctx->bc->force_add_cf = 1;
+ } else {
+ r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
+ ctx->bc->cf_last->pop_count = pops;
+ ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
+ }
return 0;
}
@@ -3002,7 +2695,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
+ {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
{TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* gap */
{88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
@@ -3075,7 +2768,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
- {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
{TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
{TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
{TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
@@ -3160,7 +2853,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
+ {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
{TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* gap */
{88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 35b0331525..8f96ce5085 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -45,8 +45,7 @@ struct r600_shader {
struct r600_shader_io output[32];
enum radeon_family family;
boolean uses_kill;
+ boolean fs_write_all;
};
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
-
#endif
diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h
index 0573e63dc8..56ed35e8b3 100644
--- a/src/gallium/drivers/r600/r600_sq.h
+++ b/src/gallium/drivers/r600/r600_sq.h
@@ -74,6 +74,10 @@
#define S_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) & 0x3) << 30)
#define G_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) >> 30) & 0x3)
#define C_SQ_CF_ALU_WORD0_KCACHE_MODE0 0x3FFFFFFF
+#define V_SQ_CF_KCACHE_NOP 0x00000000
+#define V_SQ_CF_KCACHE_LOCK_1 0x00000001
+#define V_SQ_CF_KCACHE_LOCK_2 0x00000002
+#define V_SQ_CF_KCACHE_LOCK_LOOP_INDEX 0x00000003
#define P_SQ_CF_ALU_WORD1
#define S_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) & 0x3) << 0)
#define G_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) >> 0) & 0x3)
@@ -187,6 +191,8 @@
#define V_SQ_ALU_SRC_M_1_INT 0x000000FB
#define V_SQ_ALU_SRC_0_5 0x000000FC
#define V_SQ_ALU_SRC_LITERAL 0x000000FD
+#define V_SQ_ALU_SRC_PV 0x000000FE
+#define V_SQ_ALU_SRC_PS 0x000000FF
#define V_SQ_ALU_SRC_PARAM_BASE 0x000001C0
#define S_SQ_ALU_WORD0_SRC0_REL(x) (((x) & 0x1) << 9)
#define G_SQ_ALU_WORD0_SRC0_REL(x) (((x) >> 9) & 0x1)
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 553d786d65..d3adf0393c 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -36,8 +36,8 @@
#include <util/u_pack_color.h>
#include <util/u_memory.h>
#include <util/u_inlines.h>
-#include <util/u_upload_mgr.h>
#include <util/u_framebuffer.h>
+#include "util/u_transfer.h"
#include <pipebuffer/pb_buffer.h>
#include "r600.h"
#include "r600d.h"
@@ -95,230 +95,6 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx)
}
}
-/* FIXME optimize away spi update when it's not needed */
-static void r600_spi_update(struct r600_pipe_context *rctx)
-{
- struct r600_pipe_shader *shader = rctx->ps_shader;
- struct r600_pipe_state rstate;
- struct r600_shader *rshader = &shader->shader;
- unsigned i, tmp;
-
- rstate.nregs = 0;
- for (i = 0; i < rshader->ninput; i++) {
- tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
- if (rshader->input[i].centroid)
- tmp |= S_028644_SEL_CENTROID(1);
- if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
- tmp |= S_028644_SEL_LINEAR(1);
-
- if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
- tmp |= S_028644_FLAT_SHADE(rctx->flatshade);
- }
- if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
- rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
- tmp |= S_028644_PT_SPRITE_TEX(1);
- }
- r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
- }
- r600_context_pipe_state_set(&rctx->ctx, &rstate);
-}
-
-void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
-{
- struct r600_pipe_state *rstate;
- struct r600_resource *rbuffer;
- struct pipe_vertex_buffer *vertex_buffer;
- unsigned i, offset;
-
- /* we don't update until we know vertex elements */
- if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer)
- return;
-
- /* delete previous translated vertex elements */
- if (rctx->tran.new_velems) {
- r600_end_vertex_translate(rctx);
- }
-
- if (rctx->vertex_elements->incompatible_layout) {
- /* translate rebind new vertex elements so
- * return once translated
- */
- r600_begin_vertex_translate(rctx);
- return;
- }
-
- if (rctx->any_user_vbs) {
- r600_upload_user_buffers(rctx);
- rctx->any_user_vbs = FALSE;
- }
-
- if (rctx->vertex_elements->vbuffer_need_offset) {
- /* one resource per vertex elements */
- rctx->nvs_resource = rctx->vertex_elements->count;
- } else {
- /* bind vertex buffer once */
- rctx->nvs_resource = rctx->nvertex_buffer;
- }
-
- for (i = 0 ; i < rctx->nvs_resource; i++) {
- rstate = &rctx->vs_resource[i];
- rstate->id = R600_PIPE_STATE_RESOURCE;
- rstate->nregs = 0;
-
- if (rctx->vertex_elements->vbuffer_need_offset) {
- /* one resource per vertex elements */
- unsigned vbuffer_index;
- vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index;
- vertex_buffer = &rctx->vertex_buffer[vbuffer_index];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = rctx->vertex_elements->vbuffer_offset[i] +
- vertex_buffer->buffer_offset +
- r600_bo_offset(rbuffer->bo);
- } else {
- /* bind vertex buffer once */
- vertex_buffer = &rctx->vertex_buffer[i];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = vertex_buffer->buffer_offset +
- r600_bo_offset(rbuffer->bo);
- }
-
- r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0,
- offset, 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
- rbuffer->size - offset - 1, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
- S_038008_STRIDE(vertex_buffer->stride),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
- 0xC0000000, 0xFFFFFFFF, NULL);
- r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
- }
-}
-
-static void r600_draw_common(struct r600_drawl *draw)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx;
- struct r600_resource *rbuffer;
- unsigned prim;
- u32 vgt_dma_index_type, vgt_draw_initiator, mask;
- struct r600_draw rdraw;
- struct r600_pipe_state vgt;
-
- switch (draw->index_size) {
- case 2:
- vgt_draw_initiator = 0;
- vgt_dma_index_type = 0;
- break;
- case 4:
- vgt_draw_initiator = 0;
- vgt_dma_index_type = 1;
- break;
- case 0:
- vgt_draw_initiator = 2;
- vgt_dma_index_type = 0;
- break;
- default:
- R600_ERR("unsupported index size %d\n", draw->index_size);
- return;
- }
- if (r600_conv_pipe_prim(draw->mode, &prim))
- return;
- if (unlikely(rctx->ps_shader == NULL)) {
- R600_ERR("missing vertex shader\n");
- return;
- }
- if (unlikely(rctx->vs_shader == NULL)) {
- R600_ERR("missing vertex shader\n");
- return;
- }
- /* there should be enough input */
- if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) {
- R600_ERR("%d resources provided, expecting %d\n",
- rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource);
- return;
- }
-
- r600_spi_update(rctx);
-
- mask = 0;
- for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
- mask |= (0xF << (i * 4));
- }
-
- vgt.id = R600_PIPE_STATE_VGT;
- vgt.nregs = 0;
- r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw->index_bias, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw->max_index, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw->min_index, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL);
- r600_context_pipe_state_set(&rctx->ctx, &vgt);
-
- rdraw.vgt_num_indices = draw->count;
- rdraw.vgt_num_instances = 1;
- rdraw.vgt_index_type = vgt_dma_index_type;
- rdraw.vgt_draw_initiator = vgt_draw_initiator;
- rdraw.indices = NULL;
- if (draw->index_buffer) {
- rbuffer = (struct r600_resource*)draw->index_buffer;
- rdraw.indices = rbuffer->bo;
- rdraw.indices_bo_offset = draw->index_buffer_offset;
- }
- r600_context_draw(&rctx->ctx, &rdraw);
-}
-
-void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_drawl draw;
- boolean translate = FALSE;
-
- memset(&draw, 0, sizeof(struct r600_drawl));
- draw.ctx = ctx;
- draw.mode = info->mode;
- draw.start = info->start;
- draw.count = info->count;
- if (info->indexed && rctx->index_buffer.buffer) {
- draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size;
- draw.min_index = info->min_index;
- draw.max_index = info->max_index;
- draw.index_bias = info->index_bias;
-
- r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer,
- &rctx->index_buffer.index_size,
- &draw.start,
- info->count);
-
- draw.index_size = rctx->index_buffer.index_size;
- pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer);
- draw.index_buffer_offset = draw.start * draw.index_size;
- draw.start = 0;
- r600_upload_index_buffer(rctx, &draw);
- } else {
- draw.index_size = 0;
- draw.index_buffer = NULL;
- draw.min_index = info->min_index;
- draw.max_index = info->max_index;
- draw.index_bias = info->start;
- }
- r600_draw_common(&draw);
-
- if (translate)
- r600_end_vertex_translate(rctx);
-
- pipe_resource_reference(&draw.index_buffer, NULL);
-}
-
static void r600_set_blend_color(struct pipe_context *ctx,
const struct pipe_blend_color *state)
{
@@ -341,9 +117,10 @@ static void r600_set_blend_color(struct pipe_context *ctx,
static void *r600_create_blend_state(struct pipe_context *ctx,
const struct pipe_blend_state *state)
{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend);
struct r600_pipe_state *rstate;
- u32 color_control, target_mask;
+ u32 color_control = 0, target_mask;
if (blend == NULL) {
return NULL;
@@ -353,7 +130,10 @@ static void *r600_create_blend_state(struct pipe_context *ctx,
rstate->id = R600_PIPE_STATE_BLEND;
target_mask = 0;
- color_control = S_028808_PER_MRT_BLEND(1);
+
+ /* R600 does not support per-MRT blends */
+ if (rctx->family > CHIP_R600)
+ color_control |= S_028808_PER_MRT_BLEND(1);
if (state->logicop_enable) {
color_control |= (state->logicop_func << 16) | (state->logicop_func << 20);
} else {
@@ -376,8 +156,9 @@ static void *r600_create_blend_state(struct pipe_context *ctx,
}
}
blend->cb_target_mask = target_mask;
+ /* MULTIWRITE_ENABLE is controlled by r600_pipe_shader_ps(). */
r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
- color_control, 0xFFFFFFFF, NULL);
+ color_control, 0xFFFFFFFD, NULL);
for (int i = 0; i < 8; i++) {
unsigned eqRGB = state->rt[i].rgb_func;
@@ -403,10 +184,11 @@ static void *r600_create_blend_state(struct pipe_context *ctx,
bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA));
}
- r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL);
- if (i == 0) {
+ /* R600 does not support per-MRT blends */
+ if (rctx->family > CHIP_R600)
+ r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL);
+ if (i == 0)
r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL);
- }
}
return rstate;
}
@@ -424,10 +206,6 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
rstate->id = R600_PIPE_STATE_DSA;
/* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */
- /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be
- * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will
- * be set if shader use texkill instruction
- */
db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
stencil_ref_mask = 0;
stencil_ref_mask_bf = 0;
@@ -486,7 +264,10 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL);
+ /* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE,
+ * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by
+ * r600_pipe_shader_ps().*/
+ r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL);
r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL);
@@ -582,11 +363,16 @@ static void *r600_create_sampler_state(struct pipe_context *ctx,
{
struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
union util_color uc;
+ uint32_t coord_trunc = 0;
if (rstate == NULL) {
return NULL;
}
+ if ((state->mag_img_filter == PIPE_TEX_FILTER_NEAREST) ||
+ (state->min_img_filter == PIPE_TEX_FILTER_NEAREST))
+ coord_trunc = 1;
+
rstate->id = R600_PIPE_STATE_SAMPLER;
util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0,
@@ -603,7 +389,9 @@ static void *r600_create_sampler_state(struct pipe_context *ctx,
S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) |
S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) |
S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0,
+ S_03C008_MC_COORD_TRUNCATE(coord_trunc) |
+ S_03C008_TYPE(1), 0xFFFFFFFF, NULL);
if (uc.ui) {
r600_pipe_state_add_reg(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL);
@@ -626,6 +414,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
uint32_t word4 = 0, yuv_format = 0, pitch = 0;
unsigned char swizzle[4], array_mode = 0, tile_type = 0;
struct r600_bo *bo[2];
+ unsigned height, depth;
if (resource == NULL)
return NULL;
@@ -643,7 +432,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
swizzle[1] = state->swizzle_g;
swizzle[2] = state->swizzle_b;
swizzle[3] = state->swizzle_a;
- format = r600_translate_texformat(state->format,
+ format = r600_translate_texformat(ctx->screen, state->format,
swizzle,
&word4, &yuv_format);
if (format == ~0) {
@@ -653,22 +442,30 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
if (desc == NULL) {
R600_ERR("unknow format %d\n", state->format);
}
- tmp = (struct r600_resource_texture*)texture;
+ tmp = (struct r600_resource_texture *)texture;
+ if (tmp->depth && !tmp->is_flushing_texture) {
+ r600_texture_depth_flush(ctx, texture, TRUE);
+ tmp = tmp->flushed_depth_texture;
+ }
+
+ if (tmp->force_int_type) {
+ word4 &= C_038010_NUM_FORMAT_ALL;
+ word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
+ }
rbuffer = &tmp->resource;
bo[0] = rbuffer->bo;
bo[1] = rbuffer->bo;
- /* FIXME depth texture decompression */
- if (tmp->depth) {
- r600_texture_depth_flush(ctx, texture);
- tmp = (struct r600_resource_texture*)texture;
- rbuffer = &tmp->flushed_depth_texture->resource;
- bo[0] = rbuffer->bo;
- bo[1] = rbuffer->bo;
- }
- pitch = align(tmp->pitch_in_pixels[0], 8);
- if (tmp->tiled) {
- array_mode = tmp->array_mode[0];
- tile_type = tmp->tile_type;
+ pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8);
+ array_mode = tmp->array_mode[0];
+ tile_type = tmp->tile_type;
+
+ height = texture->height0;
+ depth = texture->depth0;
+ if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
+ height = 1;
+ depth = texture->array_size;
+ } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) {
+ depth = texture->array_size;
}
/* FIXME properly handle first level != 0 */
@@ -679,22 +476,22 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
S_038000_PITCH((pitch / 8) - 1) |
S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
- S_038004_TEX_HEIGHT(texture->height0 - 1) |
- S_038004_TEX_DEPTH(texture->depth0 - 1) |
+ S_038004_TEX_HEIGHT(height - 1) |
+ S_038004_TEX_DEPTH(depth - 1) |
S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
(tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
(tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]);
r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
- word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) |
- S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) |
+ word4 |
+ S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_NO_ZERO) |
S_038010_REQUEST_SIZE(1) |
S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
S_038014_LAST_LEVEL(state->u.tex.last_level) |
- S_038014_BASE_ARRAY(0) |
- S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL);
+ S_038014_BASE_ARRAY(state->u.tex.first_layer) |
+ S_038014_LAST_ARRAY(state->u.tex.last_layer), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL);
@@ -709,7 +506,8 @@ static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count,
for (int i = 0; i < count; i++) {
if (resource[i]) {
- r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i);
+ r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state,
+ i + R600_MAX_CONST_BUFFERS);
}
}
}
@@ -724,9 +522,11 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
for (i = 0; i < count; i++) {
if (&rctx->ps_samplers.views[i]->base != views[i]) {
if (resource[i])
- r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
+ r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state,
+ i + R600_MAX_CONST_BUFFERS);
else
- r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+ r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+ i + R600_MAX_CONST_BUFFERS);
pipe_sampler_view_reference(
(struct pipe_sampler_view **)&rctx->ps_samplers.views[i],
@@ -736,7 +536,8 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
}
for (i = count; i < NUM_TEX_UNITS; i++) {
if (rctx->ps_samplers.views[i]) {
- r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+ r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+ i + R600_MAX_CONST_BUFFERS);
pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL);
}
}
@@ -918,33 +719,55 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
unsigned offset;
const struct util_format_description *desc;
struct r600_bo *bo[3];
+ int i;
surf = (struct r600_surface *)state->cbufs[cb];
rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
+
+ if (rtex->depth && !rtex->is_flushing_texture) {
+ r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE);
+ rtex = rtex->flushed_depth_texture;
+ }
+
rbuffer = &rtex->resource;
bo[0] = rbuffer->bo;
bo[1] = rbuffer->bo;
bo[2] = rbuffer->bo;
/* XXX quite sure for dx10+ hw don't need any offset hacks */
- offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture,
+ offset = r600_texture_get_offset(rtex,
level, state->cbufs[cb]->u.tex.first_layer);
- pitch = rtex->pitch_in_pixels[level] / 8 - 1;
- slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
+ pitch = rtex->pitch_in_blocks[level] / 8 - 1;
+ slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
ntype = 0;
- desc = util_format_description(rtex->resource.base.b.format);
+ desc = util_format_description(surf->base.format);
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
ntype = V_0280A0_NUMBER_SRGB;
- format = r600_translate_colorformat(rtex->resource.base.b.format);
- swap = r600_translate_colorswap(rtex->resource.base.b.format);
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ format = r600_translate_colorformat(surf->base.format);
+ swap = r600_translate_colorswap(surf->base.format);
+
+ /* disable when gallium grows int textures */
+ if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type)
+ ntype = 4;
+
color_info = S_0280A0_FORMAT(format) |
S_0280A0_COMP_SWAP(swap) |
S_0280A0_ARRAY_MODE(rtex->array_mode[level]) |
S_0280A0_BLEND_CLAMP(1) |
S_0280A0_NUMBER_TYPE(ntype);
- if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
- color_info |= S_0280A0_SOURCE_FORMAT(1);
+
+ /* on R600 this can't be set if BLEND_CLAMP isn't set,
+ if BLEND_FLOAT32 is set of > 11 bits in a UNORM or SNORM */
+ if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS &&
+ desc->channel[i].size < 12)
+ color_info |= S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM);
r600_pipe_state_add_reg(rstate,
R_028040_CB_COLOR0_BASE + cb * 4,
@@ -988,17 +811,14 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
surf = (struct r600_surface *)state->zsbuf;
rtex = (struct r600_resource_texture*)state->zsbuf->texture;
- rtex->tiled = 1;
- rtex->array_mode[level] = 2;
- rtex->tile_type = 1;
- rtex->depth = 1;
+
rbuffer = &rtex->resource;
/* XXX quite sure for dx10+ hw don't need any offset hacks */
offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture,
level, state->zsbuf->u.tex.first_layer);
- pitch = rtex->pitch_in_pixels[level] / 8 - 1;
- slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
+ pitch = rtex->pitch_in_blocks[level] / 8 - 1;
+ slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
format = r600_translate_dbformat(state->zsbuf->texture->format);
r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE,
@@ -1029,8 +849,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
util_copy_framebuffer_state(&rctx->framebuffer, state);
- rctx->pframebuffer = &rctx->framebuffer;
-
/* build states */
for (int i = 0; i < state->nr_cbufs; i++) {
r600_cb(rctx, rstate, state, i);
@@ -1116,48 +934,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
}
}
-static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
- struct pipe_resource *buffer)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_resource *rbuffer = (struct r600_resource*)buffer;
-
- /* Note that the state tracker can unbind constant buffers by
- * passing NULL here.
- */
- if (buffer == NULL) {
- return;
- }
-
- switch (shader) {
- case PIPE_SHADER_VERTEX:
- rctx->vs_const_buffer.nregs = 0;
- r600_pipe_state_add_reg(&rctx->vs_const_buffer,
- R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
- ALIGN_DIVUP(buffer->width0 >> 4, 16),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&rctx->vs_const_buffer,
- R_028980_ALU_CONST_CACHE_VS_0,
- r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo);
- r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
- break;
- case PIPE_SHADER_FRAGMENT:
- rctx->ps_const_buffer.nregs = 0;
- r600_pipe_state_add_reg(&rctx->ps_const_buffer,
- R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
- ALIGN_DIVUP(buffer->width0 >> 4, 16),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&rctx->ps_const_buffer,
- R_028940_ALU_CONST_CACHE_PS_0,
- r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo);
- r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
- break;
- default:
- R600_ERR("unsupported %d\n", shader);
- return;
- }
-}
-
void r600_init_state_functions(struct r600_pipe_context *rctx)
{
rctx->context.create_blend_state = r600_create_blend_state;
@@ -1197,6 +973,7 @@ void r600_init_state_functions(struct r600_pipe_context *rctx)
rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view;
rctx->context.set_viewport_state = r600_set_viewport_state;
rctx->context.sampler_view_destroy = r600_sampler_view_destroy;
+ rctx->context.redefine_user_buffer = u_default_redefine_user_buffer;
}
void r600_init_config(struct r600_pipe_context *rctx)
@@ -1450,6 +1227,163 @@ void r600_init_config(struct r600_pipe_context *rctx)
r600_context_pipe_state_set(&rctx->ctx, rstate);
}
+void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+{
+ struct r600_pipe_state *rstate = &shader->rstate;
+ struct r600_shader *rshader = &shader->shader;
+ unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control;
+ int pos_index = -1, face_index = -1;
+
+ rstate->nregs = 0;
+
+ for (i = 0; i < rshader->ninput; i++) {
+ if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
+ pos_index = i;
+ if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
+ face_index = i;
+ }
+
+ db_shader_control = 0;
+ for (i = 0; i < rshader->noutput; i++) {
+ if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
+ db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1);
+ if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
+ db_shader_control |= S_02880C_STENCIL_REF_EXPORT_ENABLE(1);
+ }
+ if (rshader->uses_kill)
+ db_shader_control |= S_02880C_KILL_ENABLE(1);
+
+ exports_ps = 0;
+ num_cout = 0;
+ for (i = 0; i < rshader->noutput; i++) {
+ if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
+ rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
+ exports_ps |= 1;
+ else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
+ num_cout++;
+ }
+ }
+ exports_ps |= S_028854_EXPORT_COLORS(num_cout);
+ if (!exports_ps) {
+ /* always at least export 1 component per pixel */
+ exports_ps = 2;
+ }
+
+ spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
+ S_0286CC_PERSP_GRADIENT_ENA(1);
+ spi_input_z = 0;
+ if (pos_index != -1) {
+ spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
+ S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
+ S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
+ S_0286CC_BARYC_SAMPLE_CNTL(1));
+ spi_input_z |= 1;
+ }
+
+ spi_ps_in_control_1 = 0;
+ if (face_index != -1) {
+ spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
+ S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
+ }
+
+ r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028840_SQ_PGM_START_PS,
+ r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
+ r600_pipe_state_add_reg(rstate,
+ R_028850_SQ_PGM_RESOURCES_PS,
+ S_028868_NUM_GPRS(rshader->bc.ngpr) |
+ S_028868_STACK_SIZE(rshader->bc.nstack),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028854_SQ_PGM_EXPORTS_PS,
+ exports_ps, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_0288CC_SQ_PGM_CF_OFFSET_PS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
+ S_028808_MULTIWRITE_ENABLE(!!rshader->fs_write_all),
+ S_028808_MULTIWRITE_ENABLE(1),
+ NULL);
+ /* only set some bits here, the other bits are set in the dsa state */
+ r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL,
+ db_shader_control,
+ S_02880C_Z_EXPORT_ENABLE(1) |
+ S_02880C_STENCIL_REF_EXPORT_ENABLE(1) |
+ S_02880C_KILL_ENABLE(1),
+ NULL);
+
+ r600_pipe_state_add_reg(rstate,
+ R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
+ 0xFFFFFFFF, NULL);
+}
+
+void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+{
+ struct r600_pipe_state *rstate = &shader->rstate;
+ struct r600_shader *rshader = &shader->shader;
+ unsigned spi_vs_out_id[10];
+ unsigned i, tmp;
+
+ /* clear previous register */
+ rstate->nregs = 0;
+
+ /* so far never got proper semantic id from tgsi */
+ /* FIXME better to move this in config things so they get emited
+ * only one time per cs
+ */
+ for (i = 0; i < 10; i++) {
+ spi_vs_out_id[i] = 0;
+ }
+ for (i = 0; i < 32; i++) {
+ tmp = i << ((i & 3) * 8);
+ spi_vs_out_id[i / 4] |= tmp;
+ }
+ for (i = 0; i < 10; i++) {
+ r600_pipe_state_add_reg(rstate,
+ R_028614_SPI_VS_OUT_ID_0 + i * 4,
+ spi_vs_out_id[i], 0xFFFFFFFF, NULL);
+ }
+
+ r600_pipe_state_add_reg(rstate,
+ R_0286C4_SPI_VS_OUT_CONFIG,
+ S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028868_SQ_PGM_RESOURCES_VS,
+ S_028868_NUM_GPRS(rshader->bc.ngpr) |
+ S_028868_STACK_SIZE(rshader->bc.nstack),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_0288D0_SQ_PGM_CF_OFFSET_VS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028858_SQ_PGM_START_VS,
+ r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
+
+ r600_pipe_state_add_reg(rstate,
+ R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
+ 0xFFFFFFFF, NULL);
+}
+
+void r600_fetch_shader(struct r600_vertex_element *ve)
+{
+ struct r600_pipe_state *rstate;
+
+ rstate = &ve->rstate;
+ rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+ rstate->nregs = 0;
+ r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
+ r600_bo_offset(ve->fetch_shader) >> 8,
+ 0xFFFFFFFF, ve->fetch_shader);
+}
+
void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx)
{
struct pipe_depth_stencil_alpha_state dsa;
@@ -1487,3 +1421,25 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx)
S_028D0C_COPY_CENTROID(1), NULL);
return rstate;
}
+
+void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
+ struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride)
+{
+ r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0,
+ offset, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
+ rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
+ S_038008_STRIDE(stride),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
+ 0xC0000000, 0xFFFFFFFF, NULL);
+}
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index c647e77b37..43dad0c802 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -27,7 +27,9 @@
#include <util/u_memory.h>
#include <util/u_format.h>
#include <pipebuffer/pb_buffer.h>
+#include "pipe/p_shader_tokens.h"
#include "r600_pipe.h"
+#include "r600d.h"
/* common state between evergreen and r600 */
void r600_bind_blend_state(struct pipe_context *ctx, void *state)
@@ -121,17 +123,11 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
rctx->vertex_elements = v;
if (v) {
+ u_vbuf_mgr_bind_vertex_elements(rctx->vbuf_mgr, state,
+ v->vmgr_elements);
+
rctx->states[v->rstate.id] = &v->rstate;
r600_context_pipe_state_set(&rctx->ctx, &v->rstate);
- if (rctx->family >= CHIP_CEDAR) {
- evergreen_vertex_buffer_update(rctx);
- } else {
- r600_vertex_buffer_update(rctx);
- }
- }
-
- if (v) {
-// rctx->vs_rebuild = TRUE;
}
}
@@ -147,6 +143,7 @@ void r600_delete_vertex_element(struct pipe_context *ctx, void *state)
rctx->vertex_elements = NULL;
r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL);
+ u_vbuf_mgr_destroy_vertex_elements(rctx->vbuf_mgr, v->vmgr_elements);
FREE(state);
}
@@ -171,42 +168,28 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
const struct pipe_vertex_buffer *buffers)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct pipe_vertex_buffer *vbo;
- unsigned max_index = (unsigned)-1;
-
- for (int i = 0; i < rctx->nvertex_buffer; i++) {
- pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL);
- }
- memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count);
-
- for (int i = 0; i < count; i++) {
- vbo = (struct pipe_vertex_buffer*)&buffers[i];
-
- rctx->vertex_buffer[i].buffer = NULL;
- if (r600_buffer_is_user_buffer(buffers[i].buffer))
- rctx->any_user_vbs = TRUE;
- pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer);
+ int i;
- if (vbo->max_index == ~0) {
- if (!vbo->stride)
- vbo->max_index = 1;
- else
- vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride;
+ /* Zero states. */
+ for (i = 0; i < count; i++) {
+ if (!buffers[i].buffer) {
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i);
+ } else {
+ r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i);
+ }
}
- max_index = MIN2(vbo->max_index, max_index);
}
- rctx->nvertex_buffer = count;
- rctx->vb_max_index = max_index;
- if (rctx->family >= CHIP_CEDAR) {
- evergreen_vertex_buffer_update(rctx);
- } else {
- r600_vertex_buffer_update(rctx);
+ for (; i < rctx->vbuf_mgr->nr_real_vertex_buffers; i++) {
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i);
+ } else {
+ r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i);
+ }
}
-}
-
-#define FORMAT_REPLACE(what, withwhat) \
- case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break
+ u_vbuf_mgr_set_vertex_buffers(rctx->vbuf_mgr, count, buffers);
+}
void *r600_create_vertex_elements(struct pipe_context *ctx,
unsigned count,
@@ -214,33 +197,15 @@ void *r600_create_vertex_elements(struct pipe_context *ctx,
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element);
- enum pipe_format *format;
- int i;
assert(count < 32);
if (!v)
return NULL;
v->count = count;
- memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element));
-
- for (i = 0; i < count; i++) {
- v->hw_format[i] = v->elements[i].src_format;
- format = &v->hw_format[i];
-
- switch (*format) {
- FORMAT_REPLACE(R64_FLOAT, R32_FLOAT);
- FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT);
- FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT);
- FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT);
- default:;
- }
- v->incompatible_layout =
- v->incompatible_layout ||
- v->elements[i].src_format != v->hw_format[i];
-
- v->hw_format_size[i] = align(util_format_get_blocksize(v->hw_format[i]), 4);
- }
+ v->vmgr_elements =
+ u_vbuf_mgr_create_vertex_elements(rctx->vbuf_mgr, count,
+ elements, v->elements);
if (r600_vertex_elements_build_fetch_shader(rctx, v)) {
FREE(v);
@@ -310,3 +275,274 @@ void r600_delete_vs_shader(struct pipe_context *ctx, void *state)
r600_pipe_shader_destroy(ctx, shader);
free(shader);
}
+
+/* FIXME optimize away spi update when it's not needed */
+void r600_spi_update(struct r600_pipe_context *rctx)
+{
+ struct r600_pipe_shader *shader = rctx->ps_shader;
+ struct r600_pipe_state rstate;
+ struct r600_shader *rshader = &shader->shader;
+ unsigned i, tmp;
+
+ rstate.nregs = 0;
+ for (i = 0; i < rshader->ninput; i++) {
+ tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
+
+ if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
+ rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
+ rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
+ tmp |= S_028644_FLAT_SHADE(rctx->flatshade);
+ }
+
+ if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
+ rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
+ tmp |= S_028644_PT_SPRITE_TEX(1);
+ }
+
+ if (rctx->family < CHIP_CEDAR) {
+ if (rshader->input[i].centroid)
+ tmp |= S_028644_SEL_CENTROID(1);
+
+ if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
+ tmp |= S_028644_SEL_LINEAR(1);
+ }
+
+ r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
+ }
+ r600_context_pipe_state_set(&rctx->ctx, &rstate);
+}
+
+void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
+ struct pipe_resource *buffer)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_resource_buffer *rbuffer = r600_buffer(buffer);
+ struct r600_pipe_state *rstate;
+ uint32_t offset;
+
+ /* Note that the state tracker can unbind constant buffers by
+ * passing NULL here.
+ */
+ if (buffer == NULL) {
+ return;
+ }
+
+ r600_upload_const_buffer(rctx, &rbuffer, &offset);
+ offset += r600_bo_offset(rbuffer->r.bo);
+
+ switch (shader) {
+ case PIPE_SHADER_VERTEX:
+ rctx->vs_const_buffer.nregs = 0;
+ r600_pipe_state_add_reg(&rctx->vs_const_buffer,
+ R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
+ ALIGN_DIVUP(buffer->width0 >> 4, 16),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&rctx->vs_const_buffer,
+ R_028980_ALU_CONST_CACHE_VS_0,
+ offset >> 8, 0xFFFFFFFF, rbuffer->r.bo);
+ r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
+
+ rstate = &rctx->vs_const_buffer_resource[index];
+ rstate->id = R600_PIPE_STATE_RESOURCE;
+ rstate->nregs = 0;
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
+ } else {
+ r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
+ }
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ rctx->ps_const_buffer.nregs = 0;
+ r600_pipe_state_add_reg(&rctx->ps_const_buffer,
+ R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
+ ALIGN_DIVUP(buffer->width0 >> 4, 16),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&rctx->ps_const_buffer,
+ R_028940_ALU_CONST_CACHE_PS_0,
+ offset >> 8, 0xFFFFFFFF, rbuffer->r.bo);
+ r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
+
+ rstate = &rctx->ps_const_buffer_resource[index];
+ rstate->id = R600_PIPE_STATE_RESOURCE;
+ rstate->nregs = 0;
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
+ } else {
+ r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
+ }
+ break;
+ default:
+ R600_ERR("unsupported %d\n", shader);
+ return;
+ }
+
+ if (buffer != &rbuffer->r.b.b.b)
+ pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL);
+}
+
+static void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
+{
+ struct r600_pipe_state *rstate;
+ struct r600_resource *rbuffer;
+ struct pipe_vertex_buffer *vertex_buffer;
+ unsigned i, count, offset;
+
+ if (rctx->vertex_elements->vbuffer_need_offset) {
+ /* one resource per vertex elements */
+ count = rctx->vertex_elements->count;
+ } else {
+ /* bind vertex buffer once */
+ count = rctx->vbuf_mgr->nr_real_vertex_buffers;
+ }
+
+ for (i = 0 ; i < count; i++) {
+ rstate = &rctx->fs_resource[i];
+ rstate->id = R600_PIPE_STATE_RESOURCE;
+ rstate->nregs = 0;
+
+ if (rctx->vertex_elements->vbuffer_need_offset) {
+ /* one resource per vertex elements */
+ unsigned vbuffer_index;
+ vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index;
+ vertex_buffer = &rctx->vbuf_mgr->vertex_buffer[vbuffer_index];
+ rbuffer = (struct r600_resource*)rctx->vbuf_mgr->real_vertex_buffer[vbuffer_index];
+ offset = rctx->vertex_elements->vbuffer_offset[i];
+ } else {
+ /* bind vertex buffer once */
+ vertex_buffer = &rctx->vbuf_mgr->vertex_buffer[i];
+ rbuffer = (struct r600_resource*)rctx->vbuf_mgr->real_vertex_buffer[i];
+ offset = 0;
+ }
+ if (vertex_buffer == NULL || rbuffer == NULL)
+ continue;
+ offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo);
+
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_pipe_set_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride);
+ evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
+ } else {
+ r600_pipe_set_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride);
+ r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
+ }
+ }
+}
+
+void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_resource *rbuffer;
+ u32 vgt_dma_index_type, vgt_draw_initiator, mask;
+ struct r600_draw rdraw;
+ struct r600_pipe_state vgt;
+ struct r600_drawl draw = {};
+ unsigned prim;
+
+ r600_flush_depth_textures(rctx);
+ u_vbuf_mgr_draw_begin(rctx->vbuf_mgr, info, NULL, NULL);
+ r600_vertex_buffer_update(rctx);
+
+ draw.info = *info;
+ draw.ctx = ctx;
+ if (info->indexed && rctx->index_buffer.buffer) {
+ draw.info.start += rctx->index_buffer.offset / rctx->index_buffer.index_size;
+ pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer);
+
+ r600_translate_index_buffer(rctx, &draw.index_buffer,
+ &rctx->index_buffer.index_size,
+ &draw.info.start,
+ info->count);
+
+ draw.index_size = rctx->index_buffer.index_size;
+ draw.index_buffer_offset = draw.info.start * draw.index_size;
+ draw.info.start = 0;
+
+ if (u_vbuf_resource(draw.index_buffer)->user_ptr) {
+ r600_upload_index_buffer(rctx, &draw);
+ }
+ } else {
+ draw.info.index_bias = info->start;
+ }
+
+ switch (draw.index_size) {
+ case 2:
+ vgt_draw_initiator = 0;
+ vgt_dma_index_type = 0;
+ break;
+ case 4:
+ vgt_draw_initiator = 0;
+ vgt_dma_index_type = 1;
+ break;
+ case 0:
+ vgt_draw_initiator = 2;
+ vgt_dma_index_type = 0;
+ break;
+ default:
+ R600_ERR("unsupported index size %d\n", draw.index_size);
+ return;
+ }
+ if (r600_conv_pipe_prim(draw.info.mode, &prim))
+ return;
+ if (unlikely(rctx->ps_shader == NULL)) {
+ R600_ERR("missing vertex shader\n");
+ return;
+ }
+ if (unlikely(rctx->vs_shader == NULL)) {
+ R600_ERR("missing vertex shader\n");
+ return;
+ }
+ /* there should be enough input */
+ if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) {
+ R600_ERR("%d resources provided, expecting %d\n",
+ rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource);
+ return;
+ }
+
+ r600_spi_update(rctx);
+
+ mask = 0;
+ for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
+ mask |= (0xF << (i * 4));
+ }
+
+ vgt.id = R600_PIPE_STATE_VGT;
+ vgt.nregs = 0;
+ r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL);
+ r600_context_pipe_state_set(&rctx->ctx, &vgt);
+
+ rdraw.vgt_num_indices = draw.info.count;
+ rdraw.vgt_num_instances = draw.info.instance_count;
+ rdraw.vgt_index_type = vgt_dma_index_type;
+ rdraw.vgt_draw_initiator = vgt_draw_initiator;
+ rdraw.indices = NULL;
+ if (draw.index_buffer) {
+ rbuffer = (struct r600_resource*)draw.index_buffer;
+ rdraw.indices = rbuffer->bo;
+ rdraw.indices_bo_offset = draw.index_buffer_offset;
+ }
+
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_context_draw(&rctx->ctx, &rdraw);
+ } else {
+ r600_context_draw(&rctx->ctx, &rdraw);
+ }
+
+ if (rctx->framebuffer.zsbuf)
+ {
+ struct pipe_resource *tex = rctx->framebuffer.zsbuf->texture;
+ ((struct r600_resource_texture *)tex)->dirty_db = TRUE;
+ }
+
+ pipe_resource_reference(&draw.index_buffer, NULL);
+
+ u_vbuf_mgr_draw_end(rctx->vbuf_mgr);
+}
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index d994196e19..3d0360485a 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -253,9 +253,13 @@ static inline unsigned r600_tex_dim(unsigned dim)
default:
case PIPE_TEXTURE_1D:
return V_038000_SQ_TEX_DIM_1D;
+ case PIPE_TEXTURE_1D_ARRAY:
+ return V_038000_SQ_TEX_DIM_1D_ARRAY;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
return V_038000_SQ_TEX_DIM_2D;
+ case PIPE_TEXTURE_2D_ARRAY:
+ return V_038000_SQ_TEX_DIM_2D_ARRAY;
case PIPE_TEXTURE_3D:
return V_038000_SQ_TEX_DIM_3D;
case PIPE_TEXTURE_CUBE:
@@ -285,10 +289,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
return V_0280A0_SWAP_ALT_REV;
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_L8_SRGB:
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return V_0280A0_SWAP_STD;
+ case PIPE_FORMAT_L4A4_UNORM:
+ return V_0280A0_SWAP_ALT;
+
/* 16-bit buffers. */
case PIPE_FORMAT_B5G6R5_UNORM:
return V_0280A0_SWAP_STD_REV;
@@ -305,6 +313,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
return V_0280A0_SWAP_STD;
case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_L8A8_SRGB:
return V_0280A0_SWAP_ALT;
case PIPE_FORMAT_R8G8_UNORM:
return V_0280A0_SWAP_STD;
@@ -327,6 +336,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_X8R8G8B8_UNORM:
return V_0280A0_SWAP_ALT_REV;
case PIPE_FORMAT_R8G8B8A8_SNORM:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8X8_UNORM:
return V_0280A0_SWAP_STD;
@@ -345,9 +355,11 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_R10G10B10A2_UNORM:
case PIPE_FORMAT_R10G10B10X2_SNORM:
- case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
- return V_0280A0_SWAP_STD_REV;
+ return V_0280A0_SWAP_STD;
+
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ return V_0280A0_SWAP_ALT;
case PIPE_FORMAT_R16G16_UNORM:
return V_0280A0_SWAP_STD;
@@ -355,14 +367,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
/* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
- // return FMT_16_16_16_16;
case PIPE_FORMAT_R16G16B16A16_FLOAT:
- // return FMT_16_16_16_16_FLOAT;
/* 128-bit buffers. */
case PIPE_FORMAT_R32G32B32A32_FLOAT:
- // return FMT_32_32_32_32_FLOAT;
- return 0;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return V_0280A0_SWAP_STD;
default:
R600_ERR("unsupported colorswap format %d\n", format);
return ~0;
@@ -373,10 +384,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
{
switch (format) {
+ case PIPE_FORMAT_L4A4_UNORM:
+ return V_0280A0_COLOR_4_4;
+
/* 8-bit buffers. */
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_L8_SRGB:
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return V_0280A0_COLOR_8;
@@ -397,6 +412,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
return V_0280A0_COLOR_16;
case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_L8A8_SRGB:
case PIPE_FORMAT_R8G8_UNORM:
return V_0280A0_COLOR_8_8;
@@ -423,7 +439,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_R10G10B10X2_SNORM:
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
- return V_0280A0_COLOR_10_10_10_2;
+ return V_0280A0_COLOR_2_10_10_10;
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
@@ -469,6 +485,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
return V_0280A0_COLOR_32_32_32_FLOAT;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
return V_0280A0_COLOR_32_32_32_32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return V_0280A0_COLOR_32_32_32_32;
/* YUV buffers. */
case PIPE_FORMAT_UYVY:
@@ -479,9 +498,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
}
}
-static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format)
+static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
{
- return r600_translate_texformat(format, NULL, NULL, NULL) != ~0;
+ return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0;
}
static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format)
@@ -495,21 +514,13 @@ static INLINE boolean r600_is_zs_format_supported(enum pipe_format format)
return r600_translate_dbformat(format) != ~0;
}
-static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format)
+static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format,
+ enum radeon_family family)
{
- return r600_translate_colorformat(format) != ~0;
-}
-
-static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format)
-{
- uint32_t result = 0;
- const struct util_format_description *desc;
unsigned i;
-
- desc = util_format_description(format);
- if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
- goto out_unknown;
- }
+ const struct util_format_description *desc = util_format_description(format);
+ if (!desc)
+ return FALSE;
/* Find the first non-VOID channel. */
for (i = 0; i < 4; i++) {
@@ -517,122 +528,23 @@ static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format)
break;
}
}
-
- switch (desc->channel[i].type) {
- /* Half-floats, floats, doubles */
- case UTIL_FORMAT_TYPE_FLOAT:
- switch (desc->channel[i].size) {
- case 16:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_16_FLOAT;
- break;
- case 2:
- result = FMT_16_16_FLOAT;
- break;
- case 3:
- result = FMT_16_16_16_FLOAT;
- break;
- case 4:
- result = FMT_16_16_16_16_FLOAT;
- break;
- }
- break;
- case 32:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_32_FLOAT;
- break;
- case 2:
- result = FMT_32_32_FLOAT;
- break;
- case 3:
- result = FMT_32_32_32_FLOAT;
- break;
- case 4:
- result = FMT_32_32_32_32_FLOAT;
- break;
- }
- break;
- default:
- goto out_unknown;
- }
- break;
- /* Unsigned ints */
- case UTIL_FORMAT_TYPE_UNSIGNED:
- /* Signed ints */
- case UTIL_FORMAT_TYPE_SIGNED:
- switch (desc->channel[i].size) {
- case 8:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_8;
- break;
- case 2:
- result = FMT_8_8;
- break;
- case 3:
- // result = FMT_8_8_8; /* fails piglit draw-vertices test */
- // break;
- case 4:
- result = FMT_8_8_8_8;
- break;
- }
- break;
- case 16:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_16;
- break;
- case 2:
- result = FMT_16_16;
- break;
- case 3:
- // result = FMT_16_16_16; /* fails piglit draw-vertices test */
- // break;
- case 4:
- result = FMT_16_16_16_16;
- break;
- }
- break;
- case 32:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_32;
- break;
- case 2:
- result = FMT_32_32;
- break;
- case 3:
- result = FMT_32_32_32;
- break;
- case 4:
- result = FMT_32_32_32_32;
- break;
- }
- break;
- default:
- goto out_unknown;
- }
- break;
- default:
- goto out_unknown;
- }
-
- result = S_038008_DATA_FORMAT(result);
-
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
- result |= S_038008_FORMAT_COMP_ALL(1);
- }
- if (desc->channel[i].normalized) {
- result |= S_038008_NUM_FORMAT_ALL(0);
- } else {
- result |= S_038008_NUM_FORMAT_ALL(2);
- }
- return result;
-out_unknown:
- R600_ERR("unsupported vertex format %s\n", util_format_name(format));
- return ~0;
+ if (i == 4)
+ return FALSE;
+
+ /* No fixed, no double. */
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
+ desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED ||
+ (desc->channel[i].size == 64 &&
+ desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT))
+ return FALSE;
+
+ /* No scaled/norm formats with 32 bits per channel. */
+ if (desc->channel[i].size == 32 &&
+ (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED ||
+ desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED))
+ return FALSE;
+
+ return TRUE;
}
#endif
diff --git a/src/gallium/drivers/r600/r600_states_inc.h b/src/gallium/drivers/r600/r600_states_inc.h
deleted file mode 100644
index 1c8075ebdb..0000000000
--- a/src/gallium/drivers/r600/r600_states_inc.h
+++ /dev/null
@@ -1,543 +0,0 @@
-/* This file is autogenerated from r600_states.h - do not edit directly */
-/* autogenerating script is gen_r600_states.py */
-
-/* R600_CONFIG */
-#define R600_CONFIG__SQ_CONFIG 0
-#define R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1 1
-#define R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2 2
-#define R600_CONFIG__SQ_THREAD_RESOURCE_MGMT 3
-#define R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1 4
-#define R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2 5
-#define R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 6
-#define R600_CONFIG__TA_CNTL_AUX 7
-#define R600_CONFIG__VC_ENHANCE 8
-#define R600_CONFIG__DB_DEBUG 9
-#define R600_CONFIG__DB_WATERMARKS 10
-#define R600_CONFIG__SX_MISC 11
-#define R600_CONFIG__SPI_THREAD_GROUPING 12
-#define R600_CONFIG__SQ_ESGS_RING_ITEMSIZE 13
-#define R600_CONFIG__SQ_GSVS_RING_ITEMSIZE 14
-#define R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE 15
-#define R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE 16
-#define R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE 17
-#define R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE 18
-#define R600_CONFIG__SQ_FBUF_RING_ITEMSIZE 19
-#define R600_CONFIG__SQ_REDUC_RING_ITEMSIZE 20
-#define R600_CONFIG__SQ_GS_VERT_ITEMSIZE 21
-#define R600_CONFIG__VGT_OUTPUT_PATH_CNTL 22
-#define R600_CONFIG__VGT_HOS_CNTL 23
-#define R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL 24
-#define R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL 25
-#define R600_CONFIG__VGT_HOS_REUSE_DEPTH 26
-#define R600_CONFIG__VGT_GROUP_PRIM_TYPE 27
-#define R600_CONFIG__VGT_GROUP_FIRST_DECR 28
-#define R600_CONFIG__VGT_GROUP_DECR 29
-#define R600_CONFIG__VGT_GROUP_VECT_0_CNTL 30
-#define R600_CONFIG__VGT_GROUP_VECT_1_CNTL 31
-#define R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL 32
-#define R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL 33
-#define R600_CONFIG__VGT_GS_MODE 34
-#define R600_CONFIG__PA_SC_MODE_CNTL 35
-#define R600_CONFIG__VGT_STRMOUT_EN 36
-#define R600_CONFIG__VGT_REUSE_OFF 37
-#define R600_CONFIG__VGT_VTX_CNT_EN 38
-#define R600_CONFIG__VGT_STRMOUT_BUFFER_EN 39
-#define R600_CONFIG_SIZE 40
-#define R600_CONFIG_PM4 128
-
-/* R600_CB_CNTL */
-#define R600_CB_CNTL__CB_CLEAR_RED 0
-#define R600_CB_CNTL__CB_CLEAR_GREEN 1
-#define R600_CB_CNTL__CB_CLEAR_BLUE 2
-#define R600_CB_CNTL__CB_CLEAR_ALPHA 3
-#define R600_CB_CNTL__CB_SHADER_MASK 4
-#define R600_CB_CNTL__CB_TARGET_MASK 5
-#define R600_CB_CNTL__CB_FOG_RED 6
-#define R600_CB_CNTL__CB_FOG_GREEN 7
-#define R600_CB_CNTL__CB_FOG_BLUE 8
-#define R600_CB_CNTL__CB_COLOR_CONTROL 9
-#define R600_CB_CNTL__PA_SC_AA_CONFIG 10
-#define R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX 11
-#define R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX 12
-#define R600_CB_CNTL__CB_CLRCMP_CONTROL 13
-#define R600_CB_CNTL__CB_CLRCMP_SRC 14
-#define R600_CB_CNTL__CB_CLRCMP_DST 15
-#define R600_CB_CNTL__CB_CLRCMP_MSK 16
-#define R600_CB_CNTL__PA_SC_AA_MASK 17
-#define R600_CB_CNTL__CB_SHADER_CONTROL 18
-#define R600_CB_CNTL_SIZE 19
-#define R600_CB_CNTL_PM4 128
-
-/* R600_RASTERIZER */
-#define R600_RASTERIZER__SPI_INTERP_CONTROL_0 0
-#define R600_RASTERIZER__PA_CL_CLIP_CNTL 1
-#define R600_RASTERIZER__PA_SU_SC_MODE_CNTL 2
-#define R600_RASTERIZER__PA_CL_VS_OUT_CNTL 3
-#define R600_RASTERIZER__PA_CL_NANINF_CNTL 4
-#define R600_RASTERIZER__PA_SU_POINT_SIZE 5
-#define R600_RASTERIZER__PA_SU_POINT_MINMAX 6
-#define R600_RASTERIZER__PA_SU_LINE_CNTL 7
-#define R600_RASTERIZER__PA_SC_LINE_STIPPLE 8
-#define R600_RASTERIZER__PA_SC_MPASS_PS_CNTL 9
-#define R600_RASTERIZER__PA_SC_LINE_CNTL 10
-#define R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ 11
-#define R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ 12
-#define R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ 13
-#define R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ 14
-#define R600_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL 15
-#define R600_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP 16
-#define R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE 17
-#define R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET 18
-#define R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE 19
-#define R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET 20
-#define R600_RASTERIZER_SIZE 21
-#define R600_RASTERIZER_PM4 128
-
-/* R600_VIEWPORT */
-#define R600_VIEWPORT__PA_SC_VPORT_ZMIN_0 0
-#define R600_VIEWPORT__PA_SC_VPORT_ZMAX_0 1
-#define R600_VIEWPORT__PA_CL_VPORT_XSCALE_0 2
-#define R600_VIEWPORT__PA_CL_VPORT_YSCALE_0 3
-#define R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0 4
-#define R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0 5
-#define R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0 6
-#define R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0 7
-#define R600_VIEWPORT__PA_CL_VTE_CNTL 8
-#define R600_VIEWPORT_SIZE 9
-#define R600_VIEWPORT_PM4 128
-
-/* R600_SCISSOR */
-#define R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL 0
-#define R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR 1
-#define R600_SCISSOR__PA_SC_WINDOW_OFFSET 2
-#define R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL 3
-#define R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR 4
-#define R600_SCISSOR__PA_SC_CLIPRECT_RULE 5
-#define R600_SCISSOR__PA_SC_CLIPRECT_0_TL 6
-#define R600_SCISSOR__PA_SC_CLIPRECT_0_BR 7
-#define R600_SCISSOR__PA_SC_CLIPRECT_1_TL 8
-#define R600_SCISSOR__PA_SC_CLIPRECT_1_BR 9
-#define R600_SCISSOR__PA_SC_CLIPRECT_2_TL 10
-#define R600_SCISSOR__PA_SC_CLIPRECT_2_BR 11
-#define R600_SCISSOR__PA_SC_CLIPRECT_3_TL 12
-#define R600_SCISSOR__PA_SC_CLIPRECT_3_BR 13
-#define R600_SCISSOR__PA_SC_EDGERULE 14
-#define R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL 15
-#define R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR 16
-#define R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL 17
-#define R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR 18
-#define R600_SCISSOR_SIZE 19
-#define R600_SCISSOR_PM4 128
-
-/* R600_BLEND */
-#define R600_BLEND__CB_BLEND_RED 0
-#define R600_BLEND__CB_BLEND_GREEN 1
-#define R600_BLEND__CB_BLEND_BLUE 2
-#define R600_BLEND__CB_BLEND_ALPHA 3
-#define R600_BLEND__CB_BLEND0_CONTROL 4
-#define R600_BLEND__CB_BLEND1_CONTROL 5
-#define R600_BLEND__CB_BLEND2_CONTROL 6
-#define R600_BLEND__CB_BLEND3_CONTROL 7
-#define R600_BLEND__CB_BLEND4_CONTROL 8
-#define R600_BLEND__CB_BLEND5_CONTROL 9
-#define R600_BLEND__CB_BLEND6_CONTROL 10
-#define R600_BLEND__CB_BLEND7_CONTROL 11
-#define R600_BLEND__CB_BLEND_CONTROL 12
-#define R600_BLEND_SIZE 13
-#define R600_BLEND_PM4 128
-
-/* R600_DSA */
-#define R600_DSA__DB_STENCIL_CLEAR 0
-#define R600_DSA__DB_DEPTH_CLEAR 1
-#define R600_DSA__SX_ALPHA_TEST_CONTROL 2
-#define R600_DSA__DB_STENCILREFMASK 3
-#define R600_DSA__DB_STENCILREFMASK_BF 4
-#define R600_DSA__SX_ALPHA_REF 5
-#define R600_DSA__SPI_FOG_FUNC_SCALE 6
-#define R600_DSA__SPI_FOG_FUNC_BIAS 7
-#define R600_DSA__SPI_FOG_CNTL 8
-#define R600_DSA__DB_DEPTH_CONTROL 9
-#define R600_DSA__DB_SHADER_CONTROL 10
-#define R600_DSA__DB_RENDER_CONTROL 11
-#define R600_DSA__DB_RENDER_OVERRIDE 12
-#define R600_DSA__DB_SRESULTS_COMPARE_STATE1 13
-#define R600_DSA__DB_PRELOAD_CONTROL 14
-#define R600_DSA__DB_ALPHA_TO_MASK 15
-#define R600_DSA_SIZE 16
-#define R600_DSA_PM4 128
-
-/* R600_VS_SHADER */
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_0 0
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_1 1
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_2 2
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_3 3
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_4 4
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_5 5
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_6 6
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_7 7
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_8 8
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_9 9
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_10 10
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_11 11
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_12 12
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_13 13
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_14 14
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_15 15
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_16 16
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_17 17
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_18 18
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_19 19
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_20 20
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_21 21
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_22 22
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_23 23
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_24 24
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_25 25
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_26 26
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_27 27
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_28 28
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_29 29
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_30 30
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_31 31
-#define R600_VS_SHADER__SPI_VS_OUT_ID_0 32
-#define R600_VS_SHADER__SPI_VS_OUT_ID_1 33
-#define R600_VS_SHADER__SPI_VS_OUT_ID_2 34
-#define R600_VS_SHADER__SPI_VS_OUT_ID_3 35
-#define R600_VS_SHADER__SPI_VS_OUT_ID_4 36
-#define R600_VS_SHADER__SPI_VS_OUT_ID_5 37
-#define R600_VS_SHADER__SPI_VS_OUT_ID_6 38
-#define R600_VS_SHADER__SPI_VS_OUT_ID_7 39
-#define R600_VS_SHADER__SPI_VS_OUT_ID_8 40
-#define R600_VS_SHADER__SPI_VS_OUT_ID_9 41
-#define R600_VS_SHADER__SPI_VS_OUT_CONFIG 42
-#define R600_VS_SHADER__SQ_PGM_START_VS 43
-#define R600_VS_SHADER__SQ_PGM_RESOURCES_VS 44
-#define R600_VS_SHADER__SQ_PGM_START_FS 45
-#define R600_VS_SHADER__SQ_PGM_RESOURCES_FS 46
-#define R600_VS_SHADER__SQ_PGM_CF_OFFSET_VS 47
-#define R600_VS_SHADER__SQ_PGM_CF_OFFSET_FS 48
-#define R600_VS_SHADER_SIZE 49
-#define R600_VS_SHADER_PM4 128
-
-/* R600_PS_SHADER */
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 0
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_1 1
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_2 2
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_3 3
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_4 4
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_5 5
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_6 6
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_7 7
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_8 8
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_9 9
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_10 10
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_11 11
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_12 12
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_13 13
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_14 14
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_15 15
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_16 16
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_17 17
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_18 18
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_19 19
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_20 20
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_21 21
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_22 22
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_23 23
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_24 24
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_25 25
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_26 26
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_27 27
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_28 28
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_29 29
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_30 30
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_31 31
-#define R600_PS_SHADER__SPI_PS_IN_CONTROL_0 32
-#define R600_PS_SHADER__SPI_PS_IN_CONTROL_1 33
-#define R600_PS_SHADER__SPI_INPUT_Z 34
-#define R600_PS_SHADER__SQ_PGM_START_PS 35
-#define R600_PS_SHADER__SQ_PGM_RESOURCES_PS 36
-#define R600_PS_SHADER__SQ_PGM_EXPORTS_PS 37
-#define R600_PS_SHADER__SQ_PGM_CF_OFFSET_PS 38
-#define R600_PS_SHADER_SIZE 39
-#define R600_PS_SHADER_PM4 128
-
-/* R600_VS_CBUF */
-#define R600_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0 0
-#define R600_VS_CBUF__ALU_CONST_CACHE_VS_0 1
-#define R600_VS_CBUF_SIZE 2
-#define R600_VS_CBUF_PM4 128
-
-/* R600_PS_CBUF */
-#define R600_PS_CBUF__ALU_CONST_BUFFER_SIZE_PS_0 0
-#define R600_PS_CBUF__ALU_CONST_CACHE_PS_0 1
-#define R600_PS_CBUF_SIZE 2
-#define R600_PS_CBUF_PM4 128
-
-/* R600_PS_CONSTANT */
-#define R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0 0
-#define R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0 1
-#define R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0 2
-#define R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0 3
-#define R600_PS_CONSTANT_SIZE 4
-#define R600_PS_CONSTANT_PM4 128
-
-/* R600_VS_CONSTANT */
-#define R600_VS_CONSTANT__SQ_ALU_CONSTANT0_256 0
-#define R600_VS_CONSTANT__SQ_ALU_CONSTANT1_256 1
-#define R600_VS_CONSTANT__SQ_ALU_CONSTANT2_256 2
-#define R600_VS_CONSTANT__SQ_ALU_CONSTANT3_256 3
-#define R600_VS_CONSTANT_SIZE 4
-#define R600_VS_CONSTANT_PM4 128
-
-/* R600_UCP */
-#define R600_UCP__PA_CL_UCP0_X 0
-#define R600_UCP__PA_CL_UCP0_Y 1
-#define R600_UCP__PA_CL_UCP0_Z 2
-#define R600_UCP__PA_CL_UCP0_W 3
-#define R600_UCP__PA_CL_UCP1_X 4
-#define R600_UCP__PA_CL_UCP1_Y 5
-#define R600_UCP__PA_CL_UCP1_Z 6
-#define R600_UCP__PA_CL_UCP1_W 7
-#define R600_UCP__PA_CL_UCP2_X 8
-#define R600_UCP__PA_CL_UCP2_Y 9
-#define R600_UCP__PA_CL_UCP2_Z 10
-#define R600_UCP__PA_CL_UCP2_W 11
-#define R600_UCP__PA_CL_UCP3_X 12
-#define R600_UCP__PA_CL_UCP3_Y 13
-#define R600_UCP__PA_CL_UCP3_Z 14
-#define R600_UCP__PA_CL_UCP3_W 15
-#define R600_UCP__PA_CL_UCP4_X 16
-#define R600_UCP__PA_CL_UCP4_Y 17
-#define R600_UCP__PA_CL_UCP4_Z 18
-#define R600_UCP__PA_CL_UCP4_W 19
-#define R600_UCP__PA_CL_UCP5_X 20
-#define R600_UCP__PA_CL_UCP5_Y 21
-#define R600_UCP__PA_CL_UCP5_Z 22
-#define R600_UCP__PA_CL_UCP5_W 23
-#define R600_UCP_SIZE 24
-#define R600_UCP_PM4 128
-
-/* R600_PS_RESOURCE */
-#define R600_PS_RESOURCE__RESOURCE0_WORD0 0
-#define R600_PS_RESOURCE__RESOURCE0_WORD1 1
-#define R600_PS_RESOURCE__RESOURCE0_WORD2 2
-#define R600_PS_RESOURCE__RESOURCE0_WORD3 3
-#define R600_PS_RESOURCE__RESOURCE0_WORD4 4
-#define R600_PS_RESOURCE__RESOURCE0_WORD5 5
-#define R600_PS_RESOURCE__RESOURCE0_WORD6 6
-#define R600_PS_RESOURCE_SIZE 7
-#define R600_PS_RESOURCE_PM4 128
-
-/* R600_VS_RESOURCE */
-#define R600_VS_RESOURCE__RESOURCE160_WORD0 0
-#define R600_VS_RESOURCE__RESOURCE160_WORD1 1
-#define R600_VS_RESOURCE__RESOURCE160_WORD2 2
-#define R600_VS_RESOURCE__RESOURCE160_WORD3 3
-#define R600_VS_RESOURCE__RESOURCE160_WORD4 4
-#define R600_VS_RESOURCE__RESOURCE160_WORD5 5
-#define R600_VS_RESOURCE__RESOURCE160_WORD6 6
-#define R600_VS_RESOURCE_SIZE 7
-#define R600_VS_RESOURCE_PM4 128
-
-/* R600_FS_RESOURCE */
-#define R600_FS_RESOURCE__RESOURCE320_WORD0 0
-#define R600_FS_RESOURCE__RESOURCE320_WORD1 1
-#define R600_FS_RESOURCE__RESOURCE320_WORD2 2
-#define R600_FS_RESOURCE__RESOURCE320_WORD3 3
-#define R600_FS_RESOURCE__RESOURCE320_WORD4 4
-#define R600_FS_RESOURCE__RESOURCE320_WORD5 5
-#define R600_FS_RESOURCE__RESOURCE320_WORD6 6
-#define R600_FS_RESOURCE_SIZE 7
-#define R600_FS_RESOURCE_PM4 128
-
-/* R600_GS_RESOURCE */
-#define R600_GS_RESOURCE__RESOURCE336_WORD0 0
-#define R600_GS_RESOURCE__RESOURCE336_WORD1 1
-#define R600_GS_RESOURCE__RESOURCE336_WORD2 2
-#define R600_GS_RESOURCE__RESOURCE336_WORD3 3
-#define R600_GS_RESOURCE__RESOURCE336_WORD4 4
-#define R600_GS_RESOURCE__RESOURCE336_WORD5 5
-#define R600_GS_RESOURCE__RESOURCE336_WORD6 6
-#define R600_GS_RESOURCE_SIZE 7
-#define R600_GS_RESOURCE_PM4 128
-
-/* R600_PS_SAMPLER */
-#define R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0 0
-#define R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0 1
-#define R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0 2
-#define R600_PS_SAMPLER_SIZE 3
-#define R600_PS_SAMPLER_PM4 128
-
-/* R600_VS_SAMPLER */
-#define R600_VS_SAMPLER__SQ_TEX_SAMPLER_WORD0_18 0
-#define R600_VS_SAMPLER__SQ_TEX_SAMPLER_WORD1_18 1
-#define R600_VS_SAMPLER__SQ_TEX_SAMPLER_WORD2_18 2
-#define R600_VS_SAMPLER_SIZE 3
-#define R600_VS_SAMPLER_PM4 128
-
-/* R600_GS_SAMPLER */
-#define R600_GS_SAMPLER__SQ_TEX_SAMPLER_WORD0_36 0
-#define R600_GS_SAMPLER__SQ_TEX_SAMPLER_WORD1_36 1
-#define R600_GS_SAMPLER__SQ_TEX_SAMPLER_WORD2_36 2
-#define R600_GS_SAMPLER_SIZE 3
-#define R600_GS_SAMPLER_PM4 128
-
-/* R600_PS_SAMPLER_BORDER */
-#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED 0
-#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN 1
-#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE 2
-#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA 3
-#define R600_PS_SAMPLER_BORDER_SIZE 4
-#define R600_PS_SAMPLER_BORDER_PM4 128
-
-/* R600_VS_SAMPLER_BORDER */
-#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_RED 0
-#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_GREEN 1
-#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_BLUE 2
-#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_ALPHA 3
-#define R600_VS_SAMPLER_BORDER_SIZE 4
-#define R600_VS_SAMPLER_BORDER_PM4 128
-
-/* R600_GS_SAMPLER_BORDER */
-#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_RED 0
-#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_GREEN 1
-#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_BLUE 2
-#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_ALPHA 3
-#define R600_GS_SAMPLER_BORDER_SIZE 4
-#define R600_GS_SAMPLER_BORDER_PM4 128
-
-/* R600_CB0 */
-#define R600_CB0__CB_COLOR0_BASE 0
-#define R600_CB0__CB_COLOR0_INFO 1
-#define R600_CB0__CB_COLOR0_SIZE 2
-#define R600_CB0__CB_COLOR0_VIEW 3
-#define R600_CB0__CB_COLOR0_FRAG 4
-#define R600_CB0__CB_COLOR0_TILE 5
-#define R600_CB0__CB_COLOR0_MASK 6
-#define R600_CB0_SIZE 7
-#define R600_CB0_PM4 128
-
-/* R600_CB1 */
-#define R600_CB1__CB_COLOR1_BASE 0
-#define R600_CB1__CB_COLOR1_INFO 1
-#define R600_CB1__CB_COLOR1_SIZE 2
-#define R600_CB1__CB_COLOR1_VIEW 3
-#define R600_CB1__CB_COLOR1_FRAG 4
-#define R600_CB1__CB_COLOR1_TILE 5
-#define R600_CB1__CB_COLOR1_MASK 6
-#define R600_CB1_SIZE 7
-#define R600_CB1_PM4 128
-
-/* R600_CB2 */
-#define R600_CB2__CB_COLOR2_BASE 0
-#define R600_CB2__CB_COLOR2_INFO 1
-#define R600_CB2__CB_COLOR2_SIZE 2
-#define R600_CB2__CB_COLOR2_VIEW 3
-#define R600_CB2__CB_COLOR2_FRAG 4
-#define R600_CB2__CB_COLOR2_TILE 5
-#define R600_CB2__CB_COLOR2_MASK 6
-#define R600_CB2_SIZE 7
-#define R600_CB2_PM4 128
-
-/* R600_CB3 */
-#define R600_CB3__CB_COLOR3_BASE 0
-#define R600_CB3__CB_COLOR3_INFO 1
-#define R600_CB3__CB_COLOR3_SIZE 2
-#define R600_CB3__CB_COLOR3_VIEW 3
-#define R600_CB3__CB_COLOR3_FRAG 4
-#define R600_CB3__CB_COLOR3_TILE 5
-#define R600_CB3__CB_COLOR3_MASK 6
-#define R600_CB3_SIZE 7
-#define R600_CB3_PM4 128
-
-/* R600_CB4 */
-#define R600_CB4__CB_COLOR4_BASE 0
-#define R600_CB4__CB_COLOR4_INFO 1
-#define R600_CB4__CB_COLOR4_SIZE 2
-#define R600_CB4__CB_COLOR4_VIEW 3
-#define R600_CB4__CB_COLOR4_FRAG 4
-#define R600_CB4__CB_COLOR4_TILE 5
-#define R600_CB4__CB_COLOR4_MASK 6
-#define R600_CB4_SIZE 7
-#define R600_CB4_PM4 128
-
-/* R600_CB5 */
-#define R600_CB5__CB_COLOR5_BASE 0
-#define R600_CB5__CB_COLOR5_INFO 1
-#define R600_CB5__CB_COLOR5_SIZE 2
-#define R600_CB5__CB_COLOR5_VIEW 3
-#define R600_CB5__CB_COLOR5_FRAG 4
-#define R600_CB5__CB_COLOR5_TILE 5
-#define R600_CB5__CB_COLOR5_MASK 6
-#define R600_CB5_SIZE 7
-#define R600_CB5_PM4 128
-
-/* R600_CB6 */
-#define R600_CB6__CB_COLOR6_BASE 0
-#define R600_CB6__CB_COLOR6_INFO 1
-#define R600_CB6__CB_COLOR6_SIZE 2
-#define R600_CB6__CB_COLOR6_VIEW 3
-#define R600_CB6__CB_COLOR6_FRAG 4
-#define R600_CB6__CB_COLOR6_TILE 5
-#define R600_CB6__CB_COLOR6_MASK 6
-#define R600_CB6_SIZE 7
-#define R600_CB6_PM4 128
-
-/* R600_CB7 */
-#define R600_CB7__CB_COLOR7_BASE 0
-#define R600_CB7__CB_COLOR7_INFO 1
-#define R600_CB7__CB_COLOR7_SIZE 2
-#define R600_CB7__CB_COLOR7_VIEW 3
-#define R600_CB7__CB_COLOR7_FRAG 4
-#define R600_CB7__CB_COLOR7_TILE 5
-#define R600_CB7__CB_COLOR7_MASK 6
-#define R600_CB7_SIZE 7
-#define R600_CB7_PM4 128
-
-/* R600_DB */
-#define R600_DB__DB_DEPTH_BASE 0
-#define R600_DB__DB_DEPTH_SIZE 1
-#define R600_DB__DB_DEPTH_VIEW 2
-#define R600_DB__DB_DEPTH_INFO 3
-#define R600_DB__DB_HTILE_SURFACE 4
-#define R600_DB__DB_PREFETCH_LIMIT 5
-#define R600_DB_SIZE 6
-#define R600_DB_PM4 128
-
-/* R600_VGT */
-#define R600_VGT__VGT_PRIMITIVE_TYPE 0
-#define R600_VGT__VGT_MAX_VTX_INDX 1
-#define R600_VGT__VGT_MIN_VTX_INDX 2
-#define R600_VGT__VGT_INDX_OFFSET 3
-#define R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX 4
-#define R600_VGT__VGT_DMA_INDEX_TYPE 5
-#define R600_VGT__VGT_PRIMITIVEID_EN 6
-#define R600_VGT__VGT_DMA_NUM_INSTANCES 7
-#define R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN 8
-#define R600_VGT__VGT_INSTANCE_STEP_RATE_0 9
-#define R600_VGT__VGT_INSTANCE_STEP_RATE_1 10
-#define R600_VGT_SIZE 11
-#define R600_VGT_PM4 128
-
-/* R600_DRAW */
-#define R600_DRAW__VGT_NUM_INDICES 0
-#define R600_DRAW__VGT_DMA_BASE_HI 1
-#define R600_DRAW__VGT_DMA_BASE 2
-#define R600_DRAW__VGT_DRAW_INITIATOR 3
-#define R600_DRAW_SIZE 4
-#define R600_DRAW_PM4 128
-
-/* R600_VGT_EVENT */
-#define R600_VGT_EVENT__VGT_EVENT_INITIATOR 0
-#define R600_VGT_EVENT_SIZE 1
-#define R600_VGT_EVENT_PM4 128
-
-/* R600_CB_FLUSH */
-#define R600_CB_FLUSH_SIZE 0
-#define R600_CB_FLUSH_PM4 128
-
-/* R600_DB_FLUSH */
-#define R600_DB_FLUSH_SIZE 0
-#define R600_DB_FLUSH_PM4 128
-
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index d4d9b07c0e..dc351bfb62 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -27,6 +27,7 @@
#include <errno.h>
#include <pipe/p_screen.h>
#include <util/u_format.h>
+#include <util/u_format_s3tc.h>
#include <util/u_math.h>
#include <util/u_inlines.h>
#include <util/u_memory.h>
@@ -38,8 +39,6 @@
#include "r600d.h"
#include "r600_formats.h"
-extern struct u_resource_vtbl r600_texture_vtbl;
-
/* Copy from a full GPU texture to a transfer's staging one. */
static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
{
@@ -69,7 +68,7 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600
rtransfer->staging_texture,
0, &sbox);
- ctx->flush(ctx, 0, NULL);
+ ctx->flush(ctx, NULL);
}
unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
@@ -77,17 +76,15 @@ unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
{
unsigned offset = rtex->offset[level];
- switch (rtex->resource.base.b.target) {
+ switch (rtex->resource.b.b.b.target) {
case PIPE_TEXTURE_3D:
case PIPE_TEXTURE_CUBE:
- return offset + layer * rtex->layer_size[level];
default:
- assert(layer == 0);
- return offset;
+ return offset + layer * rtex->layer_size[level];
}
}
-static unsigned r600_get_pixel_alignment(struct pipe_screen *screen,
+static unsigned r600_get_block_alignment(struct pipe_screen *screen,
enum pipe_format format,
unsigned array_mode)
{
@@ -105,6 +102,9 @@ static unsigned r600_get_pixel_alignment(struct pipe_screen *screen,
(((rscreen->tiling_info->group_bytes / 8 / pixsize)) *
rscreen->tiling_info->num_banks)) * 8;
break;
+ case V_038000_ARRAY_LINEAR_ALIGNED:
+ p_align = MAX2(64, rscreen->tiling_info->group_bytes / pixsize);
+ break;
case V_038000_ARRAY_LINEAR_GENERAL:
default:
p_align = rscreen->tiling_info->group_bytes / pixsize;
@@ -124,8 +124,10 @@ static unsigned r600_get_height_alignment(struct pipe_screen *screen,
h_align = rscreen->tiling_info->num_channels * 8;
break;
case V_038000_ARRAY_1D_TILED_THIN1:
+ case V_038000_ARRAY_LINEAR_ALIGNED:
h_align = 8;
break;
+ case V_038000_ARRAY_LINEAR_GENERAL:
default:
h_align = 1;
break;
@@ -139,7 +141,7 @@ static unsigned r600_get_base_alignment(struct pipe_screen *screen,
{
struct r600_screen* rscreen = (struct r600_screen *)screen;
unsigned pixsize = util_format_get_blocksize(format);
- int p_align = r600_get_pixel_alignment(screen, format, array_mode);
+ int p_align = r600_get_block_alignment(screen, format, array_mode);
int h_align = r600_get_height_alignment(screen, array_mode);
int b_align;
@@ -149,6 +151,8 @@ static unsigned r600_get_base_alignment(struct pipe_screen *screen,
p_align * pixsize * h_align);
break;
case V_038000_ARRAY_1D_TILED_THIN1:
+ case V_038000_ARRAY_LINEAR_ALIGNED:
+ case V_038000_ARRAY_LINEAR_GENERAL:
default:
b_align = rscreen->tiling_info->group_bytes;
break;
@@ -165,55 +169,46 @@ static unsigned mip_minify(unsigned size, unsigned level)
return val;
}
-static unsigned r600_texture_get_stride(struct pipe_screen *screen,
- struct r600_resource_texture *rtex,
- unsigned level)
+static unsigned r600_texture_get_nblocksx(struct pipe_screen *screen,
+ struct r600_resource_texture *rtex,
+ unsigned level)
{
- struct pipe_resource *ptex = &rtex->resource.base.b;
- unsigned width, stride, tile_width;
+ struct pipe_resource *ptex = &rtex->resource.b.b.b;
+ unsigned nblocksx, block_align, width;
+ unsigned blocksize = util_format_get_blocksize(ptex->format);
if (rtex->pitch_override)
- return rtex->pitch_override;
+ return rtex->pitch_override / blocksize;
width = mip_minify(ptex->width0, level);
- if (util_format_is_plain(ptex->format)) {
- tile_width = r600_get_pixel_alignment(screen, ptex->format,
- rtex->array_mode[level]);
- width = align(width, tile_width);
- }
- stride = util_format_get_stride(ptex->format, width);
+ nblocksx = util_format_get_nblocksx(ptex->format, width);
- return stride;
+ block_align = r600_get_block_alignment(screen, ptex->format,
+ rtex->array_mode[level]);
+ nblocksx = align(nblocksx, block_align);
+ return nblocksx;
}
static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen,
struct r600_resource_texture *rtex,
unsigned level)
{
- struct pipe_resource *ptex = &rtex->resource.base.b;
+ struct pipe_resource *ptex = &rtex->resource.b.b.b;
unsigned height, tile_height;
height = mip_minify(ptex->height0, level);
- if (util_format_is_plain(ptex->format)) {
- tile_height = r600_get_height_alignment(screen,
- rtex->array_mode[level]);
- height = align(height, tile_height);
- }
- return util_format_get_nblocksy(ptex->format, height);
-}
-
-/* Get a width in pixels from a stride in bytes. */
-static unsigned pitch_to_width(enum pipe_format format, unsigned pitch_in_bytes)
-{
- return (pitch_in_bytes / util_format_get_blocksize(format)) *
- util_format_get_blockwidth(format);
+ height = util_format_get_nblocksy(ptex->format, height);
+ tile_height = r600_get_height_alignment(screen,
+ rtex->array_mode[level]);
+ height = align(height, tile_height);
+ return height;
}
static void r600_texture_set_array_mode(struct pipe_screen *screen,
struct r600_resource_texture *rtex,
unsigned level, unsigned array_mode)
{
- struct pipe_resource *ptex = &rtex->resource.base.b;
+ struct pipe_resource *ptex = &rtex->resource.b.b.b;
switch (array_mode) {
case V_0280A0_ARRAY_LINEAR_GENERAL:
@@ -227,11 +222,11 @@ static void r600_texture_set_array_mode(struct pipe_screen *screen,
unsigned w, h, tile_height, tile_width;
tile_height = r600_get_height_alignment(screen, array_mode);
- tile_width = r600_get_pixel_alignment(screen, ptex->format, array_mode);
+ tile_width = r600_get_block_alignment(screen, ptex->format, array_mode);
w = mip_minify(ptex->width0, level);
h = mip_minify(ptex->height0, level);
- if (w < tile_width || h < tile_height)
+ if (w <= tile_width || h <= tile_height)
rtex->array_mode[level] = V_0280A0_ARRAY_1D_TILED_THIN1;
else
rtex->array_mode[level] = array_mode;
@@ -244,40 +239,119 @@ static void r600_setup_miptree(struct pipe_screen *screen,
struct r600_resource_texture *rtex,
unsigned array_mode)
{
- struct pipe_resource *ptex = &rtex->resource.base.b;
+ struct pipe_resource *ptex = &rtex->resource.b.b.b;
struct radeon *radeon = (struct radeon *)screen->winsys;
enum chip_class chipc = r600_get_family_class(radeon);
- unsigned pitch, size, layer_size, i, offset;
- unsigned nblocksy;
+ unsigned size, layer_size, i, offset;
+ unsigned nblocksx, nblocksy;
for (i = 0, offset = 0; i <= ptex->last_level; i++) {
+ unsigned blocksize = util_format_get_blocksize(ptex->format);
+
r600_texture_set_array_mode(screen, rtex, i, array_mode);
- pitch = r600_texture_get_stride(screen, rtex, i);
+ nblocksx = r600_texture_get_nblocksx(screen, rtex, i);
nblocksy = r600_texture_get_nblocksy(screen, rtex, i);
- layer_size = pitch * nblocksy;
-
+ layer_size = nblocksx * nblocksy * blocksize;
if (ptex->target == PIPE_TEXTURE_CUBE) {
if (chipc >= R700)
size = layer_size * 8;
else
size = layer_size * 6;
}
- else
+ else if (ptex->target == PIPE_TEXTURE_3D)
size = layer_size * u_minify(ptex->depth0, i);
+ else
+ size = layer_size * ptex->array_size;
+
/* align base image and start of miptree */
if ((i == 0) || (i == 1))
offset = align(offset, r600_get_base_alignment(screen, ptex->format, array_mode));
rtex->offset[i] = offset;
rtex->layer_size[i] = layer_size;
- rtex->pitch_in_bytes[i] = pitch;
- rtex->pitch_in_pixels[i] = pitch_to_width(ptex->format, pitch);
+ rtex->pitch_in_blocks[i] = nblocksx; /* CB talks in elements */
+ rtex->pitch_in_bytes[i] = nblocksx * blocksize;
+
offset += size;
}
rtex->size = offset;
}
+/* Figure out whether u_blitter will fallback to a transfer operation.
+ * If so, don't use a staging resource.
+ */
+static boolean permit_hardware_blit(struct pipe_screen *screen,
+ const struct pipe_resource *res)
+{
+ unsigned bind;
+
+ if (util_format_is_depth_or_stencil(res->format))
+ bind = PIPE_BIND_DEPTH_STENCIL;
+ else
+ bind = PIPE_BIND_RENDER_TARGET;
+
+ /* hackaround for S3TC */
+ if (util_format_is_compressed(res->format))
+ return TRUE;
+
+ if (!screen->is_format_supported(screen,
+ res->format,
+ res->target,
+ res->nr_samples,
+ bind))
+ return FALSE;
+
+ if (!screen->is_format_supported(screen,
+ res->format,
+ res->target,
+ res->nr_samples,
+ PIPE_BIND_SAMPLER_VIEW))
+ return FALSE;
+
+ return TRUE;
+}
+
+static boolean r600_texture_get_handle(struct pipe_screen* screen,
+ struct pipe_resource *ptex,
+ struct winsys_handle *whandle)
+{
+ struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
+ struct r600_resource *resource = &rtex->resource;
+ struct radeon *radeon = (struct radeon *)screen->winsys;
+
+ return r600_bo_get_winsys_handle(radeon, resource->bo,
+ rtex->pitch_in_bytes[0], whandle);
+}
+
+static void r600_texture_destroy(struct pipe_screen *screen,
+ struct pipe_resource *ptex)
+{
+ struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
+ struct r600_resource *resource = &rtex->resource;
+ struct radeon *radeon = (struct radeon *)screen->winsys;
+
+ if (rtex->flushed_depth_texture)
+ pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
+
+ if (resource->bo) {
+ r600_bo_reference(radeon, &resource->bo, NULL);
+ }
+ FREE(rtex);
+}
+
+static const struct u_resource_vtbl r600_texture_vtbl =
+{
+ r600_texture_get_handle, /* get_handle */
+ r600_texture_destroy, /* resource_destroy */
+ r600_texture_get_transfer, /* get_transfer */
+ r600_texture_transfer_destroy, /* transfer_destroy */
+ r600_texture_transfer_map, /* transfer_map */
+ u_default_transfer_flush_region,/* transfer_flush_region */
+ r600_texture_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
+};
+
static struct r600_resource_texture *
r600_texture_create_object(struct pipe_screen *screen,
const struct pipe_resource *base,
@@ -295,21 +369,22 @@ r600_texture_create_object(struct pipe_screen *screen,
return NULL;
resource = &rtex->resource;
- resource->base.b = *base;
- resource->base.vtbl = &r600_texture_vtbl;
- pipe_reference_init(&resource->base.b.reference, 1);
- resource->base.b.screen = screen;
+ resource->b.b.b = *base;
+ resource->b.b.vtbl = &r600_texture_vtbl;
+ pipe_reference_init(&resource->b.b.b.reference, 1);
+ resource->b.b.b.screen = screen;
resource->bo = bo;
rtex->pitch_override = pitch_in_bytes_override;
+ /* only mark depth textures the HW can hit as depth textures */
+ if (util_format_is_depth_or_stencil(base->format) && permit_hardware_blit(screen, base))
+ rtex->depth = 1;
- if (array_mode)
- rtex->tiled = 1;
r600_setup_miptree(screen, rtex, array_mode);
resource->size = rtex->size;
if (!resource->bo) {
- struct pipe_resource *ptex = &rtex->resource.base.b;
+ struct pipe_resource *ptex = &rtex->resource.b.b.b;
int base_align = r600_get_base_alignment(screen, ptex->format, array_mode);
resource->bo = r600_bo(radeon, rtex->size, base_align, base->bind, base->usage);
@@ -329,56 +404,38 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
/* Would like some magic "get_bool_option_once" routine.
*/
- if (force_tiling == -1)
- force_tiling = debug_get_bool_option("R600_FORCE_TILING", FALSE);
+ if (force_tiling == -1) {
+#if 0
+ /* reenable when 2D tiling is fixed better */
+ struct r600_screen *rscreen = (struct r600_screen *)screen;
+ if (r600_get_minor_version(rscreen->radeon) >= 9)
+ force_tiling = debug_get_bool_option("R600_TILING", TRUE);
+#endif
+ force_tiling = debug_get_bool_option("R600_TILING", FALSE);
+ }
- if (force_tiling) {
+ if (force_tiling && permit_hardware_blit(screen, templ)) {
if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
!(templ->bind & PIPE_BIND_SCANOUT)) {
array_mode = V_038000_ARRAY_2D_TILED_THIN1;
}
}
+ if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
+ util_format_is_compressed(templ->format))
+ array_mode = V_038000_ARRAY_1D_TILED_THIN1;
+
return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode,
0, 0, NULL);
}
-static void r600_texture_destroy(struct pipe_screen *screen,
- struct pipe_resource *ptex)
-{
- struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
- struct r600_resource *resource = &rtex->resource;
- struct radeon *radeon = (struct radeon *)screen->winsys;
-
- if (rtex->flushed_depth_texture)
- pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
-
- if (resource->bo) {
- r600_bo_reference(radeon, &resource->bo, NULL);
- }
- FREE(rtex);
-}
-
-static boolean r600_texture_get_handle(struct pipe_screen* screen,
- struct pipe_resource *ptex,
- struct winsys_handle *whandle)
-{
- struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
- struct r600_resource *resource = &rtex->resource;
- struct radeon *radeon = (struct radeon *)screen->winsys;
-
- return r600_bo_get_winsys_handle(radeon, resource->bo,
- rtex->pitch_in_bytes[0], whandle);
-}
-
static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_surface *surf_tmpl)
{
struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
- unsigned tile_height;
unsigned level = surf_tmpl->u.tex.level;
assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
@@ -398,8 +455,8 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
surface->base.u.tex.level = level;
- tile_height = r600_get_height_alignment(pipe->screen, rtex->array_mode[level]);
- surface->aligned_height = align(surface->base.height, tile_height);
+ surface->aligned_height = r600_texture_get_nblocksy(pipe->screen,
+ rtex, level);
return &surface->base;
}
@@ -435,18 +492,8 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
bo);
}
-static unsigned int r600_texture_is_referenced(struct pipe_context *context,
- struct pipe_resource *texture,
- unsigned level, int layer)
-{
- /* FIXME */
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
-int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture);
-
int r600_texture_depth_flush(struct pipe_context *ctx,
- struct pipe_resource *texture)
+ struct pipe_resource *texture, boolean just_create)
{
struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
struct pipe_resource resource;
@@ -459,7 +506,8 @@ int r600_texture_depth_flush(struct pipe_context *ctx,
resource.width0 = texture->width0;
resource.height0 = texture->height0;
resource.depth0 = 1;
- resource.last_level = 0;
+ resource.array_size = 1;
+ resource.last_level = texture->last_level;
resource.nr_samples = 0;
resource.usage = PIPE_USAGE_DYNAMIC;
resource.bind = 0;
@@ -473,10 +521,14 @@ int r600_texture_depth_flush(struct pipe_context *ctx,
return -ENOMEM;
}
+ ((struct r600_resource_texture *)rtex->flushed_depth_texture)->is_flushing_texture = TRUE;
out:
+ if (just_create)
+ return 0;
+
/* XXX: only do this if the depth texture has actually changed:
*/
- r600_blit_uncompress_depth_ptr(ctx, rtex);
+ r600_blit_uncompress_depth(ctx, rtex);
return 0;
}
@@ -487,46 +539,6 @@ static INLINE unsigned u_box_volume( const struct pipe_box *box )
return box->width * box->depth * box->height;
};
-
-/* Figure out whether u_blitter will fallback to a transfer operation.
- * If so, don't use a staging resource.
- */
-static boolean permit_hardware_blit(struct pipe_screen *screen,
- struct pipe_resource *res)
-{
- unsigned bind;
-
- if (util_format_is_depth_or_stencil(res->format))
- bind = PIPE_BIND_DEPTH_STENCIL;
- else
- bind = PIPE_BIND_RENDER_TARGET;
-
- /* See r600_resource_copy_region: there is something wrong
- * with depth resource copies at the moment so avoid them for
- * now.
- */
- if (util_format_get_component_bits(res->format,
- UTIL_FORMAT_COLORSPACE_ZS,
- 0) != 0)
- return FALSE;
-
- if (!screen->is_format_supported(screen,
- res->format,
- res->target,
- res->nr_samples,
- bind, 0))
- return FALSE;
-
- if (!screen->is_format_supported(screen,
- res->format,
- res->target,
- res->nr_samples,
- PIPE_BIND_SAMPLER_VIEW, 0))
- return FALSE;
-
- return TRUE;
-}
-
struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
struct pipe_resource *texture,
unsigned level,
@@ -546,7 +558,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
* the CPU is much happier reading out of cached system memory
* than uncached VRAM.
*/
- if (rtex->tiled)
+ if (R600_TEX_IS_TILED(rtex, level))
use_staging_texture = TRUE;
if ((usage & PIPE_TRANSFER_READ) && u_box_volume(box) > 1024)
@@ -579,13 +591,16 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
*/
/* XXX: when discard is true, no need to read back from depth texture
*/
- r = r600_texture_depth_flush(ctx, texture);
+ r = r600_texture_depth_flush(ctx, texture, FALSE);
if (r < 0) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
pipe_resource_reference(&trans->transfer.resource, NULL);
FREE(trans);
return NULL;
}
+ trans->transfer.stride = rtex->flushed_depth_texture->pitch_in_bytes[level];
+ trans->offset = r600_texture_get_offset(rtex->flushed_depth_texture, level, box->z);
+ return &trans->transfer;
} else if (use_staging_texture) {
resource.target = PIPE_TEXTURE_2D;
resource.format = texture->format;
@@ -622,11 +637,12 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
if (usage & PIPE_TRANSFER_READ) {
r600_copy_to_staging_texture(ctx, trans);
/* Always referenced in the blit. */
- ctx->flush(ctx, 0, NULL);
+ ctx->flush(ctx, NULL);
}
return &trans->transfer;
}
trans->transfer.stride = rtex->pitch_in_bytes[level];
+ trans->transfer.layer_stride = rtex->layer_size[level];
trans->offset = r600_texture_get_offset(rtex, level, box->z);
return &trans->transfer;
}
@@ -635,7 +651,8 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx,
struct pipe_transfer *transfer)
{
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
- struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource;
+ struct pipe_resource *texture = transfer->resource;
+ struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
if (rtransfer->staging_texture) {
if (transfer->usage & PIPE_TRANSFER_WRITE) {
@@ -643,9 +660,12 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx,
}
pipe_resource_reference(&rtransfer->staging_texture, NULL);
}
- if (rtex->flushed_depth_texture) {
- pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
+
+ if (rtex->depth && !rtex->is_flushing_texture) {
+ if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtex->flushed_depth_texture)
+ r600_blit_push_depth(ctx, rtex);
}
+
pipe_resource_reference(&transfer->resource, NULL);
FREE(transfer);
}
@@ -727,19 +747,6 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx,
r600_bo_unmap(radeon, bo);
}
-struct u_resource_vtbl r600_texture_vtbl =
-{
- r600_texture_get_handle, /* get_handle */
- r600_texture_destroy, /* resource_destroy */
- r600_texture_is_referenced, /* is_resource_referenced */
- r600_texture_get_transfer, /* get_transfer */
- r600_texture_transfer_destroy, /* transfer_destroy */
- r600_texture_transfer_map, /* transfer_map */
- u_default_transfer_flush_region,/* transfer_flush_region */
- r600_texture_transfer_unmap, /* transfer_unmap */
- u_default_transfer_inline_write /* transfer_inline_write */
-};
-
void r600_init_surface_functions(struct r600_pipe_context *r600)
{
r600->context.create_surface = r600_create_surface;
@@ -795,13 +802,16 @@ static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
}
/* texture format translate */
-uint32_t r600_translate_texformat(enum pipe_format format,
- const unsigned char *swizzle_view,
+uint32_t r600_translate_texformat(struct pipe_screen *screen,
+ enum pipe_format format,
+ const unsigned char *swizzle_view,
uint32_t *word4_p, uint32_t *yuv_format_p)
{
uint32_t result = 0, word4 = 0, yuv_format = 0;
const struct util_format_description *desc;
boolean uniform = TRUE;
+ static int r600_enable_s3tc = -1;
+
int i;
const uint32_t sign_bit[4] = {
S_038010_FORMAT_COMP_X(V_038010_SQ_FORMAT_COMP_SIGNED),
@@ -850,37 +860,65 @@ uint32_t r600_translate_texformat(enum pipe_format format,
break;
}
goto out_unknown; /* TODO */
-
+
case UTIL_FORMAT_COLORSPACE_SRGB:
word4 |= S_038010_FORCE_DEGAMMA(1);
- if (format == PIPE_FORMAT_L8A8_SRGB || format == PIPE_FORMAT_L8_SRGB)
- goto out_unknown; /* fails for some reason - TODO */
break;
default:
break;
}
- /* S3TC formats. TODO */
- if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
- static int r600_enable_s3tc = -1;
+ if (r600_enable_s3tc == -1) {
+ struct r600_screen *rscreen = (struct r600_screen *)screen;
+ if (r600_get_minor_version(rscreen->radeon) >= 9)
+ r600_enable_s3tc = 1;
+ else
+ r600_enable_s3tc = debug_get_bool_option("R600_ENABLE_S3TC", FALSE);
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+ if (!r600_enable_s3tc)
+ goto out_unknown;
- if (r600_enable_s3tc == -1)
- r600_enable_s3tc =
- debug_get_bool_option("R600_ENABLE_S3TC", FALSE);
+ switch (format) {
+ case PIPE_FORMAT_RGTC1_SNORM:
+ word4 |= sign_bit[0];
+ case PIPE_FORMAT_RGTC1_UNORM:
+ result = FMT_BC4;
+ goto out_word4;
+ case PIPE_FORMAT_RGTC2_SNORM:
+ word4 |= sign_bit[0] | sign_bit[1];
+ case PIPE_FORMAT_RGTC2_UNORM:
+ result = FMT_BC5;
+ goto out_word4;
+ default:
+ goto out_unknown;
+ }
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
if (!r600_enable_s3tc)
goto out_unknown;
+ if (!util_format_s3tc_enabled) {
+ goto out_unknown;
+ }
+
switch (format) {
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:
+ case PIPE_FORMAT_DXT1_SRGB:
+ case PIPE_FORMAT_DXT1_SRGBA:
result = FMT_BC1;
goto out_word4;
case PIPE_FORMAT_DXT3_RGBA:
+ case PIPE_FORMAT_DXT3_SRGBA:
result = FMT_BC2;
goto out_word4;
case PIPE_FORMAT_DXT5_RGBA:
+ case PIPE_FORMAT_DXT5_SRGBA:
result = FMT_BC3;
goto out_word4;
default:
@@ -889,7 +927,7 @@ uint32_t r600_translate_texformat(enum pipe_format format,
}
- for (i = 0; i < desc->nr_channels; i++) {
+ for (i = 0; i < desc->nr_channels; i++) {
if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
word4 |= sign_bit[i];
}
@@ -897,13 +935,11 @@ uint32_t r600_translate_texformat(enum pipe_format format,
/* R8G8Bx_SNORM - TODO CxV8U8 */
- /* RGTC - TODO */
-
/* See whether the components are of the same size. */
for (i = 1; i < desc->nr_channels; i++) {
uniform = uniform && desc->channel[0].size == desc->channel[i].size;
}
-
+
/* Non-uniform formats. */
if (!uniform) {
switch(desc->nr_channels) {
@@ -927,7 +963,7 @@ uint32_t r600_translate_texformat(enum pipe_format format,
desc->channel[1].size == 10 &&
desc->channel[2].size == 10 &&
desc->channel[3].size == 2) {
- result = FMT_10_10_10_2;
+ result = FMT_2_10_10_10;
goto out_word4;
}
goto out_unknown;
@@ -990,6 +1026,19 @@ uint32_t r600_translate_texformat(enum pipe_format format,
result = FMT_16_16_16_16;
goto out_word4;
}
+ goto out_unknown;
+ case 32:
+ switch (desc->nr_channels) {
+ case 1:
+ result = FMT_32;
+ goto out_word4;
+ case 2:
+ result = FMT_32_32;
+ goto out_word4;
+ case 4:
+ result = FMT_32_32_32_32;
+ goto out_word4;
+ }
}
goto out_unknown;
@@ -1021,7 +1070,7 @@ uint32_t r600_translate_texformat(enum pipe_format format,
goto out_word4;
}
}
-
+
}
out_word4:
if (word4_p)
diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c
index 1c227d3215..7482d15e12 100644
--- a/src/gallium/drivers/r600/r600_translate.c
+++ b/src/gallium/drivers/r600/r600_translate.c
@@ -22,181 +22,34 @@
*
* Authors: Dave Airlie <airlied@redhat.com>
*/
-#include "translate/translate_cache.h"
-#include "translate/translate.h"
-#include <pipebuffer/pb_buffer.h>
+
#include <util/u_index_modify.h>
+#include "util/u_inlines.h"
+#include "util/u_upload_mgr.h"
#include "r600_pipe.h"
-void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
-{
- struct pipe_context *pipe = &rctx->context;
- struct translate_key key = {0};
- struct translate_element *te;
- unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0};
- struct translate *tr;
- struct r600_vertex_element *ve = rctx->vertex_elements;
- boolean vb_translated[PIPE_MAX_ATTRIBS] = {0};
- void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map;
- struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer;
- struct pipe_resource *out_buffer;
- unsigned i, num_verts;
- struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS];
-
- /* Initialize the translate key, i.e. the recipe how vertices should be
- * translated. */
- for (i = 0; i < ve->count; i++) {
- struct pipe_vertex_buffer *vb =
- &rctx->vertex_buffer[ve->elements[i].vertex_buffer_index];
- enum pipe_format output_format = ve->hw_format[i];
- unsigned output_format_size = ve->hw_format_size[i];
-
- /* Check for support. */
- if (ve->elements[i].src_format == ve->hw_format[i]) {
- continue;
- }
-
- /* Workaround for translate: output floats instead of halfs. */
- switch (output_format) {
- case PIPE_FORMAT_R16_FLOAT:
- output_format = PIPE_FORMAT_R32_FLOAT;
- output_format_size = 4;
- break;
- case PIPE_FORMAT_R16G16_FLOAT:
- output_format = PIPE_FORMAT_R32G32_FLOAT;
- output_format_size = 8;
- break;
- case PIPE_FORMAT_R16G16B16_FLOAT:
- output_format = PIPE_FORMAT_R32G32B32_FLOAT;
- output_format_size = 12;
- break;
- case PIPE_FORMAT_R16G16B16A16_FLOAT:
- output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- output_format_size = 16;
- break;
- default:;
- }
-
- /* Add this vertex element. */
- te = &key.element[key.nr_elements];
- /*te->type;
- te->instance_divisor;*/
- te->input_buffer = ve->elements[i].vertex_buffer_index;
- te->input_format = ve->elements[i].src_format;
- te->input_offset = vb->buffer_offset + ve->elements[i].src_offset;
- te->output_format = output_format;
- te->output_offset = key.output_stride;
-
- key.output_stride += output_format_size;
- vb_translated[ve->elements[i].vertex_buffer_index] = TRUE;
- tr_elem_index[i] = key.nr_elements;
- key.nr_elements++;
- }
-
- /* Get a translate object. */
- tr = translate_cache_find(rctx->tran.translate_cache, &key);
-
- /* Map buffers we want to translate. */
- for (i = 0; i < rctx->nvertex_buffer; i++) {
- if (vb_translated[i]) {
- struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i];
-
- vb_map[i] = pipe_buffer_map(pipe, vb->buffer,
- PIPE_TRANSFER_READ, &vb_transfer[i]);
-
- tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index);
- }
- }
-
- /* Create and map the output buffer. */
- num_verts = rctx->vb_max_index + 1;
-
- out_buffer = pipe_buffer_create(&rctx->screen->screen,
- PIPE_BIND_VERTEX_BUFFER,
- key.output_stride * num_verts);
-
- out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE,
- &out_transfer);
-
- /* Translate. */
- tr->run(tr, 0, num_verts, 0, out_map);
-
- /* Unmap all buffers. */
- for (i = 0; i < rctx->nvertex_buffer; i++) {
- if (vb_translated[i]) {
- pipe_buffer_unmap(pipe, rctx->vertex_buffer[i].buffer,
- vb_transfer[i]);
- }
- }
-
- pipe_buffer_unmap(pipe, out_buffer, out_transfer);
-
- /* Setup the new vertex buffer in the first free slot. */
- for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
- struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i];
-
- if (!vb->buffer) {
- pipe_resource_reference(&vb->buffer, out_buffer);
- vb->buffer_offset = 0;
- vb->max_index = num_verts - 1;
- vb->stride = key.output_stride;
- rctx->tran.vb_slot = i;
- break;
- }
- }
-
- /* Save and replace vertex elements. */
- for (i = 0; i < ve->count; i++) {
- if (vb_translated[ve->elements[i].vertex_buffer_index]) {
- te = &key.element[tr_elem_index[i]];
- new_velems[i].instance_divisor = ve->elements[i].instance_divisor;
- new_velems[i].src_format = te->output_format;
- new_velems[i].src_offset = te->output_offset;
- new_velems[i].vertex_buffer_index = rctx->tran.vb_slot;
- } else {
- memcpy(&new_velems[i], &ve->elements[i],
- sizeof(struct pipe_vertex_element));
- }
- }
-
- rctx->tran.new_velems = pipe->create_vertex_elements_state(pipe, ve->count, new_velems);
- pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems);
-
- pipe_resource_reference(&out_buffer, NULL);
-}
-
-void r600_end_vertex_translate(struct r600_pipe_context *rctx)
-{
- struct pipe_context *pipe = &rctx->context;
-
- if (rctx->tran.new_velems == NULL) {
- return;
- }
- /* Restore vertex elements. */
- if (rctx->vertex_elements == rctx->tran.new_velems) {
- pipe->bind_vertex_elements_state(pipe, NULL);
- }
- pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems);
- rctx->tran.new_velems = NULL;
-
- /* Delete the now-unused VBO. */
- pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer,
- NULL);
-}
void r600_translate_index_buffer(struct r600_pipe_context *r600,
- struct pipe_resource **index_buffer,
- unsigned *index_size,
- unsigned *start, unsigned count)
+ struct pipe_resource **index_buffer,
+ unsigned *index_size,
+ unsigned *start, unsigned count)
{
+ struct pipe_resource *out_buffer = NULL;
+ unsigned out_offset;
+ void *ptr;
+ boolean flushed;
+
switch (*index_size) {
case 1:
- util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count);
+ u_upload_alloc(r600->vbuf_mgr->uploader, 0, count * 2,
+ &out_offset, &out_buffer, &flushed, &ptr);
+
+ util_shorten_ubyte_elts_to_userptr(
+ &r600->context, *index_buffer, 0, *start, count, ptr);
+
+ pipe_resource_reference(index_buffer, out_buffer);
*index_size = 2;
- *start = 0;
- break;
- case 2:
- case 4:
+ *start = out_offset / 2;
break;
}
}
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index ae19bfb828..df70e2889e 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -67,6 +67,10 @@
#define PKT3_SET_CTL_CONST 0x6F
#define PKT3_SURFACE_BASE_UPDATE 0x73
+#define PREDICATION_OP_CLEAR 0x0
+#define PREDICATION_OP_ZPASS 0x1
+#define PREDICATION_OP_PRIMCOUNT 0x2
+
#define PKT_TYPE_S(x) (((x) & 0x3) << 30)
#define PKT_TYPE_G(x) (((x) >> 30) & 0x3)
#define PKT_TYPE_C 0x3FFFFFFF
@@ -248,6 +252,8 @@
#define S_0280A0_SOURCE_FORMAT(x) (((x) & 0x1) << 27)
#define G_0280A0_SOURCE_FORMAT(x) (((x) >> 27) & 0x1)
#define C_0280A0_SOURCE_FORMAT 0xF7FFFFFF
+#define V_0280A0_EXPORT_FULL 0
+#define V_0280A0_EXPORT_NORM 1
#define R_028060_CB_COLOR0_SIZE 0x028060
#define S_028060_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0)
#define G_028060_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF)
@@ -960,8 +966,8 @@
#define S_038010_SRF_MODE_ALL(x) (((x) & 0x1) << 10)
#define G_038010_SRF_MODE_ALL(x) (((x) >> 10) & 0x1)
#define C_038010_SRF_MODE_ALL 0xFFFFFBFF
-#define V_038010_SFR_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000
-#define V_038010_SFR_MODE_NO_ZERO 0x00000001
+#define V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000
+#define V_038010_SRF_MODE_NO_ZERO 0x00000001
#define S_038010_FORCE_DEGAMMA(x) (((x) & 0x1) << 11)
#define G_038010_FORCE_DEGAMMA(x) (((x) >> 11) & 0x1)
#define C_038010_FORCE_DEGAMMA 0xFFFFF7FF
@@ -2332,31 +2338,6 @@
#define R_0280D4_CB_COLOR5_TILE 0x0280D4
#define R_0280D8_CB_COLOR6_TILE 0x0280D8
#define R_0280DC_CB_COLOR7_TILE 0x0280DC
-#define R_028808_CB_COLOR_CONTROL 0x028808
-#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0)
-#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1)
-#define C_028808_FOG_ENABLE 0xFFFFFFFE
-#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1)
-#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1)
-#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD
-#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2)
-#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1)
-#define C_028808_DITHER_ENABLE 0xFFFFFFFB
-#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3)
-#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1)
-#define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7
-#define S_028808_SPECIAL_OP(x) (((x) & 0x7) << 4)
-#define G_028808_SPECIAL_OP(x) (((x) >> 4) & 0x7)
-#define C_028808_SPECIAL_OP 0xFFFFFF8F
-#define S_028808_PER_MRT_BLEND(x) (((x) & 0x1) << 7)
-#define G_028808_PER_MRT_BLEND(x) (((x) >> 7) & 0x1)
-#define C_028808_PER_MRT_BLEND 0xFFFFFF7F
-#define S_028808_TARGET_BLEND_ENABLE(x) (((x) & 0xFF) << 8)
-#define G_028808_TARGET_BLEND_ENABLE(x) (((x) >> 8) & 0xFF)
-#define C_028808_TARGET_BLEND_ENABLE 0xFFFF00FF
-#define S_028808_ROP3(x) (((x) & 0xFF) << 16)
-#define G_028808_ROP3(x) (((x) >> 16) & 0xFF)
-#define C_028808_ROP3 0xFF00FFFF
#define R_028614_SPI_VS_OUT_ID_0 0x028614
#define S_028614_SEMANTIC_0(x) (((x) & 0xFF) << 0)
#define G_028614_SEMANTIC_0(x) (((x) >> 0) & 0xFF)
diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c
index 892dee86ba..b3c7d1494f 100644
--- a/src/gallium/drivers/r600/r700_asm.c
+++ b/src/gallium/drivers/r600/r700_asm.c
@@ -26,11 +26,18 @@
#include "r600_asm.h"
#include "r700_sq.h"
+void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf)
+{
+ unsigned count = (cf->ndw / 4) - 1;
+ *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
+ *bytecode++ = S_SQ_CF_WORD1_CF_INST(cf->inst) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count) |
+ S_SQ_CF_WORD1_COUNT_3(count >> 3);
+}
int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
{
- unsigned i;
-
bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
@@ -61,18 +68,11 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
+ S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
}
- if (alu->last) {
- if (alu->nliteral && !alu->literal_added) {
- R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst);
- }
- for (i = 0; i < alu->nliteral; i++) {
- bc->bytecode[id++] = alu->value[i];
- }
- }
return 0;
}
diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c
index 94e57e40f8..6e2d6ba1ef 100644
--- a/src/gallium/drivers/rbug/rbug_context.c
+++ b/src/gallium/drivers/rbug/rbug_context.c
@@ -44,7 +44,10 @@ rbug_destroy(struct pipe_context *_pipe)
struct pipe_context *pipe = rb_pipe->pipe;
remove_from_list(&rb_pipe->list);
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->destroy(pipe);
+ rb_pipe->pipe = NULL;
+ pipe_mutex_unlock(rb_pipe->call_mutex);
FREE(rb_pipe);
}
@@ -112,7 +115,12 @@ rbug_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info)
pipe_mutex_lock(rb_pipe->draw_mutex);
rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_BEFORE);
- pipe->draw_vbo(pipe, info);
+ pipe_mutex_lock(rb_pipe->call_mutex);
+ if (!(rb_pipe->curr.fs && rb_pipe->curr.fs->disabled) &&
+ !(rb_pipe->curr.gs && rb_pipe->curr.gs->disabled) &&
+ !(rb_pipe->curr.vs && rb_pipe->curr.vs->disabled))
+ pipe->draw_vbo(pipe, info);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_AFTER);
pipe_mutex_unlock(rb_pipe->draw_mutex);
@@ -125,8 +133,10 @@ rbug_create_query(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
return pipe->create_query(pipe,
query_type);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -136,8 +146,10 @@ rbug_destroy_query(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->destroy_query(pipe,
query);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -147,8 +159,10 @@ rbug_begin_query(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->begin_query(pipe,
query);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -158,8 +172,10 @@ rbug_end_query(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->end_query(pipe,
query);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static boolean
@@ -170,11 +186,16 @@ rbug_get_query_result(struct pipe_context *_pipe,
{
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ boolean ret;
- return pipe->get_query_result(pipe,
- query,
- wait,
- result);
+ pipe_mutex_lock(rb_pipe->call_mutex);
+ ret = pipe->get_query_result(pipe,
+ query,
+ wait,
+ result);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
+
+ return ret;
}
static void *
@@ -183,9 +204,14 @@ rbug_create_blend_state(struct pipe_context *_pipe,
{
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ void *ret;
+
+ pipe_mutex_lock(rb_pipe->call_mutex);
+ ret = pipe->create_blend_state(pipe,
+ blend);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
- return pipe->create_blend_state(pipe,
- blend);
+ return ret;
}
static void
@@ -195,8 +221,10 @@ rbug_bind_blend_state(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->bind_blend_state(pipe,
- blend);
+ blend);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -206,8 +234,10 @@ rbug_delete_blend_state(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->delete_blend_state(pipe,
blend);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void *
@@ -216,9 +246,14 @@ rbug_create_sampler_state(struct pipe_context *_pipe,
{
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ void *ret;
+
+ pipe_mutex_lock(rb_pipe->call_mutex);
+ ret = pipe->create_sampler_state(pipe,
+ sampler);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
- return pipe->create_sampler_state(pipe,
- sampler);
+ return ret;
}
static void
@@ -229,9 +264,11 @@ rbug_bind_fragment_sampler_states(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->bind_fragment_sampler_states(pipe,
num_samplers,
samplers);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -242,9 +279,11 @@ rbug_bind_vertex_sampler_states(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->bind_vertex_sampler_states(pipe,
num_samplers,
samplers);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -254,8 +293,10 @@ rbug_delete_sampler_state(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->delete_sampler_state(pipe,
sampler);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void *
@@ -264,9 +305,14 @@ rbug_create_rasterizer_state(struct pipe_context *_pipe,
{
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ void *ret;
- return pipe->create_rasterizer_state(pipe,
- rasterizer);
+ pipe_mutex_lock(rb_pipe->call_mutex);
+ ret = pipe->create_rasterizer_state(pipe,
+ rasterizer);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
+
+ return ret;
}
static void
@@ -276,8 +322,10 @@ rbug_bind_rasterizer_state(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->bind_rasterizer_state(pipe,
rasterizer);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -287,8 +335,10 @@ rbug_delete_rasterizer_state(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->delete_rasterizer_state(pipe,
rasterizer);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void *
@@ -297,9 +347,14 @@ rbug_create_depth_stencil_alpha_state(struct pipe_context *_pipe,
{
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ void *ret;
+
+ pipe_mutex_lock(rb_pipe->call_mutex);
+ ret = pipe->create_depth_stencil_alpha_state(pipe,
+ depth_stencil_alpha);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
- return pipe->create_depth_stencil_alpha_state(pipe,
- depth_stencil_alpha);
+ return ret;
}
static void
@@ -309,8 +364,10 @@ rbug_bind_depth_stencil_alpha_state(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->bind_depth_stencil_alpha_state(pipe,
depth_stencil_alpha);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -320,8 +377,10 @@ rbug_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->delete_depth_stencil_alpha_state(pipe,
depth_stencil_alpha);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void *
@@ -332,7 +391,10 @@ rbug_create_fs_state(struct pipe_context *_pipe,
struct pipe_context *pipe = rb_pipe->pipe;
void *result;
+ pipe_mutex_lock(rb_pipe->call_mutex);
result = pipe->create_fs_state(pipe, state);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
+
if (!result)
return NULL;
@@ -347,10 +409,14 @@ rbug_bind_fs_state(struct pipe_context *_pipe,
struct pipe_context *pipe = rb_pipe->pipe;
void *fs;
+ pipe_mutex_lock(rb_pipe->call_mutex);
+
fs = rbug_shader_unwrap(_fs);
rb_pipe->curr.fs = rbug_shader(_fs);
pipe->bind_fs_state(pipe,
fs);
+
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -360,7 +426,9 @@ rbug_delete_fs_state(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct rbug_shader *rb_shader = rbug_shader(_fs);
+ pipe_mutex_lock(rb_pipe->call_mutex);
rbug_shader_destroy(rb_pipe, rb_shader);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void *
@@ -371,7 +439,10 @@ rbug_create_vs_state(struct pipe_context *_pipe,
struct pipe_context *pipe = rb_pipe->pipe;
void *result;
+ pipe_mutex_lock(rb_pipe->call_mutex);
result = pipe->create_vs_state(pipe, state);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
+
if (!result)
return NULL;
@@ -386,10 +457,14 @@ rbug_bind_vs_state(struct pipe_context *_pipe,
struct pipe_context *pipe = rb_pipe->pipe;
void *vs;
+ pipe_mutex_lock(rb_pipe->call_mutex);
+
vs = rbug_shader_unwrap(_vs);
rb_pipe->curr.vs = rbug_shader(_vs);
pipe->bind_vs_state(pipe,
vs);
+
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -399,7 +474,9 @@ rbug_delete_vs_state(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct rbug_shader *rb_shader = rbug_shader(_vs);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
rbug_shader_destroy(rb_pipe, rb_shader);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void *
@@ -410,7 +487,10 @@ rbug_create_gs_state(struct pipe_context *_pipe,
struct pipe_context *pipe = rb_pipe->pipe;
void *result;
+ pipe_mutex_lock(rb_pipe->call_mutex);
result = pipe->create_gs_state(pipe, state);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
+
if (!result)
return NULL;
@@ -425,10 +505,14 @@ rbug_bind_gs_state(struct pipe_context *_pipe,
struct pipe_context *pipe = rb_pipe->pipe;
void *gs;
+ pipe_mutex_lock(rb_pipe->call_mutex);
+
gs = rbug_shader_unwrap(_gs);
rb_pipe->curr.gs = rbug_shader(_gs);
pipe->bind_gs_state(pipe,
gs);
+
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -438,7 +522,9 @@ rbug_delete_gs_state(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct rbug_shader *rb_shader = rbug_shader(_gs);
+ pipe_mutex_lock(rb_pipe->call_mutex);
rbug_shader_destroy(rb_pipe, rb_shader);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void *
@@ -448,10 +534,15 @@ rbug_create_vertex_elements_state(struct pipe_context *_pipe,
{
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ void *ret;
- return pipe->create_vertex_elements_state(pipe,
+ pipe_mutex_lock(rb_pipe->call_mutex);
+ ret = pipe->create_vertex_elements_state(pipe,
num_elements,
vertex_elements);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
+
+ return ret;
}
static void
@@ -461,8 +552,10 @@ rbug_bind_vertex_elements_state(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->bind_vertex_elements_state(pipe,
velems);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -472,8 +565,10 @@ rbug_delete_vertex_elements_state(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->delete_vertex_elements_state(pipe,
velems);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -483,8 +578,10 @@ rbug_set_blend_color(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->set_blend_color(pipe,
blend_color);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -494,8 +591,10 @@ rbug_set_stencil_ref(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->set_stencil_ref(pipe,
stencil_ref);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -505,8 +604,10 @@ rbug_set_clip_state(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->set_clip_state(pipe,
clip);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -526,10 +627,12 @@ rbug_set_constant_buffer(struct pipe_context *_pipe,
resource = unwrapped_resource;
}
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->set_constant_buffer(pipe,
shader,
index,
resource);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -542,8 +645,12 @@ rbug_set_framebuffer_state(struct pipe_context *_pipe,
struct pipe_framebuffer_state *state = NULL;
unsigned i;
+ /* must protect curr status */
+ pipe_mutex_lock(rb_pipe->call_mutex);
+
rb_pipe->curr.nr_cbufs = 0;
memset(rb_pipe->curr.cbufs, 0, sizeof(rb_pipe->curr.cbufs));
+ rb_pipe->curr.zsbuf = NULL;
/* unwrap the input state */
if (_state) {
@@ -556,11 +663,15 @@ rbug_set_framebuffer_state(struct pipe_context *_pipe,
rb_pipe->curr.cbufs[i] = rbug_resource(_state->cbufs[i]->texture);
}
unwrapped_state.zsbuf = rbug_surface_unwrap(_state->zsbuf);
+ if (_state->zsbuf)
+ rb_pipe->curr.zsbuf = rbug_resource(_state->zsbuf->texture);
state = &unwrapped_state;
}
pipe->set_framebuffer_state(pipe,
state);
+
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -570,8 +681,10 @@ rbug_set_polygon_stipple(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->set_polygon_stipple(pipe,
poly_stipple);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -581,8 +694,10 @@ rbug_set_scissor_state(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->set_scissor_state(pipe,
scissor);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -592,8 +707,10 @@ rbug_set_viewport_state(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->set_viewport_state(pipe,
viewport);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -607,6 +724,9 @@ rbug_set_fragment_sampler_views(struct pipe_context *_pipe,
struct pipe_sampler_view **views = NULL;
unsigned i;
+ /* must protect curr status */
+ pipe_mutex_lock(rb_pipe->call_mutex);
+
rb_pipe->curr.num_fs_views = 0;
memset(rb_pipe->curr.fs_views, 0, sizeof(rb_pipe->curr.fs_views));
memset(rb_pipe->curr.fs_texs, 0, sizeof(rb_pipe->curr.fs_texs));
@@ -623,6 +743,8 @@ rbug_set_fragment_sampler_views(struct pipe_context *_pipe,
}
pipe->set_fragment_sampler_views(pipe, num, views);
+
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -636,6 +758,9 @@ rbug_set_vertex_sampler_views(struct pipe_context *_pipe,
struct pipe_sampler_view **views = NULL;
unsigned i;
+ /* must protect curr status */
+ pipe_mutex_lock(rb_pipe->call_mutex);
+
rb_pipe->curr.num_vs_views = 0;
memset(rb_pipe->curr.vs_views, 0, sizeof(rb_pipe->curr.vs_views));
memset(rb_pipe->curr.vs_texs, 0, sizeof(rb_pipe->curr.vs_texs));
@@ -652,6 +777,8 @@ rbug_set_vertex_sampler_views(struct pipe_context *_pipe,
}
pipe->set_vertex_sampler_views(pipe, num, views);
+
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -665,6 +792,8 @@ rbug_set_vertex_buffers(struct pipe_context *_pipe,
struct pipe_vertex_buffer *buffers = NULL;
unsigned i;
+ pipe_mutex_lock(rb_pipe->call_mutex);
+
if (num_buffers) {
memcpy(unwrapped_buffers, _buffers, num_buffers * sizeof(*_buffers));
for (i = 0; i < num_buffers; i++)
@@ -675,6 +804,8 @@ rbug_set_vertex_buffers(struct pipe_context *_pipe,
pipe->set_vertex_buffers(pipe,
num_buffers,
buffers);
+
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -691,7 +822,9 @@ rbug_set_index_buffer(struct pipe_context *_pipe,
ib = &unwrapped_ib;
}
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->set_index_buffer(pipe, ib);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -701,7 +834,9 @@ rbug_set_sample_mask(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->set_sample_mask(pipe, sample_mask);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -722,6 +857,7 @@ rbug_resource_copy_region(struct pipe_context *_pipe,
struct pipe_resource *dst = rb_resource_dst->resource;
struct pipe_resource *src = rb_resource_src->resource;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->resource_copy_region(pipe,
dst,
dst_level,
@@ -731,6 +867,7 @@ rbug_resource_copy_region(struct pipe_context *_pipe,
src,
src_level,
src_box);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -743,11 +880,13 @@ rbug_clear(struct pipe_context *_pipe,
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->clear(pipe,
buffers,
rgba,
depth,
stencil);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -762,6 +901,7 @@ rbug_clear_render_target(struct pipe_context *_pipe,
struct pipe_context *pipe = rb_pipe->pipe;
struct pipe_surface *dst = rb_surface_dst->surface;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->clear_render_target(pipe,
dst,
rgba,
@@ -769,6 +909,7 @@ rbug_clear_render_target(struct pipe_context *_pipe,
dsty,
width,
height);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
@@ -785,6 +926,7 @@ rbug_clear_depth_stencil(struct pipe_context *_pipe,
struct pipe_context *pipe = rb_pipe->pipe;
struct pipe_surface *dst = rb_surface_dst->surface;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->clear_depth_stencil(pipe,
dst,
clear_flags,
@@ -794,36 +936,20 @@ rbug_clear_depth_stencil(struct pipe_context *_pipe,
dsty,
width,
height);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void
rbug_flush(struct pipe_context *_pipe,
- unsigned flags,
struct pipe_fence_handle **fence)
{
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe->pipe;
+ pipe_mutex_lock(rb_pipe->call_mutex);
pipe->flush(pipe,
- flags,
fence);
-}
-
-static unsigned int
-rbug_is_resource_referenced(struct pipe_context *_pipe,
- struct pipe_resource *_resource,
- unsigned level,
- int layer)
-{
- struct rbug_context *rb_pipe = rbug_context(_pipe);
- struct rbug_resource *rb_resource = rbug_resource(_resource);
- struct pipe_context *pipe = rb_pipe->pipe;
- struct pipe_resource *resource = rb_resource->resource;
-
- return pipe->is_resource_referenced(pipe,
- resource,
- level,
- layer);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static struct pipe_sampler_view *
@@ -837,9 +963,11 @@ rbug_context_create_sampler_view(struct pipe_context *_pipe,
struct pipe_resource *resource = rb_resource->resource;
struct pipe_sampler_view *result;
+ pipe_mutex_lock(rb_pipe->call_mutex);
result = pipe->create_sampler_view(pipe,
resource,
templ);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
if (result)
return rbug_sampler_view_create(rb_pipe, rb_resource, result);
@@ -865,9 +993,11 @@ rbug_context_create_surface(struct pipe_context *_pipe,
struct pipe_resource *resource = rb_resource->resource;
struct pipe_surface *result;
+ pipe_mutex_lock(rb_pipe->call_mutex);
result = pipe->create_surface(pipe,
resource,
surf_tmpl);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
if (result)
return rbug_surface_create(rb_pipe, rb_resource, result);
@@ -878,8 +1008,13 @@ static void
rbug_context_surface_destroy(struct pipe_context *_pipe,
struct pipe_surface *_surface)
{
- rbug_surface_destroy(rbug_context(_pipe),
- rbug_surface(_surface));
+ struct rbug_context *rb_pipe = rbug_context(_pipe);
+ struct rbug_surface *rb_surface = rbug_surface(_surface);
+
+ pipe_mutex_lock(rb_pipe->call_mutex);
+ rbug_surface_destroy(rb_pipe,
+ rb_surface);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
@@ -897,11 +1032,13 @@ rbug_context_get_transfer(struct pipe_context *_context,
struct pipe_resource *resource = rb_resource->resource;
struct pipe_transfer *result;
+ pipe_mutex_lock(rb_pipe->call_mutex);
result = context->get_transfer(context,
resource,
level,
usage,
box);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
if (result)
return rbug_transfer_create(rb_pipe, rb_resource, result);
@@ -912,8 +1049,13 @@ static void
rbug_context_transfer_destroy(struct pipe_context *_pipe,
struct pipe_transfer *_transfer)
{
- rbug_transfer_destroy(rbug_context(_pipe),
- rbug_transfer(_transfer));
+ struct rbug_context *rb_pipe = rbug_context(_pipe);
+ struct rbug_transfer *rb_transfer =rbug_transfer(_transfer);
+
+ pipe_mutex_lock(rb_pipe->call_mutex);
+ rbug_transfer_destroy(rb_pipe,
+ rb_transfer);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
static void *
@@ -924,9 +1066,14 @@ rbug_context_transfer_map(struct pipe_context *_context,
struct rbug_transfer *rb_transfer = rbug_transfer(_transfer);
struct pipe_context *context = rb_pipe->pipe;
struct pipe_transfer *transfer = rb_transfer->transfer;
+ void *ret;
- return context->transfer_map(context,
+ pipe_mutex_lock(rb_pipe->call_mutex);
+ ret = context->transfer_map(context,
transfer);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
+
+ return ret;
}
@@ -941,9 +1088,11 @@ rbug_context_transfer_flush_region(struct pipe_context *_context,
struct pipe_context *context = rb_pipe->pipe;
struct pipe_transfer *transfer = rb_transfer->transfer;
+ pipe_mutex_lock(rb_pipe->call_mutex);
context->transfer_flush_region(context,
transfer,
box);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
@@ -956,8 +1105,10 @@ rbug_context_transfer_unmap(struct pipe_context *_context,
struct pipe_context *context = rb_pipe->pipe;
struct pipe_transfer *transfer = rb_transfer->transfer;
+ pipe_mutex_lock(rb_pipe->call_mutex);
context->transfer_unmap(context,
transfer);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
@@ -976,6 +1127,7 @@ rbug_context_transfer_inline_write(struct pipe_context *_context,
struct pipe_context *context = rb_pipe->pipe;
struct pipe_resource *resource = rb_resource->resource;
+ pipe_mutex_lock(rb_pipe->call_mutex);
context->transfer_inline_write(context,
resource,
level,
@@ -984,6 +1136,22 @@ rbug_context_transfer_inline_write(struct pipe_context *_context,
data,
stride,
layer_stride);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
+}
+
+
+static void rbug_redefine_user_buffer(struct pipe_context *_context,
+ struct pipe_resource *_resource,
+ unsigned offset, unsigned size)
+{
+ struct rbug_context *rb_pipe = rbug_context(_context);
+ struct rbug_resource *rb_resource = rbug_resource(_resource);
+ struct pipe_context *context = rb_pipe->pipe;
+ struct pipe_resource *resource = rb_resource->resource;
+
+ pipe_mutex_lock(rb_pipe->call_mutex);
+ context->redefine_user_buffer(context, resource, offset, size);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
}
@@ -1061,7 +1229,6 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
rb_pipe->base.clear_render_target = rbug_clear_render_target;
rb_pipe->base.clear_depth_stencil = rbug_clear_depth_stencil;
rb_pipe->base.flush = rbug_flush;
- rb_pipe->base.is_resource_referenced = rbug_is_resource_referenced;
rb_pipe->base.create_sampler_view = rbug_context_create_sampler_view;
rb_pipe->base.sampler_view_destroy = rbug_context_sampler_view_destroy;
rb_pipe->base.create_surface = rbug_context_create_surface;
@@ -1072,6 +1239,7 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
rb_pipe->base.transfer_unmap = rbug_context_transfer_unmap;
rb_pipe->base.transfer_flush_region = rbug_context_transfer_flush_region;
rb_pipe->base.transfer_inline_write = rbug_context_transfer_inline_write;
+ rb_pipe->base.redefine_user_buffer = rbug_redefine_user_buffer;
rb_pipe->pipe = pipe;
diff --git a/src/gallium/drivers/rbug/rbug_core.c b/src/gallium/drivers/rbug/rbug_core.c
index eb772d19d0..b80bcd4228 100644
--- a/src/gallium/drivers/rbug/rbug_core.c
+++ b/src/gallium/drivers/rbug/rbug_core.c
@@ -498,7 +498,7 @@ rbug_context_flush(struct rbug_rbug *tr_rbug, struct rbug_header *header, uint32
/* protect the pipe context */
pipe_mutex_lock(rb_context->call_mutex);
- rb_context->pipe->flush(rb_context->pipe, flush->flags, NULL);
+ rb_context->pipe->flush(rb_context->pipe, NULL);
pipe_mutex_unlock(rb_context->call_mutex);
pipe_mutex_unlock(rb_screen->list_mutex);
diff --git a/src/gallium/drivers/rbug/rbug_objects.c b/src/gallium/drivers/rbug/rbug_objects.c
index 7d7cc482ae..15f5db4009 100644
--- a/src/gallium/drivers/rbug/rbug_objects.c
+++ b/src/gallium/drivers/rbug/rbug_objects.c
@@ -98,8 +98,9 @@ rbug_surface_create(struct rbug_context *rb_context,
pipe_reference_init(&rb_surface->base.reference, 1);
rb_surface->base.texture = NULL;
+ rb_surface->base.context = &rb_context->base;
+ rb_surface->surface = surface; /* we own the surface already */
pipe_resource_reference(&rb_surface->base.texture, &rb_resource->base);
- rb_surface->surface = surface;
return &rb_surface->base;
@@ -113,8 +114,7 @@ rbug_surface_destroy(struct rbug_context *rb_context,
struct rbug_surface *rb_surface)
{
pipe_resource_reference(&rb_surface->base.texture, NULL);
- rb_context->pipe->surface_destroy(rb_context->pipe,
- rb_surface->surface);
+ pipe_surface_reference(&rb_surface->surface, NULL);
FREE(rb_surface);
}
diff --git a/src/gallium/drivers/rbug/rbug_screen.c b/src/gallium/drivers/rbug/rbug_screen.c
index d635ce575c..7c8dfdcc12 100644
--- a/src/gallium/drivers/rbug/rbug_screen.c
+++ b/src/gallium/drivers/rbug/rbug_screen.c
@@ -106,8 +106,7 @@ rbug_screen_is_format_supported(struct pipe_screen *_screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned tex_usage,
- unsigned geom_flags)
+ unsigned tex_usage)
{
struct rbug_screen *rb_screen = rbug_screen(_screen);
struct pipe_screen *screen = rb_screen->screen;
@@ -116,8 +115,7 @@ rbug_screen_is_format_supported(struct pipe_screen *_screen,
format,
target,
sample_count,
- tex_usage,
- geom_flags);
+ tex_usage);
}
static struct pipe_context *
@@ -240,30 +238,28 @@ rbug_screen_fence_reference(struct pipe_screen *_screen,
fence);
}
-static int
+static boolean
rbug_screen_fence_signalled(struct pipe_screen *_screen,
- struct pipe_fence_handle *fence,
- unsigned flags)
+ struct pipe_fence_handle *fence)
{
struct rbug_screen *rb_screen = rbug_screen(_screen);
struct pipe_screen *screen = rb_screen->screen;
return screen->fence_signalled(screen,
- fence,
- flags);
+ fence);
}
-static int
+static boolean
rbug_screen_fence_finish(struct pipe_screen *_screen,
struct pipe_fence_handle *fence,
- unsigned flags)
+ uint64_t timeout)
{
struct rbug_screen *rb_screen = rbug_screen(_screen);
struct pipe_screen *screen = rb_screen->screen;
return screen->fence_finish(screen,
fence,
- flags);
+ timeout);
}
boolean
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index e935ce6d21..ce22f64622 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -91,10 +91,17 @@ softpipe_destroy( struct pipe_context *pipe )
if (softpipe->draw)
draw_destroy( softpipe->draw );
- softpipe->quad.shade->destroy( softpipe->quad.shade );
- softpipe->quad.depth_test->destroy( softpipe->quad.depth_test );
- softpipe->quad.blend->destroy( softpipe->quad.blend );
- softpipe->quad.pstipple->destroy( softpipe->quad.pstipple );
+ if (softpipe->quad.shade)
+ softpipe->quad.shade->destroy( softpipe->quad.shade );
+
+ if (softpipe->quad.depth_test)
+ softpipe->quad.depth_test->destroy( softpipe->quad.depth_test );
+
+ if (softpipe->quad.blend)
+ softpipe->quad.blend->destroy( softpipe->quad.blend );
+
+ if (softpipe->quad.pstipple)
+ softpipe->quad.pstipple->destroy( softpipe->quad.pstipple );
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
sp_destroy_tile_cache(softpipe->cbuf_cache[i]);
@@ -105,8 +112,8 @@ softpipe_destroy( struct pipe_context *pipe )
pipe_surface_reference(&softpipe->framebuffer.zsbuf, NULL);
for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
- sp_destroy_tex_tile_cache(softpipe->tex_cache[i]);
- pipe_sampler_view_reference(&softpipe->sampler_views[i], NULL);
+ sp_destroy_tex_tile_cache(softpipe->fragment_tex_cache[i]);
+ pipe_sampler_view_reference(&softpipe->fragment_sampler_views[i], NULL);
}
for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
@@ -129,6 +136,10 @@ softpipe_destroy( struct pipe_context *pipe )
}
}
+ for (i = 0; i < softpipe->num_vertex_buffers; i++) {
+ pipe_resource_reference(&softpipe->vertex_buffer[i].buffer, NULL);
+ }
+
tgsi_exec_machine_destroy(softpipe->fs_machine);
FREE( softpipe );
@@ -137,13 +148,13 @@ softpipe_destroy( struct pipe_context *pipe )
/**
* if (the texture is being used as a framebuffer surface)
- * return PIPE_REFERENCED_FOR_WRITE
+ * return SP_REFERENCED_FOR_WRITE
* else if (the texture is a bound texture source)
- * return PIPE_REFERENCED_FOR_READ
+ * return SP_REFERENCED_FOR_READ
* else
- * return PIPE_UNREFERENCED
+ * return SP_UNREFERENCED
*/
-static unsigned int
+unsigned int
softpipe_is_resource_referenced( struct pipe_context *pipe,
struct pipe_resource *texture,
unsigned level, int layer)
@@ -152,40 +163,40 @@ softpipe_is_resource_referenced( struct pipe_context *pipe,
unsigned i;
if (texture->target == PIPE_BUFFER)
- return PIPE_UNREFERENCED;
+ return SP_UNREFERENCED;
/* check if any of the bound drawing surfaces are this texture */
if (softpipe->dirty_render_cache) {
for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
if (softpipe->framebuffer.cbufs[i] &&
softpipe->framebuffer.cbufs[i]->texture == texture) {
- return PIPE_REFERENCED_FOR_WRITE;
+ return SP_REFERENCED_FOR_WRITE;
}
}
if (softpipe->framebuffer.zsbuf &&
softpipe->framebuffer.zsbuf->texture == texture) {
- return PIPE_REFERENCED_FOR_WRITE;
+ return SP_REFERENCED_FOR_WRITE;
}
}
/* check if any of the tex_cache textures are this texture */
for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
- if (softpipe->tex_cache[i] &&
- softpipe->tex_cache[i]->texture == texture)
- return PIPE_REFERENCED_FOR_READ;
+ if (softpipe->fragment_tex_cache[i] &&
+ softpipe->fragment_tex_cache[i]->texture == texture)
+ return SP_REFERENCED_FOR_READ;
}
for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
if (softpipe->vertex_tex_cache[i] &&
softpipe->vertex_tex_cache[i]->texture == texture)
- return PIPE_REFERENCED_FOR_READ;
+ return SP_REFERENCED_FOR_READ;
}
for (i = 0; i < PIPE_MAX_GEOMETRY_SAMPLERS; i++) {
if (softpipe->geometry_tex_cache[i] &&
softpipe->geometry_tex_cache[i]->texture == texture)
- return PIPE_REFERENCED_FOR_READ;
+ return SP_REFERENCED_FOR_READ;
}
- return PIPE_UNREFERENCED;
+ return SP_UNREFERENCED;
}
@@ -219,7 +230,7 @@ softpipe_create_context( struct pipe_screen *screen,
softpipe->use_sse = FALSE;
#endif
- softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE );
+ softpipe->dump_fs = debug_get_bool_option( "SOFTPIPE_DUMP_FS", FALSE );
softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE );
softpipe->pipe.winsys = NULL;
@@ -244,9 +255,7 @@ softpipe_create_context( struct pipe_screen *screen,
softpipe->pipe.draw_stream_output = softpipe_draw_stream_output;
softpipe->pipe.clear = softpipe_clear;
- softpipe->pipe.flush = softpipe_flush;
-
- softpipe->pipe.is_resource_referenced = softpipe_is_resource_referenced;
+ softpipe->pipe.flush = softpipe_flush_wrapped;
softpipe->pipe.render_condition = softpipe_render_condition;
@@ -258,13 +267,22 @@ softpipe_create_context( struct pipe_screen *screen,
softpipe->cbuf_cache[i] = sp_create_tile_cache( &softpipe->pipe );
softpipe->zsbuf_cache = sp_create_tile_cache( &softpipe->pipe );
- for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
- softpipe->tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe );
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ softpipe->fragment_tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe );
+ if (!softpipe->fragment_tex_cache[i])
+ goto fail;
+ }
+
for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
softpipe->vertex_tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe );
+ if (!softpipe->vertex_tex_cache[i])
+ goto fail;
}
+
for (i = 0; i < PIPE_MAX_GEOMETRY_SAMPLERS; i++) {
softpipe->geometry_tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe );
+ if (!softpipe->geometry_tex_cache[i])
+ goto fail;
}
softpipe->fs_machine = tgsi_exec_machine_create();
@@ -295,7 +313,7 @@ softpipe_create_context( struct pipe_screen *screen,
(struct tgsi_sampler **)
softpipe->tgsi.geom_samplers_list);
- if (debug_get_bool_option( "SP_NO_RAST", FALSE ))
+ if (debug_get_bool_option( "SOFTPIPE_NO_RAST", FALSE ))
softpipe->no_rast = TRUE;
softpipe->vbuf_backend = sp_create_vbuf_backend(softpipe);
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 9361a3df09..a572ee8cf0 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -58,7 +58,7 @@ struct softpipe_context {
/** Constant state objects */
struct pipe_blend_state *blend;
- struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+ struct pipe_sampler_state *fragment_samplers[PIPE_MAX_SAMPLERS];
struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS];
struct pipe_sampler_state *geometry_samplers[PIPE_MAX_GEOMETRY_SAMPLERS];
struct pipe_depth_stencil_alpha_state *depth_stencil;
@@ -77,7 +77,7 @@ struct softpipe_context {
struct pipe_framebuffer_state framebuffer;
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
- struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
+ struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
struct pipe_sampler_view *vertex_sampler_views[PIPE_MAX_VERTEX_SAMPLERS];
struct pipe_sampler_view *geometry_sampler_views[PIPE_MAX_GEOMETRY_SAMPLERS];
struct pipe_viewport_state viewport;
@@ -91,8 +91,8 @@ struct softpipe_context {
} so_target;
struct pipe_query_data_so_statistics so_stats;
- unsigned num_samplers;
- unsigned num_sampler_views;
+ unsigned num_fragment_samplers;
+ unsigned num_fragment_sampler_views;
unsigned num_vertex_samplers;
unsigned num_vertex_sampler_views;
unsigned num_geometry_samplers;
@@ -154,9 +154,9 @@ struct softpipe_context {
/** TGSI exec things */
struct {
- struct sp_sampler_varient *geom_samplers_list[PIPE_MAX_GEOMETRY_SAMPLERS];
- struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_VERTEX_SAMPLERS];
- struct sp_sampler_varient *frag_samplers_list[PIPE_MAX_SAMPLERS];
+ struct sp_sampler_variant *geom_samplers_list[PIPE_MAX_GEOMETRY_SAMPLERS];
+ struct sp_sampler_variant *vert_samplers_list[PIPE_MAX_VERTEX_SAMPLERS];
+ struct sp_sampler_variant *frag_samplers_list[PIPE_MAX_SAMPLERS];
} tgsi;
struct tgsi_exec_machine *fs_machine;
@@ -174,7 +174,7 @@ struct softpipe_context {
struct softpipe_tile_cache *zsbuf_cache;
unsigned tex_timestamp;
- struct softpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS];
+ struct softpipe_tex_tile_cache *fragment_tex_cache[PIPE_MAX_SAMPLERS];
struct softpipe_tex_tile_cache *vertex_tex_cache[PIPE_MAX_VERTEX_SAMPLERS];
struct softpipe_tex_tile_cache *geometry_tex_cache[PIPE_MAX_GEOMETRY_SAMPLERS];
@@ -192,10 +192,19 @@ softpipe_context( struct pipe_context *pipe )
}
void
-softpipe_reset_sampler_varients(struct softpipe_context *softpipe);
+softpipe_reset_sampler_variants(struct softpipe_context *softpipe);
struct pipe_context *
softpipe_create_context( struct pipe_screen *, void *priv );
+#define SP_UNREFERENCED 0
+#define SP_REFERENCED_FOR_READ (1 << 0)
+#define SP_REFERENCED_FOR_WRITE (1 << 1)
+
+unsigned int
+softpipe_is_resource_referenced( struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ unsigned level, int layer);
+
#endif /* SP_CONTEXT_H */
diff --git a/src/gallium/drivers/softpipe/sp_fence.c b/src/gallium/drivers/softpipe/sp_fence.c
index 66c5214113..7b79a0df4e 100644
--- a/src/gallium/drivers/softpipe/sp_fence.c
+++ b/src/gallium/drivers/softpipe/sp_fence.c
@@ -41,23 +41,22 @@ softpipe_fence_reference(struct pipe_screen *screen,
}
-static int
+static boolean
softpipe_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence,
- unsigned flags)
+ struct pipe_fence_handle *fence)
{
assert(!fence);
- return 0;
+ return TRUE;
}
-static int
+static boolean
softpipe_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
- unsigned flags)
+ uint64_t timeout)
{
assert(!fence);
- return 0;
+ return TRUE;
}
diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c
index 3a09ee8b4f..b7f283bf27 100644
--- a/src/gallium/drivers/softpipe/sp_flush.c
+++ b/src/gallium/drivers/softpipe/sp_flush.c
@@ -42,7 +42,7 @@
void
softpipe_flush( struct pipe_context *pipe,
- unsigned flags,
+ unsigned flags,
struct pipe_fence_handle **fence )
{
struct softpipe_context *softpipe = softpipe_context(pipe);
@@ -50,9 +50,9 @@ softpipe_flush( struct pipe_context *pipe,
draw_flush(softpipe->draw);
- if (1 || (flags & PIPE_FLUSH_TEXTURE_CACHE)) {
- for (i = 0; i < softpipe->num_sampler_views; i++) {
- sp_flush_tex_tile_cache(softpipe->tex_cache[i]);
+ if (1 || (flags & SP_FLUSH_TEXTURE_CACHE)) {
+ for (i = 0; i < softpipe->num_fragment_sampler_views; i++) {
+ sp_flush_tex_tile_cache(softpipe->fragment_tex_cache[i]);
}
for (i = 0; i < softpipe->num_vertex_sampler_views; i++) {
sp_flush_tex_tile_cache(softpipe->vertex_tex_cache[i]);
@@ -62,34 +62,27 @@ softpipe_flush( struct pipe_context *pipe,
}
}
- if (flags & PIPE_FLUSH_SWAPBUFFERS) {
- /* If this is a swapbuffers, just flush color buffers.
- *
- * The zbuffer changes are not discarded, but held in the cache
- * in the hope that a later clear will wipe them out.
- */
- for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++)
- if (softpipe->cbuf_cache[i])
- sp_flush_tile_cache(softpipe->cbuf_cache[i]);
-
- /* Need this call for hardware buffers before swapbuffers.
- *
- * there should probably be another/different flush-type function
- * that's called before swapbuffers because we don't always want
- * to unmap surfaces when flushing.
- */
- softpipe_unmap_transfers(softpipe);
- }
- else if (flags & PIPE_FLUSH_RENDER_CACHE) {
- for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++)
- if (softpipe->cbuf_cache[i])
- sp_flush_tile_cache(softpipe->cbuf_cache[i]);
-
- if (softpipe->zsbuf_cache)
- sp_flush_tile_cache(softpipe->zsbuf_cache);
-
- softpipe->dirty_render_cache = FALSE;
- }
+ /* If this is a swapbuffers, just flush color buffers.
+ *
+ * The zbuffer changes are not discarded, but held in the cache
+ * in the hope that a later clear will wipe them out.
+ */
+ for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++)
+ if (softpipe->cbuf_cache[i])
+ sp_flush_tile_cache(softpipe->cbuf_cache[i]);
+
+ if (softpipe->zsbuf_cache)
+ sp_flush_tile_cache(softpipe->zsbuf_cache);
+
+ softpipe->dirty_render_cache = FALSE;
+
+ /* Need this call for hardware buffers before swapbuffers.
+ *
+ * there should probably be another/different flush-type function
+ * that's called before swapbuffers because we don't always want
+ * to unmap surfaces when flushing.
+ */
+ softpipe_unmap_transfers(softpipe);
/* Enable to dump BMPs of the color/depth buffers each frame */
#if 0
@@ -108,6 +101,13 @@ softpipe_flush( struct pipe_context *pipe,
*fence = NULL;
}
+void
+softpipe_flush_wrapped( struct pipe_context *pipe,
+ struct pipe_fence_handle **fence )
+{
+ softpipe_flush(pipe, SP_FLUSH_TEXTURE_CACHE, fence);
+}
+
/**
* Flush context if necessary.
@@ -129,21 +129,18 @@ softpipe_flush_resource(struct pipe_context *pipe,
{
unsigned referenced;
- referenced = pipe->is_resource_referenced(pipe, texture, level, layer);
+ referenced = softpipe_is_resource_referenced(pipe, texture, level, layer);
- if ((referenced & PIPE_REFERENCED_FOR_WRITE) ||
- ((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) {
+ if ((referenced & SP_REFERENCED_FOR_WRITE) ||
+ ((referenced & SP_REFERENCED_FOR_READ) && !read_only)) {
/*
* TODO: The semantics of these flush flags are too obtuse. They should
* disappear and the pipe driver should just ensure that all visible
* side-effects happen when they need to happen.
*/
- if (referenced & PIPE_REFERENCED_FOR_WRITE)
- flush_flags |= PIPE_FLUSH_RENDER_CACHE;
-
- if (referenced & PIPE_REFERENCED_FOR_READ)
- flush_flags |= PIPE_FLUSH_TEXTURE_CACHE;
+ if (referenced & SP_REFERENCED_FOR_READ)
+ flush_flags |= SP_FLUSH_TEXTURE_CACHE;
if (cpu_access) {
/*
@@ -155,14 +152,15 @@ softpipe_flush_resource(struct pipe_context *pipe,
if (do_not_block)
return FALSE;
- pipe->flush(pipe, flush_flags, &fence);
+ softpipe_flush(pipe, flush_flags, &fence);
if (fence) {
/*
* This is for illustrative purposes only, as softpipe does not
* have fences.
*/
- pipe->screen->fence_finish(pipe->screen, fence, 0);
+ pipe->screen->fence_finish(pipe->screen, fence,
+ PIPE_TIMEOUT_INFINITE);
pipe->screen->fence_reference(pipe->screen, &fence, NULL);
}
} else {
@@ -170,7 +168,7 @@ softpipe_flush_resource(struct pipe_context *pipe,
* Just flush.
*/
- pipe->flush(pipe, flush_flags, NULL);
+ softpipe_flush(pipe, flush_flags, NULL);
}
}
diff --git a/src/gallium/drivers/softpipe/sp_flush.h b/src/gallium/drivers/softpipe/sp_flush.h
index 22a5ceeb9e..ab01c249ab 100644
--- a/src/gallium/drivers/softpipe/sp_flush.h
+++ b/src/gallium/drivers/softpipe/sp_flush.h
@@ -33,10 +33,17 @@
struct pipe_context;
struct pipe_fence_handle;
+#define SP_FLUSH_TEXTURE_CACHE 0x2
+
void
-softpipe_flush(struct pipe_context *pipe, unsigned flags,
+softpipe_flush(struct pipe_context *pipe,
+ unsigned flags,
struct pipe_fence_handle **fence);
+void
+softpipe_flush_wrapped( struct pipe_context *pipe,
+ struct pipe_fence_handle **fence );
+
boolean
softpipe_flush_resource(struct pipe_context *pipe,
struct pipe_resource *texture,
diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index 6af1b2d061..76cfc0bf51 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -35,6 +35,7 @@
#include "util/u_memory.h"
#include "util/u_format.h"
#include "sp_context.h"
+#include "sp_state.h"
#include "sp_quad.h"
#include "sp_tile_cache.h"
#include "sp_quad_pipe.h"
@@ -794,6 +795,9 @@ blend_fallback(struct quad_stage *qs,
struct softpipe_context *softpipe = qs->softpipe;
const struct pipe_blend_state *blend = softpipe->blend;
unsigned cbuf;
+ boolean write_all;
+
+ write_all = softpipe->fs->color0_writes_all_cbufs;
for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++)
{
@@ -806,15 +810,19 @@ blend_fallback(struct quad_stage *qs,
quads[0]->input.y0);
boolean has_dst_alpha
= util_format_has_alpha(softpipe->framebuffer.cbufs[cbuf]->format);
- uint q, i, j;
+ uint q, i, j, qbuf;
+
+ qbuf = write_all ? 0 : cbuf;
for (q = 0; q < nr; q++) {
struct quad_header *quad = quads[q];
- float (*quadColor)[4] = quad->output.color[cbuf];
+ float (*quadColor)[4];
const int itx = (quad->input.x0 & (TILE_SIZE-1));
const int ity = (quad->input.y0 & (TILE_SIZE-1));
- /* get/swizzle dest colors
+ quadColor = quad->output.color[qbuf];
+
+ /* get/swizzle dest colors
*/
for (j = 0; j < QUAD_SIZE; j++) {
int x = itx + (j & 1);
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index cbdea19af4..48aabc18da 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -123,6 +123,11 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
return 0;
case PIPE_CAP_SHADER_STENCIL_EXPORT:
return 1;
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ return 1;
+ case PIPE_CAP_ARRAY_TEXTURES:
+ return 1;
default:
return 0;
}
@@ -175,15 +180,16 @@ softpipe_is_format_supported( struct pipe_screen *screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned bind,
- unsigned geom_flags )
+ unsigned bind)
{
struct sw_winsys *winsys = softpipe_screen(screen)->winsys;
const struct util_format_description *format_desc;
assert(target == PIPE_BUFFER ||
target == PIPE_TEXTURE_1D ||
+ target == PIPE_TEXTURE_1D_ARRAY ||
target == PIPE_TEXTURE_2D ||
+ target == PIPE_TEXTURE_2D_ARRAY ||
target == PIPE_TEXTURE_RECT ||
target == PIPE_TEXTURE_3D ||
target == PIPE_TEXTURE_CUBE);
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index 5d727dc00d..0ce28f4c6e 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -575,7 +575,7 @@ setup_fragcoord_coeff(struct setup_context *setup, uint slot)
setup->coef[slot].dady[0] = 0.0;
/*Y*/
setup->coef[slot].a0[1] =
- (spfs->origin_lower_left ? setup->softpipe->framebuffer.height : 0)
+ (spfs->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0)
+ (spfs->pixel_center_integer ? 0.0 : 0.5);
setup->coef[slot].dadx[1] = 0.0;
setup->coef[slot].dady[1] = spfs->origin_lower_left ? -1.0 : 1.0;
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 525bf23734..bb19f8cff2 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -74,7 +74,7 @@ struct sp_fragment_shader {
boolean origin_lower_left; /**< fragment shader uses lower left position origin? */
boolean pixel_center_integer; /**< fragment shader uses integer pixel center? */
-
+ boolean color0_writes_all_cbufs; /**< fragment shader writes color0 to all bound cbufs */
void (*prepare)( const struct sp_fragment_shader *shader,
struct tgsi_exec_machine *machine,
struct tgsi_sampler **samplers);
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index 3ba4d934fd..f9590eb0b2 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -197,11 +197,11 @@ update_tgsi_samplers( struct softpipe_context *softpipe )
{
unsigned i;
- softpipe_reset_sampler_varients( softpipe );
+ softpipe_reset_sampler_variants( softpipe );
for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
- struct softpipe_tex_tile_cache *tc = softpipe->tex_cache[i];
- if (tc->texture) {
+ struct softpipe_tex_tile_cache *tc = softpipe->fragment_tex_cache[i];
+ if (tc && tc->texture) {
struct softpipe_resource *spt = softpipe_resource(tc->texture);
if (spt->timestamp != tc->timestamp) {
sp_tex_tile_cache_validate_texture( tc );
@@ -216,7 +216,7 @@ update_tgsi_samplers( struct softpipe_context *softpipe )
for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
struct softpipe_tex_tile_cache *tc = softpipe->vertex_tex_cache[i];
- if (tc->texture) {
+ if (tc && tc->texture) {
struct softpipe_resource *spt = softpipe_resource(tc->texture);
if (spt->timestamp != tc->timestamp) {
@@ -229,7 +229,7 @@ update_tgsi_samplers( struct softpipe_context *softpipe )
for (i = 0; i < PIPE_MAX_GEOMETRY_SAMPLERS; i++) {
struct softpipe_tex_tile_cache *tc = softpipe->geometry_tex_cache[i];
- if (tc->texture) {
+ if (tc && tc->texture) {
struct softpipe_resource *spt = softpipe_resource(tc->texture);
if (spt->timestamp != tc->timestamp) {
diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c
index b59fbc33ed..60331bc497 100644
--- a/src/gallium/drivers/softpipe/sp_state_sampler.c
+++ b/src/gallium/drivers/softpipe/sp_state_sampler.c
@@ -43,8 +43,8 @@
struct sp_sampler {
struct pipe_sampler_state base;
- struct sp_sampler_varient *varients;
- struct sp_sampler_varient *current;
+ struct sp_sampler_variant *variants;
+ struct sp_sampler_variant *current;
};
static struct sp_sampler *sp_sampler( struct pipe_sampler_state *sampler )
@@ -60,15 +60,15 @@ softpipe_create_sampler_state(struct pipe_context *pipe,
struct sp_sampler *sp_sampler = CALLOC_STRUCT(sp_sampler);
sp_sampler->base = *sampler;
- sp_sampler->varients = NULL;
+ sp_sampler->variants = NULL;
return (void *)sp_sampler;
}
static void
-softpipe_bind_sampler_states(struct pipe_context *pipe,
- unsigned num, void **sampler)
+softpipe_bind_fragment_sampler_states(struct pipe_context *pipe,
+ unsigned num, void **sampler)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
unsigned i;
@@ -76,18 +76,18 @@ softpipe_bind_sampler_states(struct pipe_context *pipe,
assert(num <= PIPE_MAX_SAMPLERS);
/* Check for no-op */
- if (num == softpipe->num_samplers &&
- !memcmp(softpipe->sampler, sampler, num * sizeof(void *)))
+ if (num == softpipe->num_fragment_samplers &&
+ !memcmp(softpipe->fragment_samplers, sampler, num * sizeof(void *)))
return;
draw_flush(softpipe->draw);
for (i = 0; i < num; ++i)
- softpipe->sampler[i] = sampler[i];
+ softpipe->fragment_samplers[i] = sampler[i];
for (i = num; i < PIPE_MAX_SAMPLERS; ++i)
- softpipe->sampler[i] = NULL;
+ softpipe->fragment_samplers[i] = NULL;
- softpipe->num_samplers = num;
+ softpipe->num_fragment_samplers = num;
softpipe->dirty |= SP_NEW_SAMPLER;
}
@@ -181,9 +181,9 @@ softpipe_sampler_view_destroy(struct pipe_context *pipe,
static void
-softpipe_set_sampler_views(struct pipe_context *pipe,
- unsigned num,
- struct pipe_sampler_view **views)
+softpipe_set_fragment_sampler_views(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_sampler_view **views)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
uint i;
@@ -191,8 +191,9 @@ softpipe_set_sampler_views(struct pipe_context *pipe,
assert(num <= PIPE_MAX_SAMPLERS);
/* Check for no-op */
- if (num == softpipe->num_sampler_views &&
- !memcmp(softpipe->sampler_views, views, num * sizeof(struct pipe_sampler_view *)))
+ if (num == softpipe->num_fragment_sampler_views &&
+ !memcmp(softpipe->fragment_sampler_views, views,
+ num * sizeof(struct pipe_sampler_view *)))
return;
draw_flush(softpipe->draw);
@@ -200,11 +201,11 @@ softpipe_set_sampler_views(struct pipe_context *pipe,
for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
struct pipe_sampler_view *view = i < num ? views[i] : NULL;
- pipe_sampler_view_reference(&softpipe->sampler_views[i], view);
- sp_tex_tile_cache_set_sampler_view(softpipe->tex_cache[i], view);
+ pipe_sampler_view_reference(&softpipe->fragment_sampler_views[i], view);
+ sp_tex_tile_cache_set_sampler_view(softpipe->fragment_tex_cache[i], view);
}
- softpipe->num_sampler_views = num;
+ softpipe->num_fragment_sampler_views = num;
softpipe->dirty |= SP_NEW_TEXTURE;
}
@@ -277,23 +278,23 @@ softpipe_set_geometry_sampler_views(struct pipe_context *pipe,
/**
- * Find/create an sp_sampler_varient object for sampling the given texture,
+ * Find/create an sp_sampler_variant object for sampling the given texture,
* sampler and tex unit.
*
* Note that the tex unit is significant. We can't re-use a sampler
- * varient for multiple texture units because the sampler varient contains
+ * variant for multiple texture units because the sampler variant contains
* the texture object pointer. If the texture object pointer were stored
- * somewhere outside the sampler varient, we could re-use samplers for
+ * somewhere outside the sampler variant, we could re-use samplers for
* multiple texture units.
*/
-static struct sp_sampler_varient *
-get_sampler_varient( unsigned unit,
+static struct sp_sampler_variant *
+get_sampler_variant( unsigned unit,
struct sp_sampler *sampler,
- struct pipe_resource *resource,
+ struct pipe_sampler_view *view,
unsigned processor )
{
- struct softpipe_resource *sp_texture = softpipe_resource(resource);
- struct sp_sampler_varient *v = NULL;
+ struct softpipe_resource *sp_texture = softpipe_resource(view->texture);
+ struct sp_sampler_variant *v = NULL;
union sp_sampler_key key;
/* if this fails, widen the key.unit field and update this assertion */
@@ -303,6 +304,10 @@ get_sampler_varient( unsigned unit,
key.bits.is_pot = sp_texture->pot;
key.bits.processor = processor;
key.bits.unit = unit;
+ key.bits.swizzle_r = view->swizzle_r;
+ key.bits.swizzle_g = view->swizzle_g;
+ key.bits.swizzle_b = view->swizzle_b;
+ key.bits.swizzle_a = view->swizzle_a;
key.bits.pad = 0;
if (sampler->current &&
@@ -311,14 +316,14 @@ get_sampler_varient( unsigned unit,
}
if (v == NULL) {
- for (v = sampler->varients; v; v = v->next)
+ for (v = sampler->variants; v; v = v->next)
if (v->key.value == key.value)
break;
if (v == NULL) {
- v = sp_create_sampler_varient( &sampler->base, key );
- v->next = sampler->varients;
- sampler->varients = v;
+ v = sp_create_sampler_variant( &sampler->base, key );
+ v->next = sampler->variants;
+ sampler->variants = v;
}
}
@@ -328,7 +333,7 @@ get_sampler_varient( unsigned unit,
void
-softpipe_reset_sampler_varients(struct softpipe_context *softpipe)
+softpipe_reset_sampler_variants(struct softpipe_context *softpipe)
{
int i;
@@ -338,65 +343,47 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe)
*/
for (i = 0; i <= softpipe->vs->max_sampler; i++) {
if (softpipe->vertex_samplers[i]) {
- struct pipe_resource *texture = NULL;
-
- if (softpipe->vertex_sampler_views[i]) {
- texture = softpipe->vertex_sampler_views[i]->texture;
- }
-
softpipe->tgsi.vert_samplers_list[i] =
- get_sampler_varient( i,
+ get_sampler_variant( i,
sp_sampler(softpipe->vertex_samplers[i]),
- texture,
+ softpipe->vertex_sampler_views[i],
TGSI_PROCESSOR_VERTEX );
- sp_sampler_varient_bind_texture( softpipe->tgsi.vert_samplers_list[i],
- softpipe->vertex_tex_cache[i],
- texture );
+ sp_sampler_variant_bind_view( softpipe->tgsi.vert_samplers_list[i],
+ softpipe->vertex_tex_cache[i],
+ softpipe->vertex_sampler_views[i] );
}
}
if (softpipe->gs) {
for (i = 0; i <= softpipe->gs->max_sampler; i++) {
if (softpipe->geometry_samplers[i]) {
- struct pipe_resource *texture = NULL;
-
- if (softpipe->geometry_sampler_views[i]) {
- texture = softpipe->geometry_sampler_views[i]->texture;
- }
-
softpipe->tgsi.geom_samplers_list[i] =
- get_sampler_varient(
+ get_sampler_variant(
i,
sp_sampler(softpipe->geometry_samplers[i]),
- texture,
+ softpipe->geometry_sampler_views[i],
TGSI_PROCESSOR_GEOMETRY );
- sp_sampler_varient_bind_texture(
+ sp_sampler_variant_bind_view(
softpipe->tgsi.geom_samplers_list[i],
softpipe->geometry_tex_cache[i],
- texture );
+ softpipe->geometry_sampler_views[i] );
}
}
}
for (i = 0; i <= softpipe->fs->info.file_max[TGSI_FILE_SAMPLER]; i++) {
- if (softpipe->sampler[i]) {
- struct pipe_resource *texture = NULL;
-
- if (softpipe->sampler_views[i]) {
- texture = softpipe->sampler_views[i]->texture;
- }
-
+ if (softpipe->fragment_samplers[i]) {
softpipe->tgsi.frag_samplers_list[i] =
- get_sampler_varient( i,
- sp_sampler(softpipe->sampler[i]),
- texture,
+ get_sampler_variant( i,
+ sp_sampler(softpipe->fragment_samplers[i]),
+ softpipe->fragment_sampler_views[i],
TGSI_PROCESSOR_FRAGMENT );
- sp_sampler_varient_bind_texture( softpipe->tgsi.frag_samplers_list[i],
- softpipe->tex_cache[i],
- texture );
+ sp_sampler_variant_bind_view( softpipe->tgsi.frag_samplers_list[i],
+ softpipe->fragment_tex_cache[i],
+ softpipe->fragment_sampler_views[i] );
}
}
}
@@ -406,11 +393,11 @@ softpipe_delete_sampler_state(struct pipe_context *pipe,
void *sampler)
{
struct sp_sampler *sp_sampler = (struct sp_sampler *)sampler;
- struct sp_sampler_varient *v, *tmp;
+ struct sp_sampler_variant *v, *tmp;
- for (v = sp_sampler->varients; v; v = tmp) {
+ for (v = sp_sampler->variants; v; v = tmp) {
tmp = v->next;
- sp_sampler_varient_destroy(v);
+ sp_sampler_variant_destroy(v);
}
FREE( sampler );
@@ -421,12 +408,12 @@ void
softpipe_init_sampler_funcs(struct pipe_context *pipe)
{
pipe->create_sampler_state = softpipe_create_sampler_state;
- pipe->bind_fragment_sampler_states = softpipe_bind_sampler_states;
+ pipe->bind_fragment_sampler_states = softpipe_bind_fragment_sampler_states;
pipe->bind_vertex_sampler_states = softpipe_bind_vertex_sampler_states;
pipe->bind_geometry_sampler_states = softpipe_bind_geometry_sampler_states;
pipe->delete_sampler_state = softpipe_delete_sampler_state;
- pipe->set_fragment_sampler_views = softpipe_set_sampler_views;
+ pipe->set_fragment_sampler_views = softpipe_set_fragment_sampler_views;
pipe->set_vertex_sampler_views = softpipe_set_vertex_sampler_views;
pipe->set_geometry_sampler_views = softpipe_set_geometry_sampler_views;
diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c
index 7fff338cce..3dec5de3cc 100644
--- a/src/gallium/drivers/softpipe/sp_state_shader.c
+++ b/src/gallium/drivers/softpipe/sp_state_shader.c
@@ -78,6 +78,8 @@ softpipe_create_fs_state(struct pipe_context *pipe,
state->origin_lower_left = state->info.properties[i].data[0];
else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_PIXEL_CENTER)
state->pixel_center_integer = state->info.properties[i].data[0];
+ else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS)
+ state->color0_writes_all_cbufs = state->info.properties[i].data[0];
}
return state;
@@ -89,8 +91,6 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
- draw_flush(softpipe->draw);
-
if (softpipe->fs == fs)
return;
diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c
index 7d8055f2ba..aa0b333c7a 100644
--- a/src/gallium/drivers/softpipe/sp_state_vertex.c
+++ b/src/gallium/drivers/softpipe/sp_state_vertex.c
@@ -33,6 +33,8 @@
#include "sp_state.h"
#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_transfer.h"
#include "draw/draw_context.h"
@@ -84,8 +86,9 @@ softpipe_set_vertex_buffers(struct pipe_context *pipe,
assert(count <= PIPE_MAX_ATTRIBS);
- memcpy(softpipe->vertex_buffer, buffers, count * sizeof(buffers[0]));
- softpipe->num_vertex_buffers = count;
+ util_copy_vertex_buffers(softpipe->vertex_buffer,
+ &softpipe->num_vertex_buffers,
+ buffers, count);
softpipe->dirty |= SP_NEW_VERTEX;
@@ -117,4 +120,5 @@ softpipe_init_vertex_funcs(struct pipe_context *pipe)
pipe->set_vertex_buffers = softpipe_set_vertex_buffers;
pipe->set_index_buffer = softpipe_set_index_buffer;
+ pipe->redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 2eac4c7a82..c09ce19559 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -539,18 +539,31 @@ wrap_linear_unorm_clamp_to_edge(const float s[4], unsigned size,
}
+/**
+ * Do coordinate to array index conversion. For array textures.
+ */
+static INLINE void
+wrap_array_layer(const float coord[4], unsigned size, int layer[4])
+{
+ uint ch;
+ for (ch = 0; ch < 4; ch++) {
+ int c = util_ifloor(coord[ch] + 0.5F);
+ layer[ch] = CLAMP(c, 0, size - 1);
+ }
+}
+
/**
* Examine the quad's texture coordinates to compute the partial
* derivatives w.r.t X and Y, then compute lambda (level of detail).
*/
static float
-compute_lambda_1d(const struct sp_sampler_varient *samp,
+compute_lambda_1d(const struct sp_sampler_variant *samp,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE])
{
- const struct pipe_resource *texture = samp->texture;
+ const struct pipe_resource *texture = samp->view->texture;
float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
float rho = MAX2(dsdx, dsdy) * texture->width0;
@@ -560,12 +573,12 @@ compute_lambda_1d(const struct sp_sampler_varient *samp,
static float
-compute_lambda_2d(const struct sp_sampler_varient *samp,
+compute_lambda_2d(const struct sp_sampler_variant *samp,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE])
{
- const struct pipe_resource *texture = samp->texture;
+ const struct pipe_resource *texture = samp->view->texture;
float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
@@ -579,12 +592,12 @@ compute_lambda_2d(const struct sp_sampler_varient *samp,
static float
-compute_lambda_3d(const struct sp_sampler_varient *samp,
+compute_lambda_3d(const struct sp_sampler_variant *samp,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE])
{
- const struct pipe_resource *texture = samp->texture;
+ const struct pipe_resource *texture = samp->view->texture;
float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
@@ -608,7 +621,7 @@ compute_lambda_3d(const struct sp_sampler_varient *samp,
* Since there aren't derivatives to use, just return 0.
*/
static float
-compute_lambda_vert(const struct sp_sampler_varient *samp,
+compute_lambda_vert(const struct sp_sampler_variant *samp,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE])
@@ -634,7 +647,7 @@ compute_lambda_vert(const struct sp_sampler_varient *samp,
static INLINE const float *
-get_texel_2d_no_border(const struct sp_sampler_varient *samp,
+get_texel_2d_no_border(const struct sp_sampler_variant *samp,
union tex_tile_address addr, int x, int y)
{
const struct softpipe_tex_cached_tile *tile;
@@ -651,16 +664,15 @@ get_texel_2d_no_border(const struct sp_sampler_varient *samp,
static INLINE const float *
-get_texel_2d(const struct sp_sampler_varient *samp,
+get_texel_2d(const struct sp_sampler_variant *samp,
union tex_tile_address addr, int x, int y)
{
- const struct pipe_resource *texture = samp->texture;
+ const struct pipe_resource *texture = samp->view->texture;
unsigned level = addr.bits.level;
if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
y < 0 || y >= (int) u_minify(texture->height0, level)) {
- return sp_tex_tile_cache_border_color(samp->cache,
- samp->sampler->border_color);
+ return samp->sampler->border_color;
}
else {
return get_texel_2d_no_border( samp, addr, x, y );
@@ -671,7 +683,7 @@ get_texel_2d(const struct sp_sampler_varient *samp,
/* Gather a quad of adjacent texels within a tile:
*/
static INLINE void
-get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_varient *samp,
+get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_variant *samp,
union tex_tile_address addr,
unsigned x, unsigned y,
const float *out[4])
@@ -695,7 +707,7 @@ get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_varient *samp,
/* Gather a quad of potentially non-adjacent texels:
*/
static INLINE void
-get_texel_quad_2d_no_border(const struct sp_sampler_varient *samp,
+get_texel_quad_2d_no_border(const struct sp_sampler_variant *samp,
union tex_tile_address addr,
int x0, int y0,
int x1, int y1,
@@ -710,7 +722,7 @@ get_texel_quad_2d_no_border(const struct sp_sampler_varient *samp,
/* Can involve a lot of unnecessary checks for border color:
*/
static INLINE void
-get_texel_quad_2d(const struct sp_sampler_varient *samp,
+get_texel_quad_2d(const struct sp_sampler_variant *samp,
union tex_tile_address addr,
int x0, int y0,
int x1, int y1,
@@ -724,10 +736,10 @@ get_texel_quad_2d(const struct sp_sampler_varient *samp,
-/* 3d varients:
+/* 3d variants:
*/
static INLINE const float *
-get_texel_3d_no_border(const struct sp_sampler_varient *samp,
+get_texel_3d_no_border(const struct sp_sampler_variant *samp,
union tex_tile_address addr, int x, int y, int z)
{
const struct softpipe_tex_cached_tile *tile;
@@ -745,17 +757,16 @@ get_texel_3d_no_border(const struct sp_sampler_varient *samp,
static INLINE const float *
-get_texel_3d(const struct sp_sampler_varient *samp,
+get_texel_3d(const struct sp_sampler_variant *samp,
union tex_tile_address addr, int x, int y, int z)
{
- const struct pipe_resource *texture = samp->texture;
+ const struct pipe_resource *texture = samp->view->texture;
unsigned level = addr.bits.level;
if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
y < 0 || y >= (int) u_minify(texture->height0, level) ||
z < 0 || z >= (int) u_minify(texture->depth0, level)) {
- return sp_tex_tile_cache_border_color(samp->cache,
- samp->sampler->border_color);
+ return samp->sampler->border_color;
}
else {
return get_texel_3d_no_border( samp, addr, x, y, z );
@@ -763,6 +774,43 @@ get_texel_3d(const struct sp_sampler_varient *samp,
}
+/* Get texel pointer for 1D array texture */
+static INLINE const float *
+get_texel_1d_array(const struct sp_sampler_variant *samp,
+ union tex_tile_address addr, int x, int y)
+{
+ const struct pipe_resource *texture = samp->view->texture;
+ unsigned level = addr.bits.level;
+
+ if (x < 0 || x >= (int) u_minify(texture->width0, level)) {
+ return samp->sampler->border_color;
+ }
+ else {
+ return get_texel_2d_no_border(samp, addr, x, y);
+ }
+}
+
+
+/* Get texel pointer for 2D array texture */
+static INLINE const float *
+get_texel_2d_array(const struct sp_sampler_variant *samp,
+ union tex_tile_address addr, int x, int y, int layer)
+{
+ const struct pipe_resource *texture = samp->view->texture;
+ unsigned level = addr.bits.level;
+
+ assert(layer < texture->array_size);
+
+ if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
+ y < 0 || y >= (int) u_minify(texture->height0, level)) {
+ return samp->sampler->border_color;
+ }
+ else {
+ return get_texel_3d_no_border(samp, addr, x, y, layer);
+ }
+}
+
+
/**
* Given the logbase2 of a mipmap's base level size and a mipmap level,
* return the size (in texels) of that mipmap level.
@@ -800,7 +848,7 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
unsigned j;
unsigned level = samp->level;
unsigned xpot = pot_level_size(samp->xpot, level);
@@ -863,7 +911,7 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
unsigned j;
unsigned level = samp->level;
unsigned xpot = pot_level_size(samp->xpot, level);
@@ -907,7 +955,7 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
unsigned j;
unsigned level = samp->level;
unsigned xpot = pot_level_size(samp->xpot, level);
@@ -960,8 +1008,8 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
- const struct pipe_resource *texture = samp->texture;
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
unsigned level0, j;
int width;
int x[4];
@@ -992,6 +1040,47 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
static void
+img_filter_1d_array_nearest(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
+ unsigned level0, j;
+ int width;
+ int x[4], layer[4];
+ union tex_tile_address addr;
+
+ level0 = samp->level;
+ width = u_minify(texture->width0, level0);
+
+ assert(width > 0);
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ samp->nearest_texcoord_s(s, width, x);
+ wrap_array_layer(t, texture->array_size, layer);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ const float *out = get_texel_1d_array(samp, addr, x[j], layer[j]);
+ int c;
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = out[c];
+ }
+ }
+
+ if (DEBUG_TEX) {
+ print_sample(__FUNCTION__, rgba);
+ }
+}
+
+
+static void
img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
@@ -1000,8 +1089,8 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
- const struct pipe_resource *texture = samp->texture;
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
unsigned level0, j;
int width, height;
int x[4], y[4];
@@ -1035,6 +1124,50 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
}
+static void
+img_filter_2d_array_nearest(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
+ unsigned level0, j;
+ int width, height;
+ int x[4], y[4], layer[4];
+ union tex_tile_address addr;
+
+ level0 = samp->level;
+ width = u_minify(texture->width0, level0);
+ height = u_minify(texture->height0, level0);
+
+ assert(width > 0);
+ assert(height > 0);
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ samp->nearest_texcoord_s(s, width, x);
+ samp->nearest_texcoord_t(t, height, y);
+ wrap_array_layer(p, texture->array_size, layer);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ const float *out = get_texel_2d_array(samp, addr, x[j], y[j], layer[j]);
+ int c;
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = out[c];
+ }
+ }
+
+ if (DEBUG_TEX) {
+ print_sample(__FUNCTION__, rgba);
+ }
+}
+
+
static INLINE union tex_tile_address
face(union tex_tile_address addr, unsigned face )
{
@@ -1052,8 +1185,8 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
- const struct pipe_resource *texture = samp->texture;
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
const unsigned *faces = samp->faces; /* zero when not cube-mapping */
unsigned level0, j;
int width, height;
@@ -1096,8 +1229,8 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
- const struct pipe_resource *texture = samp->texture;
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
unsigned level0, j;
int width, height, depth;
int x[4], y[4], z[4];
@@ -1138,8 +1271,8 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
- const struct pipe_resource *texture = samp->texture;
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
unsigned level0, j;
int width;
int x0[4], x1[4];
@@ -1170,6 +1303,47 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
static void
+img_filter_1d_array_linear(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
+ unsigned level0, j;
+ int width;
+ int x0[4], x1[4], layer[4];
+ float xw[4]; /* weights */
+ union tex_tile_address addr;
+
+ level0 = samp->level;
+ width = u_minify(texture->width0, level0);
+
+ assert(width > 0);
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ samp->linear_texcoord_s(s, width, x0, x1, xw);
+ wrap_array_layer(t, texture->array_size, layer);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ const float *tx0 = get_texel_1d_array(samp, addr, x0[j], layer[j]);
+ const float *tx1 = get_texel_1d_array(samp, addr, x1[j], layer[j]);
+ int c;
+
+ /* interpolate R, G, B, A */
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = lerp(xw[j], tx0[c], tx1[c]);
+ }
+ }
+}
+
+
+static void
img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
@@ -1178,8 +1352,8 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
- const struct pipe_resource *texture = samp->texture;
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
unsigned level0, j;
int width, height;
int x0[4], y0[4], x1[4], y1[4];
@@ -1217,6 +1391,54 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
static void
+img_filter_2d_array_linear(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
+ unsigned level0, j;
+ int width, height;
+ int x0[4], y0[4], x1[4], y1[4], layer[4];
+ float xw[4], yw[4]; /* weights */
+ union tex_tile_address addr;
+
+ level0 = samp->level;
+ width = u_minify(texture->width0, level0);
+ height = u_minify(texture->height0, level0);
+
+ assert(width > 0);
+ assert(height > 0);
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ samp->linear_texcoord_s(s, width, x0, x1, xw);
+ samp->linear_texcoord_t(t, height, y0, y1, yw);
+ wrap_array_layer(p, texture->array_size, layer);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ const float *tx0 = get_texel_2d_array(samp, addr, x0[j], y0[j], layer[j]);
+ const float *tx1 = get_texel_2d_array(samp, addr, x1[j], y0[j], layer[j]);
+ const float *tx2 = get_texel_2d_array(samp, addr, x0[j], y1[j], layer[j]);
+ const float *tx3 = get_texel_2d_array(samp, addr, x1[j], y1[j], layer[j]);
+ int c;
+
+ /* interpolate R, G, B, A */
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = lerp_2d(xw[j], yw[j],
+ tx0[c], tx1[c],
+ tx2[c], tx3[c]);
+ }
+ }
+}
+
+
+static void
img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
@@ -1225,8 +1447,8 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
- const struct pipe_resource *texture = samp->texture;
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
const unsigned *faces = samp->faces; /* zero when not cube-mapping */
unsigned level0, j;
int width, height;
@@ -1274,8 +1496,8 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
- const struct pipe_resource *texture = samp->texture;
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
unsigned level0, j;
int width, height, depth;
int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
@@ -1350,8 +1572,8 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
- const struct pipe_resource *texture = samp->texture;
+ struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
int level0;
float lambda;
float lod[QUAD_SIZE];
@@ -1417,8 +1639,8 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
- const struct pipe_resource *texture = samp->texture;
+ struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
float lambda;
float lod[QUAD_SIZE];
@@ -1460,7 +1682,7 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
float lambda;
float lod[QUAD_SIZE];
@@ -1501,8 +1723,8 @@ mip_filter_linear_2d_linear_repeat_POT(
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
- const struct pipe_resource *texture = samp->texture;
+ struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
int level0;
float lambda;
float lod[QUAD_SIZE];
@@ -1569,10 +1791,11 @@ sample_compare(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
const struct pipe_sampler_state *sampler = samp->sampler;
int j, k0, k1, k2, k3;
float val;
+ float pc0, pc1, pc2, pc3;
samp->mip_filter(tgsi_sampler, s, t, p, c0, control, rgba);
@@ -1582,43 +1805,48 @@ sample_compare(struct tgsi_sampler *tgsi_sampler,
* RGBA channels. We look at the red channel here.
*/
+ pc0 = CLAMP(p[0], 0.0F, 1.0F);
+ pc1 = CLAMP(p[1], 0.0F, 1.0F);
+ pc2 = CLAMP(p[2], 0.0F, 1.0F);
+ pc3 = CLAMP(p[3], 0.0F, 1.0F);
+
/* compare four texcoords vs. four texture samples */
switch (sampler->compare_func) {
case PIPE_FUNC_LESS:
- k0 = p[0] < rgba[0][0];
- k1 = p[1] < rgba[0][1];
- k2 = p[2] < rgba[0][2];
- k3 = p[3] < rgba[0][3];
+ k0 = pc0 < rgba[0][0];
+ k1 = pc1 < rgba[0][1];
+ k2 = pc2 < rgba[0][2];
+ k3 = pc3 < rgba[0][3];
break;
case PIPE_FUNC_LEQUAL:
- k0 = p[0] <= rgba[0][0];
- k1 = p[1] <= rgba[0][1];
- k2 = p[2] <= rgba[0][2];
- k3 = p[3] <= rgba[0][3];
+ k0 = pc0 <= rgba[0][0];
+ k1 = pc1 <= rgba[0][1];
+ k2 = pc2 <= rgba[0][2];
+ k3 = pc3 <= rgba[0][3];
break;
case PIPE_FUNC_GREATER:
- k0 = p[0] > rgba[0][0];
- k1 = p[1] > rgba[0][1];
- k2 = p[2] > rgba[0][2];
- k3 = p[3] > rgba[0][3];
+ k0 = pc0 > rgba[0][0];
+ k1 = pc1 > rgba[0][1];
+ k2 = pc2 > rgba[0][2];
+ k3 = pc3 > rgba[0][3];
break;
case PIPE_FUNC_GEQUAL:
- k0 = p[0] >= rgba[0][0];
- k1 = p[1] >= rgba[0][1];
- k2 = p[2] >= rgba[0][2];
- k3 = p[3] >= rgba[0][3];
+ k0 = pc0 >= rgba[0][0];
+ k1 = pc1 >= rgba[0][1];
+ k2 = pc2 >= rgba[0][2];
+ k3 = pc3 >= rgba[0][3];
break;
case PIPE_FUNC_EQUAL:
- k0 = p[0] == rgba[0][0];
- k1 = p[1] == rgba[0][1];
- k2 = p[2] == rgba[0][2];
- k3 = p[3] == rgba[0][3];
+ k0 = pc0 == rgba[0][0];
+ k1 = pc1 == rgba[0][1];
+ k2 = pc2 == rgba[0][2];
+ k3 = pc3 == rgba[0][3];
break;
case PIPE_FUNC_NOTEQUAL:
- k0 = p[0] != rgba[0][0];
- k1 = p[1] != rgba[0][1];
- k2 = p[2] != rgba[0][2];
- k3 = p[3] != rgba[0][3];
+ k0 = pc0 != rgba[0][0];
+ k1 = pc1 != rgba[0][1];
+ k2 = pc2 != rgba[0][2];
+ k3 = pc3 != rgba[0][3];
break;
case PIPE_FUNC_ALWAYS:
k0 = k1 = k2 = k3 = 1;
@@ -1656,7 +1884,7 @@ sample_cube(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
unsigned j;
float ssss[4], tttt[4];
@@ -1731,6 +1959,86 @@ sample_cube(struct tgsi_sampler *tgsi_sampler,
}
+static void
+sample_swizzle(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ float rgba_temp[NUM_CHANNELS][QUAD_SIZE];
+ const unsigned swizzle_r = samp->key.bits.swizzle_r;
+ const unsigned swizzle_g = samp->key.bits.swizzle_g;
+ const unsigned swizzle_b = samp->key.bits.swizzle_b;
+ const unsigned swizzle_a = samp->key.bits.swizzle_a;
+ unsigned j;
+
+ samp->sample_target(tgsi_sampler, s, t, p, c0, control, rgba_temp);
+
+ switch (swizzle_r) {
+ case PIPE_SWIZZLE_ZERO:
+ for (j = 0; j < 4; j++)
+ rgba[0][j] = 0.0f;
+ break;
+ case PIPE_SWIZZLE_ONE:
+ for (j = 0; j < 4; j++)
+ rgba[0][j] = 1.0f;
+ break;
+ default:
+ assert(swizzle_r < 4);
+ for (j = 0; j < 4; j++)
+ rgba[0][j] = rgba_temp[swizzle_r][j];
+ }
+
+ switch (swizzle_g) {
+ case PIPE_SWIZZLE_ZERO:
+ for (j = 0; j < 4; j++)
+ rgba[1][j] = 0.0f;
+ break;
+ case PIPE_SWIZZLE_ONE:
+ for (j = 0; j < 4; j++)
+ rgba[1][j] = 1.0f;
+ break;
+ default:
+ assert(swizzle_g < 4);
+ for (j = 0; j < 4; j++)
+ rgba[1][j] = rgba_temp[swizzle_g][j];
+ }
+
+ switch (swizzle_b) {
+ case PIPE_SWIZZLE_ZERO:
+ for (j = 0; j < 4; j++)
+ rgba[2][j] = 0.0f;
+ break;
+ case PIPE_SWIZZLE_ONE:
+ for (j = 0; j < 4; j++)
+ rgba[2][j] = 1.0f;
+ break;
+ default:
+ assert(swizzle_b < 4);
+ for (j = 0; j < 4; j++)
+ rgba[2][j] = rgba_temp[swizzle_b][j];
+ }
+
+ switch (swizzle_a) {
+ case PIPE_SWIZZLE_ZERO:
+ for (j = 0; j < 4; j++)
+ rgba[3][j] = 0.0f;
+ break;
+ case PIPE_SWIZZLE_ONE:
+ for (j = 0; j < 4; j++)
+ rgba[3][j] = 1.0f;
+ break;
+ default:
+ assert(swizzle_a < 4);
+ for (j = 0; j < 4; j++)
+ rgba[3][j] = rgba_temp[swizzle_a][j];
+ }
+}
+
static wrap_nearest_func
get_nearest_unorm_wrap(unsigned mode)
@@ -1828,8 +2136,10 @@ get_lambda_func(const union sp_sampler_key key)
switch (key.bits.target) {
case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_1D_ARRAY:
return compute_lambda_1d;
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_CUBE:
return compute_lambda_2d;
@@ -1854,6 +2164,12 @@ get_img_filter(const union sp_sampler_key key,
else
return img_filter_1d_linear;
break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ if (filter == PIPE_TEX_FILTER_NEAREST)
+ return img_filter_1d_array_nearest;
+ else
+ return img_filter_1d_array_linear;
+ break;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
/* Try for fast path:
@@ -1889,6 +2205,12 @@ get_img_filter(const union sp_sampler_key key,
else
return img_filter_2d_linear;
break;
+ case PIPE_TEXTURE_2D_ARRAY:
+ if (filter == PIPE_TEX_FILTER_NEAREST)
+ return img_filter_2d_array_nearest;
+ else
+ return img_filter_2d_array_linear;
+ break;
case PIPE_TEXTURE_CUBE:
if (filter == PIPE_TEX_FILTER_NEAREST)
return img_filter_cube_nearest;
@@ -1909,16 +2231,17 @@ get_img_filter(const union sp_sampler_key key,
/**
- * Bind the given texture object and texture cache to the sampler varient.
+ * Bind the given texture object and texture cache to the sampler variant.
*/
void
-sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp,
- struct softpipe_tex_tile_cache *tex_cache,
- const struct pipe_resource *texture )
+sp_sampler_variant_bind_view( struct sp_sampler_variant *samp,
+ struct softpipe_tex_tile_cache *tex_cache,
+ const struct pipe_sampler_view *view )
{
const struct pipe_sampler_state *sampler = samp->sampler;
+ const struct pipe_resource *texture = view->texture;
- samp->texture = texture;
+ samp->view = view;
samp->cache = tex_cache;
samp->xpot = util_unsigned_logbase2( texture->width0 );
samp->ypot = util_unsigned_logbase2( texture->height0 );
@@ -1927,20 +2250,20 @@ sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp,
void
-sp_sampler_varient_destroy( struct sp_sampler_varient *samp )
+sp_sampler_variant_destroy( struct sp_sampler_variant *samp )
{
FREE(samp);
}
/**
- * Create a sampler varient for a given set of non-orthogonal state.
+ * Create a sampler variant for a given set of non-orthogonal state.
*/
-struct sp_sampler_varient *
-sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
+struct sp_sampler_variant *
+sp_create_sampler_variant( const struct pipe_sampler_state *sampler,
const union sp_sampler_key key )
{
- struct sp_sampler_varient *samp = CALLOC_STRUCT(sp_sampler_varient);
+ struct sp_sampler_variant *samp = CALLOC_STRUCT(sp_sampler_variant);
if (!samp)
return NULL;
@@ -2015,7 +2338,7 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
}
if (key.bits.target == PIPE_TEXTURE_CUBE) {
- samp->base.get_samples = sample_cube;
+ samp->sample_target = sample_cube;
}
else {
samp->faces[0] = 0;
@@ -2026,7 +2349,17 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
/* Skip cube face determination by promoting the compare
* function pointer:
*/
- samp->base.get_samples = samp->compare;
+ samp->sample_target = samp->compare;
+ }
+
+ if (key.bits.swizzle_r != PIPE_SWIZZLE_RED ||
+ key.bits.swizzle_g != PIPE_SWIZZLE_GREEN ||
+ key.bits.swizzle_b != PIPE_SWIZZLE_BLUE ||
+ key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA) {
+ samp->base.get_samples = sample_swizzle;
+ }
+ else {
+ samp->base.get_samples = samp->sample_target;
}
return samp;
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index 6114acf737..f0b867edc6 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -32,7 +32,7 @@
#include "tgsi/tgsi_exec.h"
-struct sp_sampler_varient;
+struct sp_sampler_variant;
typedef void (*wrap_nearest_func)(const float s[4],
unsigned size,
@@ -44,7 +44,7 @@ typedef void (*wrap_linear_func)(const float s[4],
int icoord1[4],
float w[4]);
-typedef float (*compute_lambda_func)(const struct sp_sampler_varient *sampler,
+typedef float (*compute_lambda_func)(const struct sp_sampler_variant *sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE]);
@@ -64,7 +64,11 @@ union sp_sampler_key {
unsigned is_pot:1;
unsigned processor:2;
unsigned unit:4;
- unsigned pad:22;
+ unsigned swizzle_r:3;
+ unsigned swizzle_g:3;
+ unsigned swizzle_b:3;
+ unsigned swizzle_a:3;
+ unsigned pad:10;
} bits;
unsigned value;
};
@@ -72,7 +76,7 @@ union sp_sampler_key {
/**
* Subclass of tgsi_sampler
*/
-struct sp_sampler_varient
+struct sp_sampler_variant
{
struct tgsi_sampler base; /**< base class */
@@ -85,7 +89,7 @@ struct sp_sampler_varient
/* Currently bound texture:
*/
- const struct pipe_resource *texture;
+ const struct pipe_sampler_view *view;
struct softpipe_tex_tile_cache *cache;
unsigned processor;
@@ -113,32 +117,33 @@ struct sp_sampler_varient
filter_func mip_filter;
filter_func compare;
+ filter_func sample_target;
/* Linked list:
*/
- struct sp_sampler_varient *next;
+ struct sp_sampler_variant *next;
};
struct sp_sampler;
-/* Create a sampler varient for a given set of non-orthogonal state. Currently the
+/* Create a sampler variant for a given set of non-orthogonal state. Currently the
*/
-struct sp_sampler_varient *
-sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
+struct sp_sampler_variant *
+sp_create_sampler_variant( const struct pipe_sampler_state *sampler,
const union sp_sampler_key key );
-void sp_sampler_varient_bind_texture( struct sp_sampler_varient *varient,
- struct softpipe_tex_tile_cache *tex_cache,
- const struct pipe_resource *tex );
+void sp_sampler_variant_bind_view( struct sp_sampler_variant *variant,
+ struct softpipe_tex_tile_cache *tex_cache,
+ const struct pipe_sampler_view *view );
-void sp_sampler_varient_destroy( struct sp_sampler_varient * );
+void sp_sampler_variant_destroy( struct sp_sampler_variant * );
-static INLINE struct sp_sampler_varient *
-sp_sampler_varient(const struct tgsi_sampler *sampler)
+static INLINE struct sp_sampler_variant *
+sp_sampler_variant(const struct tgsi_sampler *sampler)
{
- return (struct sp_sampler_varient *) sampler;
+ return (struct sp_sampler_variant *) sampler;
}
extern void
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
index 1393164150..e589ee7c84 100644
--- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
@@ -251,6 +251,7 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
tc->tex_level != addr.bits.level ||
tc->tex_z != addr.bits.z) {
/* get new transfer (view into texture) */
+ unsigned width, height, layer;
if (tc->tex_trans) {
if (tc->tex_trans_map) {
@@ -262,14 +263,22 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
tc->tex_trans = NULL;
}
+ width = u_minify(tc->texture->width0, addr.bits.level);
+ if (tc->texture->target == PIPE_TEXTURE_1D_ARRAY) {
+ height = tc->texture->array_size;
+ layer = 0;
+ }
+ else {
+ height = u_minify(tc->texture->height0, addr.bits.level);
+ layer = addr.bits.face + addr.bits.z;
+ }
+
tc->tex_trans =
pipe_get_transfer(tc->pipe, tc->texture,
addr.bits.level,
- addr.bits.face + addr.bits.z,
+ layer,
PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED,
- 0, 0,
- u_minify(tc->texture->width0, addr.bits.level),
- u_minify(tc->texture->height0, addr.bits.level));
+ 0, 0, width, height);
tc->tex_trans_map = tc->pipe->transfer_map(tc->pipe, tc->tex_trans);
@@ -278,45 +287,21 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
tc->tex_z = addr.bits.z;
}
- /* get tile from the transfer (view into texture) */
- pipe_get_tile_swizzle(tc->pipe,
- tc->tex_trans,
- addr.bits.x * TILE_SIZE,
- addr.bits.y * TILE_SIZE,
- TILE_SIZE,
- TILE_SIZE,
- tc->swizzle_r,
- tc->swizzle_g,
- tc->swizzle_b,
- tc->swizzle_a,
- tc->format,
- (float *) tile->data.color);
+ /* Get tile from the transfer (view into texture), explicitly passing
+ * the image format.
+ */
+ pipe_get_tile_rgba_format(tc->pipe,
+ tc->tex_trans,
+ addr.bits.x * TILE_SIZE,
+ addr.bits.y * TILE_SIZE,
+ TILE_SIZE,
+ TILE_SIZE,
+ tc->format,
+ (float *) tile->data.color);
+
tile->addr = addr;
}
tc->last_tile = tile;
return tile;
}
-
-
-
-/**
- * Return the swizzled border color.
- */
-const float *
-sp_tex_tile_cache_border_color(struct softpipe_tex_tile_cache *tc,
- const float border_color[4])
-{
- float rgba01[6];
-
- COPY_4V(rgba01, border_color);
- rgba01[PIPE_SWIZZLE_ZERO] = 0.0f;
- rgba01[PIPE_SWIZZLE_ONE] = 1.0f;
-
- tc->swz_border_color[0] = rgba01[tc->swizzle_r];
- tc->swz_border_color[1] = rgba01[tc->swizzle_g];
- tc->swz_border_color[2] = rgba01[tc->swizzle_b];
- tc->swz_border_color[3] = rgba01[tc->swizzle_a];
-
- return tc->swz_border_color;
-}
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
index e0b66bf3f7..9bced37990 100644
--- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
@@ -92,11 +92,9 @@ struct softpipe_tex_tile_cache
unsigned swizzle_g;
unsigned swizzle_b;
unsigned swizzle_a;
- unsigned format;
+ enum pipe_format format;
struct softpipe_tex_cached_tile *last_tile; /**< most recently retrieved tile */
-
- float swz_border_color[4]; /**< swizzled border color */
};
@@ -161,10 +159,5 @@ sp_get_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
}
-const float *
-sp_tex_tile_cache_border_color(struct softpipe_tex_tile_cache *tc,
- const float border_color[4]);
-
-
#endif /* SP_TEX_TILE_CACHE_H */
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index 509d9982b1..95374c34ec 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -62,13 +62,21 @@ softpipe_resource_layout(struct pipe_screen *screen,
unsigned buffer_size = 0;
for (level = 0; level <= pt->last_level; level++) {
+ unsigned slices;
+
+ if (pt->target == PIPE_TEXTURE_CUBE)
+ slices = 6;
+ else if (pt->target == PIPE_TEXTURE_3D)
+ slices = depth;
+ else
+ slices = pt->array_size;
+
spr->stride[level] = util_format_get_stride(pt->format, width);
spr->level_offset[level] = buffer_size;
buffer_size += (util_format_get_nblocksy(pt->format, height) *
- ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
- spr->stride[level]);
+ slices * spr->stride[level]);
width = u_minify(width, 1);
height = u_minify(height, 1);
@@ -227,9 +235,13 @@ sp_get_tex_image_offset(const struct softpipe_resource *spr,
unsigned offset = spr->level_offset[level];
if (spr->base.target == PIPE_TEXTURE_CUBE ||
- spr->base.target == PIPE_TEXTURE_3D) {
+ spr->base.target == PIPE_TEXTURE_3D ||
+ spr->base.target == PIPE_TEXTURE_2D_ARRAY) {
offset += layer * nblocksy * spr->stride[level];
}
+ else if (spr->base.target == PIPE_TEXTURE_1D_ARRAY) {
+ offset += layer * spr->stride[level];
+ }
else {
assert(layer == 0);
}
@@ -292,7 +304,7 @@ softpipe_surface_destroy(struct pipe_context *pipe,
* a resource object.
* \param pipe rendering context
* \param resource the resource to transfer in/out of
- * \param sr indicates cube face or 3D texture slice
+ * \param level which mipmap level
* \param usage bitmask of PIPE_TRANSFER_x flags
* \param box the 1D/2D/3D region of interest
*/
@@ -311,8 +323,21 @@ softpipe_get_transfer(struct pipe_context *pipe,
/* make sure the requested region is in the image bounds */
assert(box->x + box->width <= u_minify(resource->width0, level));
- assert(box->y + box->height <= u_minify(resource->height0, level));
- assert(box->z + box->depth <= (u_minify(resource->depth0, level) + resource->array_size - 1));
+ if (resource->target == PIPE_TEXTURE_1D_ARRAY) {
+ assert(box->y + box->height <= resource->array_size);
+ }
+ else {
+ assert(box->y + box->height <= u_minify(resource->height0, level));
+ if (resource->target == PIPE_TEXTURE_2D_ARRAY) {
+ assert(box->z + box->depth <= resource->array_size);
+ }
+ else if (resource->target == PIPE_TEXTURE_CUBE) {
+ assert(box->z < 6);
+ }
+ else {
+ assert(box->z + box->depth <= (u_minify(resource->depth0, level)));
+ }
+ }
/*
* Transfers, like other pipe operations, must happen in order, so flush the
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index 480860af63..60870b8bee 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -357,11 +357,12 @@ sp_flush_tile(struct softpipe_tile_cache* tc, unsigned pos)
tc->entries[pos]->data.depth32, 0/*STRIDE*/);
}
else {
- pipe_put_tile_rgba(tc->pipe, tc->transfer,
- tc->tile_addrs[pos].bits.x * TILE_SIZE,
- tc->tile_addrs[pos].bits.y * TILE_SIZE,
- TILE_SIZE, TILE_SIZE,
- (float *) tc->entries[pos]->data.color);
+ pipe_put_tile_rgba_format(tc->pipe, tc->transfer,
+ tc->tile_addrs[pos].bits.x * TILE_SIZE,
+ tc->tile_addrs[pos].bits.y * TILE_SIZE,
+ TILE_SIZE, TILE_SIZE,
+ tc->surface->format,
+ (float *) tc->entries[pos]->data.color);
}
tc->tile_addrs[pos].bits.invalid = 1; /* mark as empty */
}
@@ -468,11 +469,12 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc,
tile->data.depth32, 0/*STRIDE*/);
}
else {
- pipe_put_tile_rgba(tc->pipe, pt,
- tc->tile_addrs[pos].bits.x * TILE_SIZE,
- tc->tile_addrs[pos].bits.y * TILE_SIZE,
- TILE_SIZE, TILE_SIZE,
- (float *) tile->data.color);
+ pipe_put_tile_rgba_format(tc->pipe, pt,
+ tc->tile_addrs[pos].bits.x * TILE_SIZE,
+ tc->tile_addrs[pos].bits.y * TILE_SIZE,
+ TILE_SIZE, TILE_SIZE,
+ tc->surface->format,
+ (float *) tile->data.color);
}
}
diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c
index 05eab8a517..1ed1d5d25b 100644
--- a/src/gallium/drivers/svga/svga_cmd.c
+++ b/src/gallium/drivers/svga/svga_cmd.c
@@ -422,7 +422,8 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc,
struct svga_transfer *st, // IN
SVGA3dTransferType transfer, // IN
const SVGA3dCopyBox *boxes, // IN
- uint32 numBoxes) // IN
+ uint32 numBoxes, // IN
+ SVGA3dSurfaceDMAFlags flags) // IN
{
struct svga_texture *texture = svga_texture(st->base.resource);
SVGA3dCmdSurfaceDMA *cmd;
@@ -465,7 +466,7 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc,
pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof *cmd + boxesSize);
pSuffix->suffixSize = sizeof *pSuffix;
pSuffix->maximumOffset = st->hw_nblocksy*st->base.stride;
- memset(&pSuffix->flags, 0, sizeof pSuffix->flags);
+ pSuffix->flags = flags;
swc->commit(swc);
diff --git a/src/gallium/drivers/svga/svga_cmd.h b/src/gallium/drivers/svga/svga_cmd.h
index 0e568d78e6..223ab17df8 100644
--- a/src/gallium/drivers/svga/svga_cmd.h
+++ b/src/gallium/drivers/svga/svga_cmd.h
@@ -102,7 +102,8 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc,
struct svga_transfer *st,
SVGA3dTransferType transfer,
const SVGA3dCopyBox *boxes,
- uint32 numBoxes);
+ uint32 numBoxes,
+ SVGA3dSurfaceDMAFlags flags);
enum pipe_error
SVGA3D_BufferDMA(struct svga_winsys_context *swc,
diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
index 1e513f1039..4782b4bf70 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -34,6 +34,7 @@
#include "svga_context.h"
#include "svga_screen.h"
+#include "svga_surface.h"
#include "svga_resource_texture.h"
#include "svga_resource_buffer.h"
#include "svga_resource.h"
@@ -43,6 +44,12 @@
#include "svga_debug.h"
#include "svga_state.h"
+DEBUG_GET_ONCE_BOOL_OPTION(no_swtnl, "SVGA_NO_SWTNL", FALSE)
+DEBUG_GET_ONCE_BOOL_OPTION(force_swtnl, "SVGA_FORCE_SWTNL", FALSE);
+DEBUG_GET_ONCE_BOOL_OPTION(use_min_mipmap, "SVGA_USE_MIN_MIPMAP", FALSE);
+DEBUG_GET_ONCE_NUM_OPTION(disable_shader, "SVGA_DISABLE_SHADER", ~0);
+DEBUG_GET_ONCE_BOOL_OPTION(no_line_width, "SVGA_NO_LINE_WIDTH", FALSE);
+DEBUG_GET_ONCE_BOOL_OPTION(force_hw_line_stipple, "SVGA_FORCE_HW_LINE_STIPPLE", FALSE);
static void svga_destroy( struct pipe_context *pipe )
{
@@ -113,13 +120,12 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen,
/* debug */
- svga->debug.no_swtnl = debug_get_bool_option("SVGA_NO_SWTNL", FALSE);
- svga->debug.force_swtnl = debug_get_bool_option("SVGA_FORCE_SWTNL", FALSE);
- svga->debug.use_min_mipmap = debug_get_bool_option("SVGA_USE_MIN_MIPMAP", FALSE);
- svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0);
-
- if (!svga_init_swtnl(svga))
- goto no_swtnl;
+ svga->debug.no_swtnl = debug_get_option_no_swtnl();
+ svga->debug.force_swtnl = debug_get_option_force_swtnl();
+ svga->debug.use_min_mipmap = debug_get_option_use_min_mipmap();
+ svga->debug.disable_shader = debug_get_option_disable_shader();
+ svga->debug.no_line_width = debug_get_option_no_line_width();
+ svga->debug.force_hw_line_stipple = debug_get_option_force_hw_line_stipple();
svga->fs_bm = util_bitmask_create();
if (svga->fs_bm == NULL)
@@ -149,6 +155,8 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen,
if (svga->hwtnl == NULL)
goto no_hwtnl;
+ if (!svga_init_swtnl(svga))
+ goto no_swtnl;
ret = svga_emit_initial_state( svga );
if (ret)
@@ -171,6 +179,8 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen,
return &svga->pipe;
no_state:
+ svga_destroy_swtnl(svga);
+no_swtnl:
svga_hwtnl_destroy( svga->hwtnl );
no_hwtnl:
u_upload_destroy( svga->upload_vb );
@@ -181,8 +191,6 @@ no_upload_ib:
no_vs_bm:
util_bitmask_destroy( svga->fs_bm );
no_fs_bm:
- svga_destroy_swtnl(svga);
-no_swtnl:
svga->swc->destroy(svga->swc);
no_swc:
FREE(svga);
@@ -196,14 +204,10 @@ void svga_context_flush( struct svga_context *svga,
{
struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
struct pipe_fence_handle *fence = NULL;
+ enum pipe_error ret;
svga->curr.nr_fbs = 0;
- /* Unmap upload manager buffers:
- */
- u_upload_flush(svga->upload_vb);
- u_upload_flush(svga->upload_ib);
-
/* Ensure that texture dma uploads are processed
* before submitting commands.
*/
@@ -220,9 +224,25 @@ void svga_context_flush( struct svga_context *svga,
*/
svga->dirty |= SVGA_NEW_COMMAND_BUFFER;
+ /*
+ * We must reemit the surface bindings here, because svga_update_state
+ * will always flush the primitives before processing the
+ * SVGA_NEW_COMMAND_BUFFER state change.
+ *
+ * TODO: Refactor this.
+ */
+ ret = svga_reemit_framebuffer_bindings(svga);
+ assert(ret == PIPE_OK);
+
+ ret = svga_reemit_tss_bindings(svga);
+ assert(ret == PIPE_OK);
+
+ svga->dirty &= ~SVGA_NEW_COMMAND_BUFFER;
+
if (SVGA_DEBUG & DEBUG_SYNC) {
if (fence)
- svga->pipe.screen->fence_finish( svga->pipe.screen, fence, 0);
+ svga->pipe.screen->fence_finish( svga->pipe.screen, fence,
+ PIPE_TIMEOUT_INFINITE);
}
if(pfence)
@@ -245,6 +265,30 @@ void svga_hwtnl_flush_retry( struct svga_context *svga )
assert(ret == 0);
}
+
+/* Emit all operations pending on host surfaces.
+ */
+void svga_surfaces_flush(struct svga_context *svga)
+{
+ unsigned i;
+
+ /* Emit buffered drawing commands.
+ */
+ svga_hwtnl_flush_retry( svga );
+
+ /* Emit back-copy from render target view to texture.
+ */
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ if (svga->curr.framebuffer.cbufs[i])
+ svga_propagate_surface(svga, svga->curr.framebuffer.cbufs[i]);
+ }
+
+ if (svga->curr.framebuffer.zsbuf)
+ svga_propagate_surface(svga, svga->curr.framebuffer.zsbuf);
+
+}
+
+
struct svga_winsys_context *
svga_winsys_context( struct pipe_context *pipe )
{
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index 04e281a506..7b36a3606e 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -35,6 +35,8 @@
#include "tgsi/tgsi_scan.h"
+#include "svga_state.h"
+
#define SVGA_TEX_UNITS 8
#define SVGA_MAX_POINTSIZE 80.0
@@ -147,7 +149,14 @@ struct svga_rasterizer_state {
float pointsize;
unsigned hw_unfilled:16; /* PIPE_POLYGON_MODE_x */
- unsigned need_pipeline:16; /* which prims do we need help for? */
+
+ /** Which prims do we need help for? Bitmask of (1 << PIPE_PRIM_x) flags */
+ unsigned need_pipeline:16;
+
+ /** For debugging: */
+ const char* need_pipeline_tris_str;
+ const char* need_pipeline_lines_str;
+ const char* need_pipeline_points_str;
};
struct svga_sampler_state {
@@ -237,7 +246,7 @@ struct svga_prescale {
};
-/* Updated by calling svga_update_state( SVGA_STATE_HW_VIEWPORT )
+/* Updated by calling svga_update_state( SVGA_STATE_HW_CLEAR )
*/
struct svga_hw_clear_state
{
@@ -288,6 +297,11 @@ struct svga_sw_state
boolean need_swvfetch;
boolean need_pipeline;
boolean need_swtnl;
+
+ /* Flag to make sure that need sw is on while
+ * updating state within a swtnl call.
+ */
+ boolean in_swtnl_draw;
};
@@ -312,6 +326,9 @@ struct svga_context
unsigned shader_id;
unsigned disable_shader;
+
+ boolean no_line_width;
+ boolean force_hw_line_stipple;
} debug;
struct {
@@ -327,7 +344,7 @@ struct svga_context
struct util_bitmask *vs_bm;
struct {
- unsigned dirty[4];
+ unsigned dirty[SVGA_STATE_MAX];
unsigned texture_timestamp;
@@ -350,6 +367,9 @@ struct svga_context
/** List of buffers with queued transfers */
struct list_head dirty_buffers;
+
+ /** Was the previous draw done with the SW path? */
+ boolean prev_draw_swtnl;
};
/* A flag for each state_tracker state object:
@@ -433,6 +453,8 @@ void svga_context_flush( struct svga_context *svga,
void svga_hwtnl_flush_retry( struct svga_context *svga );
+void svga_surfaces_flush(struct svga_context *svga);
+
struct pipe_context *
svga_context_create(struct pipe_screen *screen,
void *priv);
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
index 81dd4778d0..2c873a0f7a 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -28,6 +28,7 @@
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_upload_mgr.h"
#include "svga_context.h"
#include "svga_draw.h"
@@ -143,6 +144,9 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl )
SVGA3dPrimitiveRange *prim;
unsigned i;
+ /* Unmap upload manager vertex buffers */
+ u_upload_flush(svga->upload_vb);
+
for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]);
if (handle == NULL)
@@ -151,6 +155,9 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl )
vb_handle[i] = handle;
}
+ /* Unmap upload manager index buffers */
+ u_upload_flush(svga->upload_ib);
+
for (i = 0; i < hwtnl->cmd.prim_count; i++) {
if (hwtnl->cmd.prim_ib[i]) {
handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]);
@@ -315,7 +322,6 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
break;
}
- assert(!stride || width <= stride);
if (max_index != ~0) {
assert(offset + (index_bias + max_index) * stride + width <= size);
}
diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c
index da33fae62f..a6518042eb 100644
--- a/src/gallium/drivers/svga/svga_draw_arrays.c
+++ b/src/gallium/drivers/svga/svga_draw_arrays.c
@@ -53,6 +53,7 @@ static enum pipe_error generate_indices( struct svga_hwtnl *hwtnl,
dst = pipe_buffer_create( pipe->screen,
PIPE_BIND_INDEX_BUFFER,
+ PIPE_USAGE_STATIC,
size );
if (dst == NULL)
goto fail;
@@ -65,14 +66,14 @@ static enum pipe_error generate_indices( struct svga_hwtnl *hwtnl,
generate( nr,
dst_map );
- pipe_buffer_unmap( pipe, dst, transfer );
+ pipe_buffer_unmap( pipe, transfer );
*out_buf = dst;
return PIPE_OK;
fail:
if (dst_map)
- pipe_buffer_unmap( pipe, dst, transfer );
+ pipe_buffer_unmap( pipe, transfer );
if (dst)
pipe->screen->resource_destroy( pipe->screen, dst );
diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c
index c4579177b7..7d420c6b29 100644
--- a/src/gallium/drivers/svga/svga_draw_elements.c
+++ b/src/gallium/drivers/svga/svga_draw_elements.c
@@ -56,6 +56,7 @@ translate_indices( struct svga_hwtnl *hwtnl,
dst = pipe_buffer_create( pipe->screen,
PIPE_BIND_INDEX_BUFFER,
+ PIPE_USAGE_STATIC,
size );
if (dst == NULL)
goto fail;
@@ -72,18 +73,18 @@ translate_indices( struct svga_hwtnl *hwtnl,
nr,
dst_map );
- pipe_buffer_unmap( pipe, src, src_transfer );
- pipe_buffer_unmap( pipe, dst, dst_transfer );
+ pipe_buffer_unmap( pipe, src_transfer );
+ pipe_buffer_unmap( pipe, dst_transfer );
*out_buf = dst;
return PIPE_OK;
fail:
if (src_map)
- pipe_buffer_unmap( pipe, src, src_transfer );
+ pipe_buffer_unmap( pipe, src_transfer );
if (dst_map)
- pipe_buffer_unmap( pipe, dst, dst_transfer );
+ pipe_buffer_unmap( pipe, dst_transfer );
if (dst)
pipe->screen->resource_destroy( pipe->screen, dst );
@@ -120,14 +121,17 @@ svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl,
if (index_buffer &&
svga_buffer_is_user_buffer(index_buffer))
{
+ boolean flushed;
assert( index_buffer->width0 >= index_offset + count * index_size );
ret = u_upload_buffer( hwtnl->upload_ib,
+ 0,
index_offset,
count * index_size,
index_buffer,
&index_offset,
- &upload_buffer );
+ &upload_buffer,
+ &flushed );
if (ret)
goto done;
diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c
index 426698806c..c87afb6946 100644
--- a/src/gallium/drivers/svga/svga_pipe_blit.c
+++ b/src/gallium/drivers/svga/svga_pipe_blit.c
@@ -50,7 +50,9 @@ static void svga_surface_copy(struct pipe_context *pipe,
struct pipe_surface *srcsurf, *dstsurf;*/
unsigned dst_face, dst_z, src_face, src_z;
- svga_hwtnl_flush_retry( svga );
+ /* Emit buffered drawing commands, and any back copies.
+ */
+ svga_surfaces_flush( svga );
#if 0
srcsurf = screen->get_tex_surface(screen, src_tex,
diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c
index 001ec3616c..fda5c28433 100644
--- a/src/gallium/drivers/svga/svga_pipe_draw.c
+++ b/src/gallium/drivers/svga/svga_pipe_draw.c
@@ -157,6 +157,14 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (!u_trim_pipe_prim( info->mode, &count ))
return;
+ if (svga->state.sw.need_swtnl != svga->prev_draw_swtnl) {
+ /* We're switching between SW and HW drawing. Do a flush to avoid
+ * mixing HW and SW rendering with the same vertex buffer.
+ */
+ pipe->flush(pipe, NULL);
+ svga->prev_draw_swtnl = svga->state.sw.need_swtnl;
+ }
+
/*
* Mark currently bound target surfaces as dirty
* doesn't really matter if it is done before drawing.
diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c
index ab243aa6ec..4578c136cb 100644
--- a/src/gallium/drivers/svga/svga_pipe_flush.c
+++ b/src/gallium/drivers/svga/svga_pipe_flush.c
@@ -24,6 +24,7 @@
**********************************************************/
#include "pipe/p_defines.h"
+#include "util/u_string.h"
#include "svga_screen.h"
#include "svga_surface.h"
#include "svga_context.h"
@@ -31,31 +32,40 @@
static void svga_flush( struct pipe_context *pipe,
- unsigned flags,
struct pipe_fence_handle **fence )
{
struct svga_context *svga = svga_context(pipe);
- int i;
- /* Emit buffered drawing commands.
+ /* Emit buffered drawing commands, and any back copies.
*/
- svga_hwtnl_flush_retry( svga );
-
- /* Emit back-copy from render target view to texture.
- */
- for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
- if (svga->curr.framebuffer.cbufs[i])
- svga_propagate_surface(pipe, svga->curr.framebuffer.cbufs[i]);
- }
- if (svga->curr.framebuffer.zsbuf)
- svga_propagate_surface(pipe, svga->curr.framebuffer.zsbuf);
+ svga_surfaces_flush( svga );
/* Flush command queue.
*/
svga_context_flush(svga, fence);
- SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s flags %x fence_ptr %p\n",
- __FUNCTION__, flags, fence ? *fence : 0x0);
+ SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s fence_ptr %p\n",
+ __FUNCTION__, fence ? *fence : 0x0);
+
+ /* Enable to dump BMPs of the color/depth buffers each frame */
+ if (0) {
+ struct pipe_framebuffer_state *fb = &svga->curr.framebuffer;
+ static unsigned frame_no = 1;
+ char filename[256];
+ unsigned i;
+
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ util_snprintf(filename, sizeof(filename), "cbuf%u_%04u", i, frame_no);
+ debug_dump_surface_bmp(&svga->pipe, filename, fb->cbufs[i]);
+ }
+
+ if (0 && fb->zsbuf) {
+ util_snprintf(filename, sizeof(filename), "zsbuf_%04u", frame_no);
+ debug_dump_surface_bmp(&svga->pipe, filename, fb->zsbuf);
+ }
+
+ ++frame_no;
+ }
}
diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c
index 8c24fb302f..440919c626 100644
--- a/src/gallium/drivers/svga/svga_pipe_misc.c
+++ b/src/gallium/drivers/svga/svga_pipe_misc.c
@@ -94,7 +94,7 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe,
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
if (dst->cbufs[i] && dst->cbufs[i] != fb->cbufs[i])
- svga_propagate_surface(pipe, dst->cbufs[i]);
+ svga_propagate_surface(svga, dst->cbufs[i]);
}
/* XXX: Actually the virtual hardware may support rendertargets with
diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
index 660eb0757a..4a1a37f176 100644
--- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c
+++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
@@ -64,18 +64,19 @@ static void *
svga_create_rasterizer_state(struct pipe_context *pipe,
const struct pipe_rasterizer_state *templ)
{
+ struct svga_context *svga = svga_context(pipe);
struct svga_rasterizer_state *rast = CALLOC_STRUCT( svga_rasterizer_state );
+
/* need this for draw module. */
rast->templ = *templ;
- /* light_twoside - XXX: need fragment shader varient */
+ /* light_twoside - XXX: need fragment shader variant */
/* poly_smooth - XXX: no fallback available */
/* poly_stipple_enable - draw module */
/* sprite_coord_enable - ? */
/* point_quad_rasterization - ? */
/* point_size_per_vertex - ? */
/* sprite_coord_mode - ??? */
- /* bypass_vs_viewport_and_clip - handled by viewport setup */
/* flatshade_first - handled by index translation */
/* gl_rasterization_rules - XXX - viewport code */
/* line_width - draw module */
@@ -93,17 +94,22 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
/* Use swtnl + decomposition implement these:
*/
- if (templ->poly_stipple_enable)
+ if (templ->poly_stipple_enable) {
rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+ rast->need_pipeline_tris_str = "poly stipple";
+ }
- if (templ->line_width != 1.0 &&
- templ->line_width != 0.0)
+ if (templ->line_width >= 1.5f &&
+ !svga->debug.no_line_width) {
rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES;
+ rast->need_pipeline_lines_str = "line width";
+ }
if (templ->line_stipple_enable) {
- /* LinePattern not implemented on all backends.
+ /* XXX: LinePattern not implemented on all backends, and there is no
+ * mechanism to query it.
*/
- if (0) {
+ if (!svga->debug.force_hw_line_stipple) {
SVGA3dLinePattern lp;
lp.repeat = templ->line_stipple_factor + 1;
lp.pattern = templ->line_stipple_pattern;
@@ -111,11 +117,19 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
}
else {
rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES;
+ rast->need_pipeline_lines_str = "line stipple";
}
}
- if (templ->point_smooth)
+ if (templ->point_smooth) {
rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS;
+ rast->need_pipeline_points_str = "smooth points";
+ }
+
+ if (templ->line_smooth) {
+ rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES;
+ rast->need_pipeline_lines_str = "smooth lines";
+ }
{
int fill_front = templ->fill_front;
@@ -148,6 +162,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
* front/back fill modes:
*/
rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+ rast->need_pipeline_tris_str = "different front/back fillmodes";
}
else {
offset = offset_front;
@@ -172,6 +187,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
{
fill = PIPE_POLYGON_MODE_FILL;
rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+ rast->need_pipeline_tris_str = "unfilled primitives with no index manipulation";
}
/* If we are decomposing to lines, and lines need the pipeline,
@@ -182,6 +198,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
{
fill = PIPE_POLYGON_MODE_FILL;
rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+ rast->need_pipeline_tris_str = "decomposing lines";
}
/* Similarly for points:
@@ -191,6 +208,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
{
fill = PIPE_POLYGON_MODE_FILL;
rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+ rast->need_pipeline_tris_str = "decomposing points";
}
if (offset) {
@@ -201,9 +219,6 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
rast->hw_unfilled = fill;
}
-
-
-
if (rast->need_pipeline & SVGA_PIPELINE_FLAG_TRIS) {
/* Turn off stuff which will get done in the draw module:
*/
diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c
index f44a0e1325..446fcc4407 100644
--- a/src/gallium/drivers/svga/svga_pipe_sampler.c
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -144,8 +144,9 @@ svga_create_sampler_state(struct pipe_context *pipe,
return cso;
}
-static void svga_bind_sampler_states(struct pipe_context *pipe,
- unsigned num, void **sampler)
+static void
+svga_bind_fragment_sampler_states(struct pipe_context *pipe,
+ unsigned num, void **sampler)
{
struct svga_context *svga = svga_context(pipe);
unsigned i;
@@ -203,9 +204,10 @@ svga_sampler_view_destroy(struct pipe_context *pipe,
FREE(view);
}
-static void svga_set_sampler_views(struct pipe_context *pipe,
- unsigned num,
- struct pipe_sampler_view **views)
+static void
+svga_set_fragment_sampler_views(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_sampler_view **views)
{
struct svga_context *svga = svga_context(pipe);
unsigned flag_1d = 0;
@@ -256,9 +258,9 @@ static void svga_set_sampler_views(struct pipe_context *pipe,
void svga_init_sampler_functions( struct svga_context *svga )
{
svga->pipe.create_sampler_state = svga_create_sampler_state;
- svga->pipe.bind_fragment_sampler_states = svga_bind_sampler_states;
+ svga->pipe.bind_fragment_sampler_states = svga_bind_fragment_sampler_states;
svga->pipe.delete_sampler_state = svga_delete_sampler_state;
- svga->pipe.set_fragment_sampler_views = svga_set_sampler_views;
+ svga->pipe.set_fragment_sampler_views = svga_set_fragment_sampler_views;
svga->pipe.create_sampler_view = svga_create_sampler_view;
svga->pipe.sampler_view_destroy = svga_sampler_view_destroy;
}
diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c
index 86c79459f3..5846991073 100644
--- a/src/gallium/drivers/svga/svga_pipe_vertex.c
+++ b/src/gallium/drivers/svga/svga_pipe_vertex.c
@@ -27,6 +27,7 @@
#include "pipe/p_defines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_transfer.h"
#include "tgsi/tgsi_parse.h"
#include "svga_screen.h"
diff --git a/src/gallium/drivers/svga/svga_resource.c b/src/gallium/drivers/svga/svga_resource.c
index ef2a0c40f0..6e0622a312 100644
--- a/src/gallium/drivers/svga/svga_resource.c
+++ b/src/gallium/drivers/svga/svga_resource.c
@@ -33,13 +33,13 @@ svga_resource_from_handle(struct pipe_screen * screen,
void
svga_init_resource_functions(struct svga_context *svga)
{
- svga->pipe.is_resource_referenced = u_is_resource_referenced_vtbl;
svga->pipe.get_transfer = u_get_transfer_vtbl;
svga->pipe.transfer_map = u_transfer_map_vtbl;
svga->pipe.transfer_flush_region = u_transfer_flush_region_vtbl;
svga->pipe.transfer_unmap = u_transfer_unmap_vtbl;
svga->pipe.transfer_destroy = u_transfer_destroy_vtbl;
svga->pipe.transfer_inline_write = u_transfer_inline_write_vtbl;
+ svga->pipe.redefine_user_buffer = svga_redefine_user_buffer;
}
void
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c
index f12e2b6862..2d7c524d86 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer.c
@@ -51,53 +51,104 @@ svga_buffer_needs_hw_storage(unsigned usage)
}
-static unsigned int
-svga_buffer_is_referenced( struct pipe_context *pipe,
- struct pipe_resource *buf,
- unsigned level, int layer)
+/**
+ * Map a range of a buffer.
+ *
+ * Unlike texture DMAs (which are written immediately to the command buffer and
+ * therefore inherently serialized with other context operations), for buffers
+ * we try to coalesce multiple range mappings (i.e, multiple calls to this
+ * function) into a single DMA command, for better efficiency in command
+ * processing. This means we need to exercise extra care here to ensure that
+ * the end result is exactly the same as if one DMA was used for every mapped
+ * range.
+ */
+static void *
+svga_buffer_map_range( struct pipe_context *pipe,
+ struct pipe_resource *buf,
+ unsigned offset,
+ unsigned length,
+ unsigned usage )
{
+ struct svga_context *svga = svga_context(pipe);
struct svga_screen *ss = svga_screen(pipe->screen);
- struct svga_buffer *sbuf = svga_buffer(buf);
-
- /**
- * XXX: Check this.
- * The screen may cache buffer writes, but when we map, we map out
- * of those cached writes, so we don't need to set a
- * PIPE_REFERENCED_FOR_WRITE flag for cached buffers.
- */
-
- if (!sbuf->handle || ss->sws->surface_is_flushed(ss->sws, sbuf->handle))
- return PIPE_UNREFERENCED;
-
- /**
- * sws->surface_is_flushed() does not distinguish between read references
- * and write references. So assume a reference is both,
- * however, we make an exception for index- and vertex buffers, to avoid
- * a flush in st_bufferobj_get_subdata, during display list replay.
- */
+ struct svga_winsys_screen *sws = ss->sws;
+ struct svga_buffer *sbuf = svga_buffer( buf );
+ void *map;
- if (sbuf->b.b.bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
- return PIPE_REFERENCED_FOR_READ;
+ if (usage & PIPE_TRANSFER_WRITE) {
+ if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
+ /*
+ * Finish writing any pending DMA commands, and tell the host to discard
+ * the buffer contents on the next DMA operation.
+ */
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
+ if (sbuf->dma.pending) {
+ svga_buffer_upload_flush(svga, sbuf);
+ /*
+ * Instead of flushing the context command buffer, simply discard
+ * the current hwbuf, and start a new one.
+ */
+ svga_buffer_destroy_hw_storage(ss, sbuf);
+ }
+ sbuf->map.num_ranges = 0;
+ sbuf->dma.flags.discard = TRUE;
+ }
+ if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
+ if (!sbuf->map.num_ranges) {
+ /*
+ * No pending ranges to upload so far, so we can tell the host to
+ * not synchronize on the next DMA command.
+ */
+ sbuf->dma.flags.unsynchronized = TRUE;
+ }
+ } else {
+ /*
+ * Synchronizing, so finish writing any pending DMA command, and
+ * ensure the next DMA will be done in order.
+ */
-static void *
-svga_buffer_map_range( struct pipe_screen *screen,
- struct pipe_resource *buf,
- unsigned offset,
- unsigned length,
- unsigned usage )
-{
- struct svga_screen *ss = svga_screen(screen);
- struct svga_winsys_screen *sws = ss->sws;
- struct svga_buffer *sbuf = svga_buffer( buf );
- void *map;
+ if (sbuf->dma.pending) {
+ svga_buffer_upload_flush(svga, sbuf);
+
+ if (sbuf->hwbuf) {
+ /*
+ * We have a pending DMA upload from a hardware buffer, therefore
+ * we need to ensure that the host finishes processing that DMA
+ * command before the state tracker can start overwriting the
+ * hardware buffer.
+ *
+ * XXX: This could be avoided by tying the hardware buffer to
+ * the transfer (just as done with textures), which would allow
+ * overlapping DMAs commands to be queued on the same context
+ * buffer. However, due to the likelihood of software vertex
+ * processing, it is more convenient to hold on to the hardware
+ * buffer, allowing to quickly access the contents from the CPU
+ * without having to do a DMA download from the host.
+ */
+
+ if (usage & PIPE_TRANSFER_DONTBLOCK) {
+ /*
+ * Flushing the command buffer here will most likely cause
+ * the map of the hwbuf below to block, so preemptively
+ * return NULL here if DONTBLOCK is set to prevent unnecessary
+ * command buffer flushes.
+ */
+
+ return NULL;
+ }
+
+ svga_context_flush(svga, NULL);
+ }
+ }
+
+ sbuf->dma.flags.unsynchronized = FALSE;
+ }
+ }
if (!sbuf->swbuf && !sbuf->hwbuf) {
if (svga_buffer_create_hw_storage(ss, sbuf) != PIPE_OK) {
@@ -105,9 +156,12 @@ svga_buffer_map_range( struct pipe_screen *screen,
* We can't create a hardware buffer big enough, so create a malloc
* buffer instead.
*/
- debug_printf("%s: failed to allocate %u KB of DMA, splitting DMA transfers\n",
- __FUNCTION__,
- (sbuf->b.b.width0 + 1023)/1024);
+ if (0) {
+ debug_printf("%s: failed to allocate %u KB of DMA, "
+ "splitting DMA transfers\n",
+ __FUNCTION__,
+ (sbuf->b.b.width0 + 1023)/1024);
+ }
sbuf->swbuf = align_malloc(sbuf->b.b.width0, 16);
}
@@ -141,12 +195,12 @@ svga_buffer_map_range( struct pipe_screen *screen,
static void
-svga_buffer_flush_mapped_range( struct pipe_screen *screen,
+svga_buffer_flush_mapped_range( struct pipe_context *pipe,
struct pipe_resource *buf,
unsigned offset, unsigned length)
{
struct svga_buffer *sbuf = svga_buffer( buf );
- struct svga_screen *ss = svga_screen(screen);
+ struct svga_screen *ss = svga_screen(pipe->screen);
pipe_mutex_lock(ss->swc_mutex);
assert(sbuf->map.writing);
@@ -158,10 +212,10 @@ svga_buffer_flush_mapped_range( struct pipe_screen *screen,
}
static void
-svga_buffer_unmap( struct pipe_screen *screen,
+svga_buffer_unmap( struct pipe_context *pipe,
struct pipe_resource *buf)
{
- struct svga_screen *ss = svga_screen(screen);
+ struct svga_screen *ss = svga_screen(pipe->screen);
struct svga_winsys_screen *sws = ss->sws;
struct svga_buffer *sbuf = svga_buffer( buf );
@@ -174,11 +228,18 @@ svga_buffer_unmap( struct pipe_screen *screen,
if(sbuf->hwbuf)
sws->buffer_unmap(sws, sbuf->hwbuf);
- if(sbuf->map.writing) {
- if(!sbuf->map.flush_explicit) {
- /* No mapped range was flushed -- flush the whole buffer */
+ if (sbuf->map.writing) {
+ if (!sbuf->map.flush_explicit) {
+ /*
+ * Mapped range not flushed explicitly, so flush the whole buffer,
+ * and tell the host to discard the contents when processing the DMA
+ * command.
+ */
+
SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n");
+ sbuf->dma.flags.discard = TRUE;
+
svga_buffer_add_range(sbuf, 0, sbuf->b.b.width0);
}
@@ -225,7 +286,7 @@ static void *
svga_buffer_transfer_map( struct pipe_context *pipe,
struct pipe_transfer *transfer )
{
- uint8_t *map = svga_buffer_map_range( pipe->screen,
+ uint8_t *map = svga_buffer_map_range( pipe,
transfer->resource,
transfer->box.x,
transfer->box.width,
@@ -248,7 +309,7 @@ static void svga_buffer_transfer_flush_region( struct pipe_context *pipe,
{
assert(box->x + box->width <= transfer->box.width);
- svga_buffer_flush_mapped_range(pipe->screen,
+ svga_buffer_flush_mapped_range(pipe,
transfer->resource,
transfer->box.x + box->x,
box->width);
@@ -257,7 +318,7 @@ static void svga_buffer_transfer_flush_region( struct pipe_context *pipe,
static void svga_buffer_transfer_unmap( struct pipe_context *pipe,
struct pipe_transfer *transfer )
{
- svga_buffer_unmap(pipe->screen,
+ svga_buffer_unmap(pipe,
transfer->resource);
}
@@ -271,7 +332,6 @@ struct u_resource_vtbl svga_buffer_vtbl =
{
u_default_resource_get_handle, /* get_handle */
svga_buffer_destroy, /* resource_destroy */
- svga_buffer_is_referenced, /* is_resource_referenced */
u_default_get_transfer, /* get_transfer */
u_default_transfer_destroy, /* transfer_destroy */
svga_buffer_transfer_map, /* transfer_map */
@@ -308,6 +368,9 @@ svga_buffer_create(struct pipe_screen *screen,
goto error2;
}
+ debug_reference(&sbuf->b.b.reference,
+ (debug_reference_descriptor)debug_describe_resource, 0);
+
return &sbuf->b.b;
error2:
@@ -341,6 +404,9 @@ svga_user_buffer_create(struct pipe_screen *screen,
sbuf->swbuf = ptr;
sbuf->user = TRUE;
+
+ debug_reference(&sbuf->b.b.reference,
+ (debug_reference_descriptor)debug_describe_resource, 0);
return &sbuf->b.b;
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h
index d3ec11bfd5..c559f70ec1 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.h
+++ b/src/gallium/drivers/svga/svga_resource_buffer.h
@@ -243,4 +243,10 @@ svga_winsys_buffer_create(struct svga_context *svga,
unsigned usage,
unsigned size);
+void
+svga_redefine_user_buffer(struct pipe_context *ctx,
+ struct pipe_resource *resource,
+ unsigned offset,
+ unsigned size);
+
#endif /* SVGA_BUFFER_H */
diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
index 3de5216a94..0bfa8a14a6 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
@@ -40,6 +40,9 @@
#include "svga_debug.h"
+#define MAX_DMA_SIZE (4 * 1024 * 1024)
+
+
/**
* Allocate a winsys_buffer (ie. DMA, aka GMR memory).
*
@@ -57,6 +60,13 @@ svga_winsys_buffer_create( struct svga_context *svga,
struct svga_winsys_screen *sws = svgascreen->sws;
struct svga_winsys_buffer *buf;
+ /* XXX this shouldn't be a hard-coded number; it should be queried
+ * somehow.
+ */
+ if (size > MAX_DMA_SIZE) {
+ return NULL;
+ }
+
/* Just try */
buf = sws->buffer_create(sws, alignment, usage, size);
if(!buf) {
@@ -242,12 +252,17 @@ svga_buffer_upload_command(struct svga_context *svga,
* Patch up the upload DMA command reserved by svga_buffer_upload_command
* with the final ranges.
*/
-static void
+void
svga_buffer_upload_flush(struct svga_context *svga,
struct svga_buffer *sbuf)
{
SVGA3dCopyBox *boxes;
unsigned i;
+ struct pipe_resource *dummy;
+
+ if (!sbuf->dma.pending) {
+ return;
+ }
assert(sbuf->handle);
assert(sbuf->hwbuf);
@@ -285,17 +300,18 @@ svga_buffer_upload_flush(struct svga_context *svga,
sbuf->head.next = sbuf->head.prev = NULL;
#endif
sbuf->dma.pending = FALSE;
+ sbuf->dma.flags.discard = FALSE;
+ sbuf->dma.flags.unsynchronized = FALSE;
sbuf->dma.svga = NULL;
sbuf->dma.boxes = NULL;
- /* Decrement reference count */
- pipe_reference(&(sbuf->b.b.reference), NULL);
- sbuf = NULL;
+ /* Decrement reference count (and potentially destroy) */
+ dummy = &sbuf->b.b;
+ pipe_resource_reference(&dummy, NULL);
}
-
/**
* Note a dirty range.
*
@@ -326,12 +342,6 @@ svga_buffer_add_range(struct svga_buffer *sbuf,
/*
* Try to grow one of the ranges.
- *
- * Note that it is not this function task to care about overlapping ranges,
- * as the GMR was already given so it is too late to do anything. Situations
- * where overlapping ranges may pose a problem should be detected via
- * pipe_context::is_resource_referenced and the context that refers to the
- * buffer should be flushed.
*/
for(i = 0; i < sbuf->map.num_ranges; ++i) {
@@ -346,6 +356,11 @@ svga_buffer_add_range(struct svga_buffer *sbuf,
if (dist <= 0) {
/*
* Ranges are contiguous or overlapping -- extend this one and return.
+ *
+ * Note that it is not this function's task to prevent overlapping
+ * ranges, as the GMR was already given so it is too late to do
+ * anything. If the ranges overlap here it must surely be because
+ * PIPE_TRANSFER_UNSYNCHRONIZED was set.
*/
sbuf->map.ranges[i].start = MIN2(sbuf->map.ranges[i].start, start);
@@ -369,8 +384,7 @@ svga_buffer_add_range(struct svga_buffer *sbuf,
* pending DMA upload and start clean.
*/
- if(sbuf->dma.pending)
- svga_buffer_upload_flush(sbuf->dma.svga, sbuf);
+ svga_buffer_upload_flush(sbuf->dma.svga, sbuf);
assert(!sbuf->dma.pending);
assert(!sbuf->dma.svga);
@@ -638,3 +652,54 @@ svga_context_flush_buffers(struct svga_context *svga)
next = curr->next;
}
}
+
+
+void
+svga_redefine_user_buffer(struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned offset,
+ unsigned size)
+{
+ struct svga_screen *ss = svga_screen(pipe->screen);
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_buffer *sbuf = svga_buffer(resource);
+
+ assert(sbuf->user);
+
+ /*
+ * Release any uploaded user buffer.
+ *
+ * TODO: As an optimization, we could try to update the uploaded buffer
+ * instead.
+ */
+
+ pipe_resource_reference(&sbuf->uploaded.buffer, NULL);
+
+ pipe_mutex_lock(ss->swc_mutex);
+
+ if (offset + size > resource->width0) {
+ /*
+ * User buffers shouldn't have DMA directly, unless
+ * SVGA_COMBINE_USERBUFFERS is not set.
+ */
+
+ if (sbuf->dma.pending) {
+ svga_buffer_upload_flush(svga, sbuf);
+ }
+
+ if (sbuf->handle) {
+ svga_buffer_destroy_host_surface(ss, sbuf);
+ }
+
+ if (sbuf->hwbuf) {
+ svga_buffer_destroy_hw_storage(ss, sbuf);
+ }
+
+ sbuf->key.size.width = sbuf->b.b.width0 = offset + size;
+ }
+
+ pipe_mutex_unlock(ss->swc_mutex);
+
+ svga->curr.any_user_vertex_buffers = TRUE;
+ svga->dirty |= SVGA_NEW_VBUFFER | SVGA_NEW_VELEMENT;
+}
diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.h b/src/gallium/drivers/svga/svga_resource_buffer_upload.h
index 11df306526..13d8f3e299 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer_upload.h
+++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.h
@@ -28,6 +28,10 @@
void
+svga_buffer_upload_flush(struct svga_context *svga,
+ struct svga_buffer *sbuf);
+
+void
svga_buffer_add_range(struct svga_buffer *sbuf,
unsigned start,
unsigned end);
diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c
index 7c9e600b9f..b61f85955a 100644
--- a/src/gallium/drivers/svga/svga_resource_texture.c
+++ b/src/gallium/drivers/svga/svga_resource_texture.c
@@ -48,31 +48,6 @@
#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9)
-static unsigned int
-svga_texture_is_referenced( struct pipe_context *pipe,
- struct pipe_resource *texture,
- unsigned level, int layer)
-{
- struct svga_texture *tex = svga_texture(texture);
- struct svga_screen *ss = svga_screen(pipe->screen);
-
- /**
- * The screen does not cache texture writes.
- */
-
- if (!tex->handle || ss->sws->surface_is_flushed(ss->sws, tex->handle))
- return PIPE_UNREFERENCED;
-
- /**
- * sws->surface_is_flushed() does not distinguish between read references
- * and write references. So assume a reference is both.
- */
-
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
-
-
/*
* Helper function and arrays
*/
@@ -146,16 +121,6 @@ svga_translate_format_render(enum pipe_format format)
case PIPE_FORMAT_L8_UNORM:
return svga_translate_format(format);
-#if 1
- /* For on host conversion */
- case PIPE_FORMAT_DXT1_RGB:
- return SVGA3D_X8R8G8B8;
- case PIPE_FORMAT_DXT1_RGBA:
- case PIPE_FORMAT_DXT3_RGBA:
- case PIPE_FORMAT_DXT5_RGBA:
- return SVGA3D_A8R8G8B8;
-#endif
-
default:
return SVGA3D_FORMAT_INVALID;
}
@@ -166,7 +131,8 @@ static INLINE void
svga_transfer_dma_band(struct svga_context *svga,
struct svga_transfer *st,
SVGA3dTransferType transfer,
- unsigned y, unsigned h, unsigned srcy)
+ unsigned y, unsigned h, unsigned srcy,
+ SVGA3dSurfaceDMAFlags flags)
{
struct svga_texture *texture = svga_texture(st->base.resource);
SVGA3dCopyBox box;
@@ -202,10 +168,10 @@ svga_transfer_dma_band(struct svga_context *svga,
util_format_get_blocksize(texture->b.b.format) * 8 /
(util_format_get_blockwidth(texture->b.b.format)*util_format_get_blockheight(texture->b.b.format)));
- ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1);
+ ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags);
if(ret != PIPE_OK) {
- svga->swc->flush(svga->swc, NULL);
- ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1);
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags);
assert(ret == PIPE_OK);
}
}
@@ -214,7 +180,8 @@ svga_transfer_dma_band(struct svga_context *svga,
static INLINE void
svga_transfer_dma(struct svga_context *svga,
struct svga_transfer *st,
- SVGA3dTransferType transfer)
+ SVGA3dTransferType transfer,
+ SVGA3dSurfaceDMAFlags flags)
{
struct svga_texture *texture = svga_texture(st->base.resource);
struct svga_screen *screen = svga_screen(texture->b.b.screen);
@@ -225,11 +192,17 @@ svga_transfer_dma(struct svga_context *svga,
SVGA_DBG(DEBUG_PERF, "%s: readback transfer\n", __FUNCTION__);
}
+ /* Ensure any pending operations on host surfaces are queued on the command
+ * buffer first.
+ */
+ svga_surfaces_flush( svga );
if(!st->swbuf) {
/* Do the DMA transfer in a single go */
- svga_transfer_dma_band(svga, st, transfer, st->base.box.y, st->base.box.height, 0);
+ svga_transfer_dma_band(svga, st, transfer,
+ st->base.box.y, st->base.box.height, 0,
+ flags);
if(transfer == SVGA3D_READ_HOST_VRAM) {
svga_context_flush(svga, &fence);
@@ -275,7 +248,14 @@ svga_transfer_dma(struct svga_context *svga,
}
}
- svga_transfer_dma_band(svga, st, transfer, y, h, srcy);
+ svga_transfer_dma_band(svga, st, transfer, y, h, srcy, flags);
+
+ /*
+ * Prevent the texture contents to be discarded on the next band
+ * upload.
+ */
+
+ flags.discard = FALSE;
if(transfer == SVGA3D_READ_HOST_VRAM) {
svga_context_flush(svga, &fence);
@@ -390,18 +370,25 @@ svga_texture_get_transfer(struct pipe_context *pipe,
if(st->hw_nblocksy < nblocksy) {
/* We couldn't allocate a hardware buffer big enough for the transfer,
* so allocate regular malloc memory instead */
- debug_printf("%s: failed to allocate %u KB of DMA, splitting into %u x %u KB DMA transfers\n",
- __FUNCTION__,
- (nblocksy*st->base.stride + 1023)/1024,
- (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy,
- (st->hw_nblocksy*st->base.stride + 1023)/1024);
+ if (0) {
+ debug_printf("%s: failed to allocate %u KB of DMA, "
+ "splitting into %u x %u KB DMA transfers\n",
+ __FUNCTION__,
+ (nblocksy*st->base.stride + 1023)/1024,
+ (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy,
+ (st->hw_nblocksy*st->base.stride + 1023)/1024);
+ }
+
st->swbuf = MALLOC(nblocksy*st->base.stride);
if(!st->swbuf)
goto no_swbuf;
}
- if (usage & PIPE_TRANSFER_READ)
- svga_transfer_dma(svga, st, SVGA3D_READ_HOST_VRAM);
+ if (usage & PIPE_TRANSFER_READ) {
+ SVGA3dSurfaceDMAFlags flags;
+ memset(&flags, 0, sizeof flags);
+ svga_transfer_dma(svga, st, SVGA3D_READ_HOST_VRAM, flags);
+ }
return &st->base;
@@ -460,7 +447,17 @@ svga_texture_transfer_destroy(struct pipe_context *pipe,
struct svga_transfer *st = svga_transfer(transfer);
if (st->base.usage & PIPE_TRANSFER_WRITE) {
- svga_transfer_dma(svga, st, SVGA3D_WRITE_HOST_VRAM);
+ SVGA3dSurfaceDMAFlags flags;
+
+ memset(&flags, 0, sizeof flags);
+ if (transfer->usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
+ flags.discard = TRUE;
+ }
+ if (transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
+ flags.unsynchronized = TRUE;
+ }
+
+ svga_transfer_dma(svga, st, SVGA3D_WRITE_HOST_VRAM, flags);
ss->texture_timestamp++;
tex->view_age[transfer->level] = ++(tex->age);
if (transfer->resource->target == PIPE_TEXTURE_CUBE)
@@ -483,7 +480,6 @@ struct u_resource_vtbl svga_texture_vtbl =
{
svga_texture_get_handle, /* get_handle */
svga_texture_destroy, /* resource_destroy */
- svga_texture_is_referenced, /* is_resource_referenced */
svga_texture_get_transfer, /* get_transfer */
svga_texture_transfer_destroy, /* transfer_destroy */
svga_texture_transfer_map, /* transfer_map */
@@ -527,7 +523,8 @@ svga_texture_create(struct pipe_screen *screen,
tex->key.numFaces = 1;
}
- tex->key.cachable = 1;
+ /* XXX: Disabled for now */
+ tex->key.cachable = 0;
if (template->bind & PIPE_BIND_SAMPLER_VIEW)
tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE;
@@ -571,6 +568,9 @@ svga_texture_create(struct pipe_screen *screen,
if (tex->handle)
SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture)\n", tex->handle);
+ debug_reference(&tex->b.b.reference,
+ (debug_reference_descriptor)debug_describe_resource, 0);
+
return &tex->b.b;
error2:
diff --git a/src/gallium/drivers/svga/svga_sampler_view.c b/src/gallium/drivers/svga/svga_sampler_view.c
index 6911f13f77..4f1f4b597e 100644
--- a/src/gallium/drivers/svga/svga_sampler_view.c
+++ b/src/gallium/drivers/svga/svga_sampler_view.c
@@ -32,6 +32,7 @@
#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
#include "svga_screen.h"
#include "svga_context.h"
@@ -41,14 +42,24 @@
#include "svga_surface.h"
+void
+svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv)
+{
+ char res[128];
+ debug_describe_resource(res, sv->texture);
+ util_sprintf(buf, "svga_sampler_view<%s,[%u,%u]>", res, sv->min_lod, sv->max_lod);
+}
+
struct svga_sampler_view *
svga_get_tex_sampler_view(struct pipe_context *pipe,
struct pipe_resource *pt,
unsigned min_lod, unsigned max_lod)
{
- struct svga_screen *ss = svga_screen(pt->screen);
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_screen *ss = svga_screen(pipe->screen);
struct svga_texture *tex = svga_texture(pt);
struct svga_sampler_view *sv = NULL;
+ SVGA3dSurfaceFlags flags = SVGA3D_SURFACE_HINT_TEXTURE;
SVGA3dSurfaceFormat format = svga_translate_format(pt->format);
boolean view = TRUE;
@@ -68,10 +79,6 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
if (min_lod == 0 && max_lod >= pt->last_level)
view = FALSE;
- if (util_format_is_s3tc(pt->format) && view) {
- format = svga_translate_format_render(pt->format);
- }
-
if (ss->debug.no_sampler_view)
view = FALSE;
@@ -113,6 +120,8 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
pt->last_level);
sv->key.cachable = 0;
sv->handle = tex->handle;
+ debug_reference(&sv->reference,
+ (debug_reference_descriptor)svga_debug_describe_sampler_view, 0);
return sv;
}
@@ -126,7 +135,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
pt->last_level);
sv->age = tex->age;
- sv->handle = svga_texture_view_surface(pipe, tex, format,
+ sv->handle = svga_texture_view_surface(svga, tex, flags, format,
min_lod,
max_lod - min_lod + 1,
-1, -1,
@@ -136,6 +145,8 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
assert(0);
sv->key.cachable = 0;
sv->handle = tex->handle;
+ debug_reference(&sv->reference,
+ (debug_reference_descriptor)svga_debug_describe_sampler_view, 0);
return sv;
}
@@ -143,6 +154,9 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
svga_sampler_view_reference(&tex->cached_view, sv);
pipe_mutex_unlock(ss->tex_mutex);
+ debug_reference(&sv->reference,
+ (debug_reference_descriptor)svga_debug_describe_sampler_view, 0);
+
return sv;
}
diff --git a/src/gallium/drivers/svga/svga_sampler_view.h b/src/gallium/drivers/svga/svga_sampler_view.h
index e64665f2e5..2087c1be85 100644
--- a/src/gallium/drivers/svga/svga_sampler_view.h
+++ b/src/gallium/drivers/svga/svga_sampler_view.h
@@ -83,12 +83,16 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *
void
svga_destroy_sampler_view_priv(struct svga_sampler_view *v);
+void
+svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv);
+
static INLINE void
svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_view *v)
{
struct svga_sampler_view *old = *ptr;
- if (pipe_reference(&(*ptr)->reference, &v->reference))
+ if (pipe_reference_described(&(*ptr)->reference, &v->reference,
+ (debug_reference_descriptor)svga_debug_describe_sampler_view))
svga_destroy_sampler_view_priv(old);
*ptr = v;
}
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index 078190342a..6c987abe05 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -35,7 +35,6 @@
#include "svga_resource_texture.h"
#include "svga_resource.h"
#include "svga_debug.h"
-#include "svga_surface.h"
#include "svga3d_shaderdefs.h"
@@ -226,13 +225,18 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
return svgascreen->use_ps30 ? 32 : 12;
return result.u;
case PIPE_SHADER_CAP_MAX_ADDRS:
- return svgascreen->use_ps30 ? 1 : 0;
+ case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+ /*
+ * Although PS 3.0 has some addressing abilities it can only represent
+ * loops that can be statically determined and unrolled. Given we can
+ * only handle a subset of the cases that the state tracker already
+ * does it is better to defer loop unrolling to the state tracker.
+ */
+ return 0;
case PIPE_SHADER_CAP_MAX_PREDS:
return svgascreen->use_ps30 ? 1 : 0;
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
return 1;
- case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
- return svgascreen->use_ps30 ? 1 : 0;
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
@@ -338,8 +342,7 @@ svga_is_format_supported( struct pipe_screen *screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned tex_usage,
- unsigned geom_flags )
+ unsigned tex_usage)
{
struct svga_winsys_screen *sws = svga_screen(screen)->sws;
SVGA3dDevCapIndex index;
@@ -361,13 +364,6 @@ svga_is_format_supported( struct pipe_screen *screen,
case PIPE_FORMAT_B5G5R5A1_UNORM:
return FALSE;
- /* Simulate ability to render into compressed textures */
- case PIPE_FORMAT_DXT1_RGB:
- case PIPE_FORMAT_DXT1_RGBA:
- case PIPE_FORMAT_DXT3_RGBA:
- case PIPE_FORMAT_DXT5_RGBA:
- return TRUE;
-
default:
break;
}
@@ -415,27 +411,26 @@ svga_fence_reference(struct pipe_screen *screen,
}
-static int
+static boolean
svga_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence,
- unsigned flag)
+ struct pipe_fence_handle *fence)
{
struct svga_winsys_screen *sws = svga_screen(screen)->sws;
- return sws->fence_signalled(sws, fence, flag);
+ return sws->fence_signalled(sws, fence, 0) == 0;
}
-static int
+static boolean
svga_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
- unsigned flag)
+ uint64_t timeout)
{
struct svga_winsys_screen *sws = svga_screen(screen)->sws;
SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s fence_ptr %p\n",
__FUNCTION__, fence);
- return sws->fence_finish(sws, fence, flag);
+ return sws->fence_finish(sws, fence, 0) == 0;
}
@@ -501,6 +496,12 @@ svga_screen_create(struct svga_winsys_screen *sws)
svga_init_screen_resource_functions(svgascreen);
+ if (sws->get_hw_version) {
+ svgascreen->hw_version = sws->get_hw_version(sws);
+ } else {
+ svgascreen->hw_version = SVGA3D_HWVERSION_WS65_B1;
+ }
+
svgascreen->use_ps30 =
sws->get_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, &result) &&
result.u >= SVGA3DPSVERSION_30 ? TRUE : FALSE;
diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h
index 86ec89d88c..7ef627f928 100644
--- a/src/gallium/drivers/svga/svga_screen.h
+++ b/src/gallium/drivers/svga/svga_screen.h
@@ -49,6 +49,8 @@ struct svga_screen
struct pipe_screen screen;
struct svga_winsys_screen *sws;
+ SVGA3dHardwareVersion hw_version;
+
unsigned use_ps30;
unsigned use_vs30;
diff --git a/src/gallium/drivers/svga/svga_state.h b/src/gallium/drivers/svga/svga_state.h
index 22d5a6d552..7f239e7a32 100644
--- a/src/gallium/drivers/svga/svga_state.h
+++ b/src/gallium/drivers/svga/svga_state.h
@@ -92,4 +92,8 @@ void svga_update_state_retry( struct svga_context *svga,
enum pipe_error svga_emit_initial_state( struct svga_context *svga );
+enum pipe_error svga_reemit_framebuffer_bindings( struct svga_context *svga );
+
+enum pipe_error svga_reemit_tss_bindings( struct svga_context *svga );
+
#endif
diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c
index 97c818cd37..6c3275e74c 100644
--- a/src/gallium/drivers/svga/svga_state_constants.c
+++ b/src/gallium/drivers/svga/svga_state_constants.c
@@ -40,9 +40,12 @@
/* Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_*
*/
-static int svga_shader_type( int unit )
+static int svga_shader_type( int shader )
{
- return unit + 1;
+ assert(PIPE_SHADER_VERTEX + 1 == SVGA3D_SHADERTYPE_VS);
+ assert(PIPE_SHADER_FRAGMENT + 1 == SVGA3D_SHADERTYPE_PS);
+ assert(shader <= PIPE_SHADER_FRAGMENT);
+ return shader + 1;
}
@@ -110,7 +113,7 @@ static int emit_consts( struct svga_context *svga,
done:
if (data)
- pipe_buffer_unmap(&svga->pipe, svga->curr.cb[unit], transfer);
+ pipe_buffer_unmap(&svga->pipe, transfer);
return ret;
}
diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c
index fcbb35e797..cdadb20c17 100644
--- a/src/gallium/drivers/svga/svga_state_framebuffer.c
+++ b/src/gallium/drivers/svga/svga_state_framebuffer.c
@@ -93,6 +93,55 @@ static int emit_framebuffer( struct svga_context *svga,
}
+/*
+ * Rebind rendertargets.
+ *
+ * Similar to emit_framebuffer, but without any state checking/update.
+ *
+ * Called at the beginning of every new command buffer to ensure that
+ * non-dirty rendertargets are properly paged-in.
+ */
+enum pipe_error
+svga_reemit_framebuffer_bindings(struct svga_context *svga)
+{
+ struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
+ unsigned i;
+ enum pipe_error ret;
+
+ for (i = 0; i < MIN2(PIPE_MAX_COLOR_BUFS, 8); ++i) {
+ if (hw->cbufs[i]) {
+ ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_COLOR0 + i, hw->cbufs[i]);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+ }
+ }
+
+ if (hw->zsbuf) {
+ ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_DEPTH, hw->zsbuf);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+
+ if (hw->zsbuf &&
+ hw->zsbuf->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM) {
+ ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, hw->zsbuf);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+ }
+ else {
+ ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, NULL);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+ }
+ }
+
+ return PIPE_OK;
+}
+
+
struct svga_tracked_state svga_hw_framebuffer =
{
"hw framebuffer state",
diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
index ad6f294713..9c04adec8e 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -136,7 +136,7 @@ static int make_fs_key( const struct svga_context *svga,
/* The blend workaround for simulating logicop xor behaviour
* requires that the incoming fragment color be white. This change
- * achieves that by creating a varient of the current fragment
+ * achieves that by creating a variant of the current fragment
* shader that overrides all output colors with 1,1,1,1
*
* This will work for most shaders, including those containing
diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c
index 66fea02a4b..68c0257878 100644
--- a/src/gallium/drivers/svga/svga_state_need_swtnl.c
+++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c
@@ -35,6 +35,11 @@
/***********************************************************************
*/
+
+/**
+ * Given a gallium vertex element format, return the corresponding SVGA3D
+ * format. Return SVGA3D_DECLTYPE_MAX for unsupported gallium formats.
+ */
static INLINE SVGA3dDeclType
svga_translate_vertex_format(enum pipe_format format)
{
@@ -80,6 +85,7 @@ static int update_need_swvfetch( struct svga_context *svga,
for (i = 0; i < svga->curr.velems->count; i++) {
svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.velems->velem[i].src_format);
if (svga->state.sw.ve_format[i] == SVGA3D_DECLTYPE_MAX) {
+ /* Unsupported format - use software fetch */
need_swvfetch = TRUE;
break;
}
@@ -118,6 +124,11 @@ static int update_need_pipeline( struct svga_context *svga,
__FUNCTION__,
svga->curr.rast->need_pipeline,
(1 << svga->curr.reduced_prim) );
+ SVGA_DBG(DEBUG_SWTNL, "%s: rast need_pipeline tris (%s), lines (%s), points (%s)\n",
+ __FUNCTION__,
+ svga->curr.rast->need_pipeline_tris_str,
+ svga->curr.rast->need_pipeline_lines_str,
+ svga->curr.rast->need_pipeline_points_str);
need_pipeline = TRUE;
}
@@ -140,6 +151,10 @@ static int update_need_pipeline( struct svga_context *svga,
svga->dirty |= SVGA_NEW_NEED_PIPELINE;
}
+ /* DEBUG */
+ if (0 && svga->state.sw.need_pipeline)
+ debug_printf("sw.need_pipeline = %d\n", svga->state.sw.need_pipeline);
+
return 0;
}
@@ -164,20 +179,28 @@ static int update_need_swtnl( struct svga_context *svga,
boolean need_swtnl;
if (svga->debug.no_swtnl) {
- svga->state.sw.need_swvfetch = 0;
- svga->state.sw.need_pipeline = 0;
+ svga->state.sw.need_swvfetch = FALSE;
+ svga->state.sw.need_pipeline = FALSE;
}
need_swtnl = (svga->state.sw.need_swvfetch ||
svga->state.sw.need_pipeline);
if (svga->debug.force_swtnl) {
- need_swtnl = 1;
+ need_swtnl = TRUE;
}
+ /*
+ * Some state changes the draw module does makes us belive we
+ * we don't need swtnl. This causes the vdecl code to pickup
+ * the wrong buffers and vertex formats. Try trivial/line-wide.
+ */
+ if (svga->state.sw.in_swtnl_draw)
+ need_swtnl = TRUE;
+
if (need_swtnl != svga->state.sw.need_swtnl) {
SVGA_DBG(DEBUG_SWTNL|DEBUG_PERF,
- "%s need_swvfetch: %s, need_pipeline %s\n",
+ "%s: need_swvfetch %s, need_pipeline %s\n",
__FUNCTION__,
svga->state.sw.need_swvfetch ? "true" : "false",
svga->state.sw.need_pipeline ? "true" : "false");
diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c
index f8b269a101..c502506b93 100644
--- a/src/gallium/drivers/svga/svga_state_tss.c
+++ b/src/gallium/drivers/svga/svga_state_tss.c
@@ -52,6 +52,16 @@ void svga_cleanup_tss_binding(struct svga_context *svga)
}
+struct bind_queue {
+ struct {
+ unsigned unit;
+ struct svga_hw_view_state *view;
+ } bind[PIPE_MAX_SAMPLERS];
+
+ unsigned bind_count;
+};
+
+
static int
update_tss_binding(struct svga_context *svga,
unsigned dirty )
@@ -63,15 +73,7 @@ update_tss_binding(struct svga_context *svga,
unsigned min_lod;
unsigned max_lod;
-
- struct {
- struct {
- unsigned unit;
- struct svga_hw_view_state *view;
- } bind[PIPE_MAX_SAMPLERS];
-
- unsigned bind_count;
- } queue;
+ struct bind_queue queue;
queue.bind_count = 0;
@@ -164,6 +166,64 @@ fail:
}
+/*
+ * Rebind textures.
+ *
+ * Similar to update_tss_binding, but without any state checking/update.
+ *
+ * Called at the beginning of every new command buffer to ensure that
+ * non-dirty textures are properly paged-in.
+ */
+enum pipe_error
+svga_reemit_tss_bindings(struct svga_context *svga)
+{
+ unsigned i;
+ enum pipe_error ret;
+ struct bind_queue queue;
+
+ queue.bind_count = 0;
+
+ for (i = 0; i < svga->state.hw_draw.num_views; i++) {
+ struct svga_hw_view_state *view = &svga->state.hw_draw.views[i];
+
+ if (view->v) {
+ queue.bind[queue.bind_count].unit = i;
+ queue.bind[queue.bind_count].view = view;
+ queue.bind_count++;
+ }
+ }
+
+ if (queue.bind_count) {
+ SVGA3dTextureState *ts;
+
+ ret = SVGA3D_BeginSetTextureState(svga->swc,
+ &ts,
+ queue.bind_count);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+
+ for (i = 0; i < queue.bind_count; i++) {
+ struct svga_winsys_surface *handle;
+
+ ts[i].stage = queue.bind[i].unit;
+ ts[i].name = SVGA3D_TS_BIND_TEXTURE;
+
+ assert(queue.bind[i].view->v);
+ handle = queue.bind[i].view->v->handle;
+ svga->swc->surface_relocation(svga->swc,
+ &ts[i].value,
+ handle,
+ SVGA_RELOC_READ);
+ }
+
+ SVGA_FIFOCommitAll(svga->swc);
+ }
+
+ return PIPE_OK;
+}
+
+
struct svga_tracked_state svga_hw_tss_binding = {
"texture binding emit",
SVGA_NEW_TEXTURE_BINDING |
diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c
index 3af7bf2b35..2f85f9488f 100644
--- a/src/gallium/drivers/svga/svga_state_vdecl.c
+++ b/src/gallium/drivers/svga/svga_state_vdecl.c
@@ -57,12 +57,14 @@ upload_user_buffers( struct svga_context *svga )
struct svga_buffer *buffer = svga_buffer(svga->curr.vb[i].buffer);
if (!buffer->uploaded.buffer) {
+ boolean flushed;
ret = u_upload_buffer( svga->upload_vb,
- 0,
+ 0, 0,
buffer->b.b.width0,
&buffer->b.b,
&buffer->uploaded.offset,
- &buffer->uploaded.buffer );
+ &buffer->uploaded.buffer,
+ &flushed);
if (ret)
return ret;
@@ -76,7 +78,6 @@ upload_user_buffers( struct svga_context *svga )
buffer->b.b.width0);
}
- pipe_resource_reference( &svga->curr.vb[i].buffer, buffer->uploaded.buffer );
svga->curr.vb[i].buffer_offset = buffer->uploaded.offset;
}
}
@@ -108,6 +109,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga,
for (i = 0; i < svga->curr.velems->count; i++) {
const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index];
unsigned usage, index;
+ struct svga_buffer *buffer = svga_buffer(vb->buffer);
svga_generate_vdecl_semantics( i, &usage, &index );
@@ -125,6 +127,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga,
svga_hwtnl_vdecl( svga->hwtnl,
i,
&decl,
+ buffer->uploaded.buffer ? buffer->uploaded.buffer :
vb->buffer );
}
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index 5133c70593..ae9a20ebb8 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -229,13 +229,11 @@ static int update_zero_stride( struct svga_context *svga,
translate->set_buffer(translate, vel->vertex_buffer_index,
mapped_buffer,
- vbuffer->stride, vbuffer->max_index);
+ vbuffer->stride, ~0);
translate->run(translate, 0, 1, 0,
svga->curr.zero_stride_constants);
- pipe_buffer_unmap(&svga->pipe,
- vbuffer->buffer,
- transfer);
+ pipe_buffer_unmap(&svga->pipe, transfer);
translate->release(translate);
}
diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c
index 3e4bed76c0..3e8fb5f027 100644
--- a/src/gallium/drivers/svga/svga_surface.c
+++ b/src/gallium/drivers/svga/svga_surface.c
@@ -100,8 +100,9 @@ svga_texture_copy_handle(struct svga_context *svga,
struct svga_winsys_surface *
-svga_texture_view_surface(struct pipe_context *pipe,
+svga_texture_view_surface(struct svga_context *svga,
struct svga_texture *tex,
+ SVGA3dSurfaceFlags flags,
SVGA3dSurfaceFormat format,
unsigned start_mip,
unsigned num_mip,
@@ -109,7 +110,7 @@ svga_texture_view_surface(struct pipe_context *pipe,
int zslice_pick,
struct svga_host_surface_cache_key *key) /* OUT */
{
- struct svga_screen *ss = svga_screen(pipe->screen);
+ struct svga_screen *ss = svga_screen(svga->pipe.screen);
struct svga_winsys_surface *handle;
uint32_t i, j;
unsigned z_offset = 0;
@@ -118,7 +119,7 @@ svga_texture_view_surface(struct pipe_context *pipe,
"svga: Create surface view: face %d zslice %d mips %d..%d\n",
face_pick, zslice_pick, start_mip, start_mip+num_mip-1);
- key->flags = 0;
+ key->flags = flags;
key->format = format;
key->numMipLevels = num_mip;
key->size.width = u_minify(tex->b.b.width0, start_mip);
@@ -161,7 +162,7 @@ svga_texture_view_surface(struct pipe_context *pipe,
u_minify(tex->b.b.depth0, i + start_mip) :
1);
- svga_texture_copy_handle(svga_context(pipe),
+ svga_texture_copy_handle(svga,
tex->handle,
0, 0, z_offset,
i + start_mip,
@@ -183,6 +184,7 @@ svga_create_surface(struct pipe_context *pipe,
struct pipe_resource *pt,
const struct pipe_surface *surf_tmpl)
{
+ struct svga_context *svga = svga_context(pipe);
struct svga_texture *tex = svga_texture(pt);
struct pipe_screen *screen = pipe->screen;
struct svga_surface *s;
@@ -191,6 +193,7 @@ svga_create_surface(struct pipe_context *pipe,
boolean render = (surf_tmpl->usage & (PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DEPTH_STENCIL)) ? TRUE : FALSE;
boolean view = FALSE;
+ SVGA3dSurfaceFlags flags;
SVGA3dSurfaceFormat format;
assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
@@ -219,10 +222,18 @@ svga_create_surface(struct pipe_context *pipe,
s->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer;
s->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
- if (!render)
+ if (!render) {
+ flags = SVGA3D_SURFACE_HINT_TEXTURE;
format = svga_translate_format(surf_tmpl->format);
- else
+ } else {
+ if (surf_tmpl->usage & PIPE_BIND_RENDER_TARGET) {
+ flags = SVGA3D_SURFACE_HINT_RENDERTARGET;
+ }
+ if (surf_tmpl->usage & PIPE_BIND_DEPTH_STENCIL) {
+ flags = SVGA3D_SURFACE_HINT_DEPTHSTENCIL;
+ }
format = svga_translate_format_render(surf_tmpl->format);
+ }
assert(format != SVGA3D_FORMAT_INVALID);
@@ -249,7 +260,8 @@ svga_create_surface(struct pipe_context *pipe,
SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n",
pt, surf_tmpl->u.tex.level, face, zslice, s);
- s->handle = svga_texture_view_surface(NULL, tex, format, surf_tmpl->u.tex.level,
+ s->handle = svga_texture_view_surface(svga, tex, flags, format,
+ surf_tmpl->u.tex.level,
1, face, zslice, &s->key);
s->real_face = 0;
s->real_level = 0;
@@ -329,7 +341,7 @@ void svga_mark_surfaces_dirty(struct svga_context *svga)
* pipe is optional context to inline the blit command in.
*/
void
-svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf)
+svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf)
{
struct svga_surface *s = svga_surface(surf);
struct svga_texture *tex = svga_texture(surf->texture);
@@ -354,7 +366,7 @@ svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf)
if (s->handle != tex->handle) {
SVGA_DBG(DEBUG_VIEWS, "svga: Surface propagate: tex %p, level %u, from %p\n", tex, surf->u.tex.level, surf);
- svga_texture_copy_handle(svga_context(pipe),
+ svga_texture_copy_handle(svga,
s->handle, 0, 0, 0, s->real_level, s->real_face,
tex->handle, 0, 0, zslice, surf->u.tex.level, face,
u_minify(tex->b.b.width0, surf->u.tex.level),
diff --git a/src/gallium/drivers/svga/svga_surface.h b/src/gallium/drivers/svga/svga_surface.h
index afb8326e1f..bffc8c22c6 100644
--- a/src/gallium/drivers/svga/svga_surface.h
+++ b/src/gallium/drivers/svga/svga_surface.h
@@ -56,14 +56,15 @@ struct svga_surface
extern void
-svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf);
+svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf);
extern boolean
svga_surface_needs_propagation(struct pipe_surface *surf);
struct svga_winsys_surface *
-svga_texture_view_surface(struct pipe_context *pipe,
+svga_texture_view_surface(struct svga_context *svga,
struct svga_texture *tex,
+ SVGA3dSurfaceFlags flags,
SVGA3dSurfaceFormat format,
unsigned start_mip,
unsigned num_mip,
diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c
index ff3da84272..ac9d637f8c 100644
--- a/src/gallium/drivers/svga/svga_swtnl_backend.c
+++ b/src/gallium/drivers/svga/svga_swtnl_backend.c
@@ -87,11 +87,14 @@ svga_vbuf_render_allocate_vertices( struct vbuf_render *render,
svga_render->vbuf_size = MAX2(size, svga_render->vbuf_alloc_size);
svga_render->vbuf = pipe_buffer_create(screen,
PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STREAM,
svga_render->vbuf_size);
if(!svga_render->vbuf) {
svga_context_flush(svga, NULL);
+ assert(!svga_render->vbuf);
svga_render->vbuf = pipe_buffer_create(screen,
PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STREAM,
svga_render->vbuf_size);
assert(svga_render->vbuf);
}
@@ -141,7 +144,7 @@ svga_vbuf_render_unmap_vertices( struct vbuf_render *render,
pipe_buffer_flush_mapped_range(&svga->pipe,
svga_render->vbuf_transfer,
offset, length);
- pipe_buffer_unmap(&svga->pipe, svga_render->vbuf, svga_render->vbuf_transfer);
+ pipe_buffer_unmap(&svga->pipe, svga_render->vbuf_transfer);
svga_render->min_index = min_index;
svga_render->max_index = max_index;
svga_render->vbuf_used = MAX2(svga_render->vbuf_used, used);
@@ -158,7 +161,7 @@ svga_vbuf_render_set_primitive( struct vbuf_render *render,
}
static void
-svga_vbuf_sumbit_state( struct svga_vbuf_render *svga_render )
+svga_vbuf_submit_state( struct svga_vbuf_render *svga_render )
{
struct svga_context *svga = svga_render->svga;
SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];
@@ -221,7 +224,8 @@ svga_vbuf_render_draw_arrays( struct vbuf_render *render,
unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size;
enum pipe_error ret = 0;
- svga_vbuf_sumbit_state(svga_render);
+ /* off to hardware */
+ svga_vbuf_submit_state(svga_render);
/* Need to call update_state() again as the draw module may have
* altered some of our state behind our backs. Testcase:
@@ -260,6 +264,7 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render,
svga_render->ibuf_size = MAX2(size, svga_render->ibuf_alloc_size);
svga_render->ibuf = pipe_buffer_create(screen,
PIPE_BIND_INDEX_BUFFER,
+ PIPE_USAGE_STREAM,
svga_render->ibuf_size);
svga_render->ibuf_offset = 0;
}
@@ -267,9 +272,8 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render,
pipe_buffer_write_nooverlap(&svga->pipe, svga_render->ibuf,
svga_render->ibuf_offset, 2 * nr_indices, indices);
-
/* off to hardware */
- svga_vbuf_sumbit_state(svga_render);
+ svga_vbuf_submit_state(svga_render);
/* Need to call update_state() again as the draw module may have
* altered some of our state behind our backs. Testcase:
diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c
index 814e8edd70..ad29c1b642 100644
--- a/src/gallium/drivers/svga/svga_swtnl_draw.c
+++ b/src/gallium/drivers/svga/svga_swtnl_draw.c
@@ -51,6 +51,9 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
assert(svga->state.sw.need_swtnl);
assert(draw);
+ /* Make sure that the need_swtnl flag does not go away */
+ svga->state.sw.in_swtnl_draw = TRUE;
+
ret = svga_update_state(svga, SVGA_STATE_SWTNL_DRAW);
if (ret) {
svga_context_flush(svga, NULL);
@@ -106,22 +109,23 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
* unmap vertex/index buffers
*/
for (i = 0; i < svga->curr.num_vertex_buffers; i++) {
- pipe_buffer_unmap(&svga->pipe, svga->curr.vb[i].buffer,
- vb_transfer[i]);
+ pipe_buffer_unmap(&svga->pipe, vb_transfer[i]);
draw_set_mapped_vertex_buffer(draw, i, NULL);
}
if (ib_transfer) {
- pipe_buffer_unmap(&svga->pipe, svga->curr.ib.buffer, ib_transfer);
+ pipe_buffer_unmap(&svga->pipe, ib_transfer);
draw_set_mapped_index_buffer(draw, NULL);
}
if (svga->curr.cb[PIPE_SHADER_VERTEX]) {
- pipe_buffer_unmap(&svga->pipe,
- svga->curr.cb[PIPE_SHADER_VERTEX],
- cb_transfer);
+ pipe_buffer_unmap(&svga->pipe, cb_transfer);
}
+ /* Now safe to remove the need_swtnl flag in any update_state call */
+ svga->state.sw.in_swtnl_draw = FALSE;
+ svga->dirty |= SVGA_NEW_NEED_PIPELINE | SVGA_NEW_NEED_SWVFETCH;
+
return ret;
}
diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c
index a759238293..efda2f605b 100644
--- a/src/gallium/drivers/svga/svga_swtnl_state.c
+++ b/src/gallium/drivers/svga/svga_swtnl_state.c
@@ -61,7 +61,7 @@ static void set_draw_viewport( struct svga_context *svga )
* going to be drawn with triangles, but we're not catching all
* cases where that will happen.
*/
- if (svga->curr.rast->templ.line_width > 1.0)
+ if (svga->curr.rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES)
{
adjx = SVGA_LINE_ADJ_X + 0.175;
adjy = SVGA_LINE_ADJ_Y - 0.175;
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index f2591c5721..99600cf5c0 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -57,7 +57,6 @@ translate_opcode(
case TGSI_OPCODE_MUL: return SVGA3DOP_MUL;
case TGSI_OPCODE_NOP: return SVGA3DOP_NOP;
case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM;
- case TGSI_OPCODE_SSG: return SVGA3DOP_SGN;
default:
debug_printf("Unkown opcode %u\n", opcode);
assert( 0 );
@@ -285,6 +284,41 @@ static void reset_temp_regs( struct svga_shader_emitter *emit )
}
+/* Replace the src with the temporary specified in the dst, but copying
+ * only the necessary channels, and preserving the original swizzle (which is
+ * important given that several opcodes have constraints in the allowed
+ * swizzles).
+ */
+static boolean emit_repl( struct svga_shader_emitter *emit,
+ SVGA3dShaderDestToken dst,
+ struct src_register *src0)
+{
+ unsigned src0_swizzle;
+ unsigned chan;
+
+ assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP);
+
+ src0_swizzle = src0->base.swizzle;
+
+ dst.mask = 0;
+ for (chan = 0; chan < 4; ++chan) {
+ unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3;
+ dst.mask |= 1 << swizzle;
+ }
+ assert(dst.mask);
+
+ src0->base.swizzle = SVGA3DSWIZZLE_NONE;
+
+ if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 ))
+ return FALSE;
+
+ *src0 = src( dst );
+ src0->base.swizzle = src0_swizzle;
+
+ return TRUE;
+}
+
+
static boolean submit_op0( struct svga_shader_emitter *emit,
SVGA3dShaderInstToken inst,
SVGA3dShaderDestToken dest )
@@ -333,14 +367,11 @@ static boolean submit_op2( struct svga_shader_emitter *emit,
src0.base.num != src1.base.num)
need_temp = TRUE;
- if (need_temp)
- {
+ if (need_temp) {
temp = get_temp( emit );
- if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 ))
+ if (!emit_repl( emit, temp, &src0 ))
return FALSE;
-
- src0 = src( temp );
}
if (!emit_op2( emit, inst, dest, src0, src1 ))
@@ -396,24 +427,18 @@ static boolean submit_op3( struct svga_shader_emitter *emit,
(type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
need_temp1 = TRUE;
- if (need_temp0)
- {
+ if (need_temp0) {
temp0 = get_temp( emit );
- if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 ))
+ if (!emit_repl( emit, temp0, &src0 ))
return FALSE;
-
- src0 = src( temp0 );
}
- if (need_temp1)
- {
+ if (need_temp1) {
temp1 = get_temp( emit );
- if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp1, src1 ))
+ if (!emit_repl( emit, temp1, &src1 ))
return FALSE;
-
- src1 = src( temp1 );
}
if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
@@ -478,24 +503,18 @@ static boolean submit_op4( struct svga_shader_emitter *emit,
(type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num))
need_temp3 = TRUE;
- if (need_temp0)
- {
+ if (need_temp0) {
temp0 = get_temp( emit );
- if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 ))
+ if (!emit_repl( emit, temp0, &src0 ))
return FALSE;
-
- src0 = src( temp0 );
}
- if (need_temp3)
- {
+ if (need_temp3) {
temp3 = get_temp( emit );
- if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp3, src3 ))
+ if (!emit_repl( emit, temp3, &src3 ))
return FALSE;
-
- src3 = src( temp3 );
}
if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 ))
@@ -509,6 +528,55 @@ static boolean submit_op4( struct svga_shader_emitter *emit,
}
+static boolean alias_src_dst( struct src_register src,
+ SVGA3dShaderDestToken dst )
+{
+ if (src.base.num != dst.num)
+ return FALSE;
+
+ if (SVGA3dShaderGetRegType(dst.value) !=
+ SVGA3dShaderGetRegType(src.base.value))
+ return FALSE;
+
+ return TRUE;
+}
+
+
+static boolean submit_lrp(struct svga_shader_emitter *emit,
+ SVGA3dShaderDestToken dst,
+ struct src_register src0,
+ struct src_register src1,
+ struct src_register src2)
+{
+ SVGA3dShaderDestToken tmp;
+ boolean need_dst_tmp = FALSE;
+
+ /* The dst reg must be a temporary, and not be the same as src0 or src2 */
+ if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
+ alias_src_dst(src0, dst) ||
+ alias_src_dst(src2, dst))
+ need_dst_tmp = TRUE;
+
+ if (need_dst_tmp) {
+ tmp = get_temp( emit );
+ tmp.mask = dst.mask;
+ }
+ else {
+ tmp = dst;
+ }
+
+ if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
+ return FALSE;
+
+ if (need_dst_tmp) {
+ if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
static boolean emit_def_const( struct svga_shader_emitter *emit,
SVGA3dShaderConstType type,
unsigned idx,
@@ -747,7 +815,7 @@ static boolean emit_fake_arl(struct svga_shader_emitter *emit,
static boolean emit_if(struct svga_shader_emitter *emit,
const struct tgsi_full_instruction *insn)
{
- const struct src_register src = translate_src_register(
+ struct src_register src0 = translate_src_register(
emit, &insn->Src[0] );
struct src_register zero = get_zero_immediate( emit );
SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
@@ -755,10 +823,23 @@ static boolean emit_if(struct svga_shader_emitter *emit,
if_token.control = SVGA3DOPCOMPC_NE;
zero = scalar(zero, TGSI_SWIZZLE_X);
+ if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) {
+ /*
+ * Max different constant registers readable per IFC instruction is 1.
+ */
+
+ SVGA3dShaderDestToken tmp = get_temp( emit );
+
+ if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
+ return FALSE;
+
+ src0 = scalar(src( tmp ), TGSI_SWIZZLE_X);
+ }
+
emit->dynamic_branching_level++;
return (emit_instruction( emit, if_token ) &&
- emit_src( emit, src ) &&
+ emit_src( emit, src0 ) &&
emit_src( emit, zero ) );
}
@@ -832,7 +913,7 @@ static boolean emit_cmp(struct svga_shader_emitter *emit,
*/
if (!submit_op2(emit, inst_token(SVGA3DOP_SLT), temp, src0, zero))
return FALSE;
- return submit_op3(emit, inst_token(SVGA3DOP_LRP), dst, src(temp), src1, src2);
+ return submit_lrp(emit, dst, src(temp), src1, src2);
}
/* CMP DST, SRC0, SRC2, SRC1 */
@@ -1066,6 +1147,41 @@ static boolean emit_cos(struct svga_shader_emitter *emit,
return TRUE;
}
+static boolean emit_ssg(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ SVGA3dShaderDestToken temp0 = get_temp( emit );
+ SVGA3dShaderDestToken temp1 = get_temp( emit );
+ struct src_register zero, one;
+
+ if (emit->unit == PIPE_SHADER_VERTEX) {
+ /* SGN DST, SRC0, TMP0, TMP1 */
+ return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0,
+ src( temp0 ), src( temp1 ) );
+ }
+
+ zero = get_zero_immediate( emit );
+ one = scalar( zero, TGSI_SWIZZLE_W );
+ zero = scalar( zero, TGSI_SWIZZLE_X );
+
+ /* CMP TMP0, SRC0, one, zero */
+ if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
+ writemask( temp0, dst.mask ), src0, one, zero ))
+ return FALSE;
+
+ /* CMP TMP1, negate(SRC0), negate(one), zero */
+ if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
+ writemask( temp1, dst.mask ), negate( src0 ), negate( one ),
+ zero ))
+ return FALSE;
+
+ /* ADD DST, TMP0, TMP1 */
+ return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ),
+ src( temp1 ) );
+}
/*
* ADD DST SRC0, negate(SRC0)
@@ -1588,6 +1704,10 @@ static boolean emit_deriv(struct svga_shader_emitter *emit,
}
else {
unsigned opcode;
+ const struct tgsi_full_src_register *reg = &insn->Src[0];
+ SVGA3dShaderInstToken inst;
+ SVGA3dShaderDestToken dst;
+ struct src_register src0;
switch (insn->Instruction.Opcode) {
case TGSI_OPCODE_DDX:
@@ -1600,7 +1720,21 @@ static boolean emit_deriv(struct svga_shader_emitter *emit,
return FALSE;
}
- return emit_simple_instruction( emit, opcode, insn );
+ inst = inst_token( opcode );
+ dst = translate_dst_register( emit, insn, 0 );
+ src0 = translate_src_register( emit, reg );
+
+ /* We cannot use negate or abs on source to dsx/dsy instruction.
+ */
+ if (reg->Register.Absolute ||
+ reg->Register.Negate) {
+ SVGA3dShaderDestToken temp = get_temp( emit );
+
+ if (!emit_repl( emit, temp, &src0 ))
+ return FALSE;
+ }
+
+ return submit_op1( emit, inst, dst, src0 );
}
}
@@ -1624,19 +1758,6 @@ static boolean emit_arl(struct svga_shader_emitter *emit,
}
}
-static boolean alias_src_dst( struct src_register src,
- SVGA3dShaderDestToken dst )
-{
- if (src.base.num != dst.num)
- return FALSE;
-
- if (SVGA3dShaderGetRegType(dst.value) !=
- SVGA3dShaderGetRegType(src.base.value))
- return FALSE;
-
- return TRUE;
-}
-
static boolean emit_pow(struct svga_shader_emitter *emit,
const struct tgsi_full_instruction *insn)
{
@@ -1729,37 +1850,14 @@ static boolean emit_lrp(struct svga_shader_emitter *emit,
const struct tgsi_full_instruction *insn)
{
SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
- SVGA3dShaderDestToken tmp;
const struct src_register src0 = translate_src_register(
emit, &insn->Src[0] );
const struct src_register src1 = translate_src_register(
emit, &insn->Src[1] );
const struct src_register src2 = translate_src_register(
emit, &insn->Src[2] );
- boolean need_dst_tmp = FALSE;
-
- /* The dst reg must not be the same as src0 or src2 */
- if (alias_src_dst(src0, dst) ||
- alias_src_dst(src2, dst))
- need_dst_tmp = TRUE;
- if (need_dst_tmp) {
- tmp = get_temp( emit );
- tmp.mask = dst.mask;
- }
- else {
- tmp = dst;
- }
-
- if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
- return FALSE;
-
- if (need_dst_tmp) {
- if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
- return FALSE;
- }
-
- return TRUE;
+ return submit_lrp(emit, dst, src0, src1, src2);
}
@@ -2366,6 +2464,9 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
case TGSI_OPCODE_LRP:
return emit_lrp( emit, insn );
+ case TGSI_OPCODE_SSG:
+ return emit_ssg( emit, insn );
+
default: {
unsigned opcode = translate_opcode(insn->Instruction.Opcode);
@@ -2715,6 +2816,7 @@ needs_to_create_zero( struct svga_shader_emitter *emit )
return TRUE;
if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 ||
emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
return TRUE;
}
diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h
index 5e4bdeff2e..ae61cea083 100644
--- a/src/gallium/drivers/svga/svga_winsys.h
+++ b/src/gallium/drivers/svga/svga_winsys.h
@@ -136,6 +136,9 @@ struct svga_winsys_screen
void
(*destroy)(struct svga_winsys_screen *sws);
+ SVGA3dHardwareVersion
+ (*get_hw_version)(struct svga_winsys_screen *sws);
+
boolean
(*get_cap)(struct svga_winsys_screen *sws,
SVGA3dDevCapIndex index,
@@ -243,12 +246,12 @@ struct svga_winsys_screen
/**
* Map the entire data store of a buffer object into the client's address.
- * flags is a bitmaks of PIPE_TRANSFER_*
+ * usage is a bitmask of PIPE_TRANSFER_*
*/
void *
(*buffer_map)( struct svga_winsys_screen *sws,
struct svga_winsys_buffer *buf,
- unsigned flags );
+ unsigned usage );
void
(*buffer_unmap)( struct svga_winsys_screen *sws,
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/src/gallium/drivers/svga/svgadump/svga_shader_op.c
index 95612a8006..ad1549d9f8 100644
--- a/src/gallium/drivers/svga/svgadump/svga_shader_op.c
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.c
@@ -136,7 +136,7 @@ static struct sh_opcode_info opcode_info[] =
{ "dsy", 1, 1, 0, 0, SVGA3DOP_INVALID, },
{ "texldd", 1, 4, 0, 0, SVGA3DOP_INVALID, },
{ "setp", 1, 2, 0, 0, SVGA3DOP_SETP, },
- { "texldl", 1, 2, 0, 0, SVGA3DOP_INVALID, },
+ { "texldl", 1, 2, 0, 0, SVGA3DOP_TEXLDL, },
{ "breakp", 0, 1, 0, 0, SVGA3DOP_INVALID, },
};
@@ -156,6 +156,8 @@ const struct sh_opcode_info *svga_opcode_info( uint op )
if (info->svga_opcode == SVGA3DOP_INVALID) {
/* No valid information. Please provide number of dst/src registers.
*/
+ _debug_printf("Missing information for opcode %u, '%s'\n", op,
+ opcode_info[op].mnemonic);
assert( 0 );
return NULL;
}
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index eaabae8ce4..4db7619c42 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -1184,7 +1184,6 @@ trace_context_clear_depth_stencil(struct pipe_context *_pipe,
static INLINE void
trace_context_flush(struct pipe_context *_pipe,
- unsigned flags,
struct pipe_fence_handle **fence)
{
struct trace_context *tr_ctx = trace_context(_pipe);
@@ -1193,9 +1192,8 @@ trace_context_flush(struct pipe_context *_pipe,
trace_dump_call_begin("pipe_context", "flush");
trace_dump_arg(ptr, pipe);
- trace_dump_arg(uint, flags);
- pipe->flush(pipe, flags, fence);
+ pipe->flush(pipe, fence);
if(fence)
trace_dump_ret(ptr, *fence);
@@ -1219,31 +1217,6 @@ trace_context_destroy(struct pipe_context *_pipe)
FREE(tr_ctx);
}
-static unsigned int
-trace_is_resource_referenced( struct pipe_context *_pipe,
- struct pipe_resource *_resource,
- unsigned level, int layer)
-{
- struct trace_context *tr_ctx = trace_context(_pipe);
- struct trace_resource *tr_tex = trace_resource(_resource);
- struct pipe_context *pipe = tr_ctx->pipe;
- struct pipe_resource *texture = tr_tex->resource;
- unsigned int referenced;
-
- trace_dump_call_begin("pipe_context", "is_resource_referenced");
- trace_dump_arg(ptr, pipe);
- trace_dump_arg(ptr, texture);
- trace_dump_arg(uint, level);
- trace_dump_arg(int, layer);
-
- referenced = pipe->is_resource_referenced(pipe, texture, level, layer);
-
- trace_dump_ret(uint, referenced);
- trace_dump_call_end();
-
- return referenced;
-}
-
/********************************************************************
* transfer
@@ -1419,6 +1392,28 @@ trace_context_transfer_inline_write(struct pipe_context *_context,
}
+static void trace_redefine_user_buffer(struct pipe_context *_context,
+ struct pipe_resource *_resource,
+ unsigned offset, unsigned size)
+{
+ struct trace_context *tr_context = trace_context(_context);
+ struct trace_resource *tr_tex = trace_resource(_resource);
+ struct pipe_context *context = tr_context->pipe;
+ struct pipe_resource *resource = tr_tex->resource;
+
+ assert(resource->screen == context->screen);
+
+ trace_dump_call_begin("pipe_context", "redefine_user_buffer");
+
+ trace_dump_arg(ptr, context);
+ trace_dump_arg(ptr, resource);
+ trace_dump_arg(uint, offset);
+ trace_dump_arg(uint, size);
+
+ trace_dump_call_end();
+
+ context->redefine_user_buffer(context, resource, offset, size);
+}
static const struct debug_named_value rbug_blocker_flags[] = {
@@ -1498,7 +1493,6 @@ trace_context_create(struct trace_screen *tr_scr,
tr_ctx->base.clear_render_target = trace_context_clear_render_target;
tr_ctx->base.clear_depth_stencil = trace_context_clear_depth_stencil;
tr_ctx->base.flush = trace_context_flush;
- tr_ctx->base.is_resource_referenced = trace_is_resource_referenced;
tr_ctx->base.get_transfer = trace_context_get_transfer;
tr_ctx->base.transfer_destroy = trace_context_transfer_destroy;
@@ -1506,6 +1500,7 @@ trace_context_create(struct trace_screen *tr_scr,
tr_ctx->base.transfer_unmap = trace_context_transfer_unmap;
tr_ctx->base.transfer_flush_region = trace_context_transfer_flush_region;
tr_ctx->base.transfer_inline_write = trace_context_transfer_inline_write;
+ tr_ctx->base.redefine_user_buffer = trace_redefine_user_buffer;
tr_ctx->pipe = pipe;
diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c
index 51a4ea9633..8a4ec20fb8 100644
--- a/src/gallium/drivers/trace/tr_dump.c
+++ b/src/gallium/drivers/trace/tr_dump.c
@@ -60,10 +60,9 @@
static struct os_stream *stream = NULL;
static unsigned refcount = 0;
-static pipe_mutex call_mutex;
+pipe_static_mutex(call_mutex);
static long unsigned call_no = 0;
static boolean dumping = FALSE;
-static boolean initialized = FALSE;
static INLINE void
@@ -225,26 +224,13 @@ trace_dump_trace_close(void)
stream = NULL;
refcount = 0;
call_no = 0;
- pipe_mutex_destroy(call_mutex);
}
}
-void trace_dump_init()
-{
- if (initialized)
- return;
-
- pipe_mutex_init(call_mutex);
- dumping = FALSE;
- initialized = TRUE;
-}
-
boolean trace_dump_trace_begin()
{
const char *filename;
- assert(initialized);
-
filename = debug_get_option("GALLIUM_TRACE", NULL);
if(!filename)
return FALSE;
diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h
index 74c5e83e9e..62b4fe429b 100644
--- a/src/gallium/drivers/trace/tr_dump.h
+++ b/src/gallium/drivers/trace/tr_dump.h
@@ -43,11 +43,6 @@ struct pipe_transfer;
struct pipe_box;
/*
- * Call before use.
- */
-void trace_dump_init(void);
-
-/*
* Low level dumping controls.
*
* Opening the trace file and checking if that is opened.
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index 155c869fbd..18805655bd 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -517,7 +517,6 @@ void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state)
trace_dump_struct_begin("pipe_vertex_buffer");
trace_dump_member(uint, state, stride);
- trace_dump_member(uint, state, max_index);
trace_dump_member(uint, state, buffer_offset);
trace_dump_member(resource_ptr, state, buffer);
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index c2de2daa88..42180c4f19 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -158,8 +158,7 @@ trace_screen_is_format_supported(struct pipe_screen *_screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned tex_usage,
- unsigned geom_flags)
+ unsigned tex_usage)
{
struct trace_screen *tr_scr = trace_screen(_screen);
struct pipe_screen *screen = tr_scr->screen;
@@ -172,10 +171,9 @@ trace_screen_is_format_supported(struct pipe_screen *_screen,
trace_dump_arg(int, target);
trace_dump_arg(uint, sample_count);
trace_dump_arg(uint, tex_usage);
- trace_dump_arg(uint, geom_flags);
result = screen->is_format_supported(screen, format, target, sample_count,
- tex_usage, geom_flags);
+ tex_usage);
trace_dump_ret(bool, result);
@@ -393,10 +391,9 @@ trace_screen_fence_reference(struct pipe_screen *_screen,
}
-static int
+static boolean
trace_screen_fence_signalled(struct pipe_screen *_screen,
- struct pipe_fence_handle *fence,
- unsigned flags)
+ struct pipe_fence_handle *fence)
{
struct trace_screen *tr_scr = trace_screen(_screen);
struct pipe_screen *screen = tr_scr->screen;
@@ -406,11 +403,10 @@ trace_screen_fence_signalled(struct pipe_screen *_screen,
trace_dump_arg(ptr, screen);
trace_dump_arg(ptr, fence);
- trace_dump_arg(uint, flags);
- result = screen->fence_signalled(screen, fence, flags);
+ result = screen->fence_signalled(screen, fence);
- trace_dump_ret(int, result);
+ trace_dump_ret(bool, result);
trace_dump_call_end();
@@ -418,10 +414,10 @@ trace_screen_fence_signalled(struct pipe_screen *_screen,
}
-static int
+static boolean
trace_screen_fence_finish(struct pipe_screen *_screen,
struct pipe_fence_handle *fence,
- unsigned flags)
+ uint64_t timeout)
{
struct trace_screen *tr_scr = trace_screen(_screen);
struct pipe_screen *screen = tr_scr->screen;
@@ -431,11 +427,11 @@ trace_screen_fence_finish(struct pipe_screen *_screen,
trace_dump_arg(ptr, screen);
trace_dump_arg(ptr, fence);
- trace_dump_arg(uint, flags);
+ trace_dump_arg(uint, timeout);
- result = screen->fence_finish(screen, fence, flags);
+ result = screen->fence_finish(screen, fence, timeout);
- trace_dump_ret(int, result);
+ trace_dump_ret(bool, result);
trace_dump_call_end();
@@ -472,8 +468,6 @@ trace_enabled(void)
return trace;
firstrun = FALSE;
- trace_dump_init();
-
if(trace_dump_trace_begin()) {
trace_dumping_start();
trace = TRUE;
diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index a842d541eb..ec9f105897 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -173,6 +173,18 @@ typedef unsigned char boolean;
# define __FUNCTION__ "<unknown>"
# endif
#endif
+#ifndef __func__
+# if (__STDC_VERSION__ >= 199901L) || \
+ (defined(__SUNPRO_C) && defined(__C99FEATURES__))
+ /* __func__ is part of C99 */
+# elif defined(_MSC_VER)
+# if _MSC_VER >= 1300
+# define __func__ __FUNCTION__
+# else
+# define __func__ "<unknown>"
+# endif
+# endif
+#endif
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 589cac2ddd..d8de3bac0e 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -312,29 +312,12 @@ struct pipe_context {
unsigned dstx, unsigned dsty,
unsigned width, unsigned height);
- /** Flush rendering
- * \param flags bitmask of PIPE_FLUSH_x tokens)
+ /** Flush draw commands
*/
void (*flush)( struct pipe_context *pipe,
- unsigned flags,
struct pipe_fence_handle **fence );
/**
- * Check whether a texture is referenced by an unflushed hw command.
- * The state-tracker uses this function to avoid unnecessary flushes.
- * It is safe (but wasteful) to always return
- * PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE.
- * \param pipe context whose unflushed hw commands will be checked.
- * \param texture texture to check.
- * \param level mipmap level.
- * \param layer cubemap face, 2d array or 3d slice, 0 otherwise. Use -1 for any layer.
- * \return mask of PIPE_REFERENCED_FOR_READ/WRITE or PIPE_UNREFERENCED
- */
- unsigned int (*is_resource_referenced)(struct pipe_context *pipe,
- struct pipe_resource *texture,
- unsigned level, int layer);
-
- /**
* Create a view on a texture to be used by a shader stage.
*/
struct pipe_sampler_view * (*create_sampler_view)(struct pipe_context *ctx,
@@ -399,6 +382,19 @@ struct pipe_context {
unsigned stride,
unsigned layer_stride);
+
+ /* Notify a driver that a content of a user buffer has been changed.
+ * The changed range is [offset, offset+size-1].
+ * The new width0 of the buffer is offset+size. */
+ void (*redefine_user_buffer)(struct pipe_context *,
+ struct pipe_resource *,
+ unsigned offset,
+ unsigned size);
+
+ /**
+ * Flush any pending framebuffer writes and invalidate texture caches.
+ */
+ void (*texture_barrier)(struct pipe_context *);
};
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index f5af15ff9f..bac3300df3 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -225,13 +225,13 @@ enum pipe_transfer_usage {
/**
* Discards the memory within the mapped region.
*
- * It should not be used with PIPE_TRANSFER_CPU_READ.
+ * It should not be used with PIPE_TRANSFER_READ.
*
* See also:
* - OpenGL's ARB_map_buffer_range extension, MAP_INVALIDATE_RANGE_BIT flag.
- * - Direct3D's D3DLOCK_DISCARD flag.
*/
- PIPE_TRANSFER_DISCARD = (1 << 8),
+ PIPE_TRANSFER_DISCARD = (1 << 8), /* DEPRECATED */
+ PIPE_TRANSFER_DISCARD_RANGE = (1 << 8),
/**
* Fail if the resource cannot be mapped immediately.
@@ -246,7 +246,7 @@ enum pipe_transfer_usage {
/**
* Do not attempt to synchronize pending operations on the resource when mapping.
*
- * It should not be used with PIPE_TRANSFER_CPU_READ.
+ * It should not be used with PIPE_TRANSFER_READ.
*
* See also:
* - OpenGL's ARB_map_buffer_range extension, MAP_UNSYNCHRONIZED_BIT flag.
@@ -260,13 +260,28 @@ enum pipe_transfer_usage {
* Written ranges will be notified later with
* pipe_context::transfer_flush_region.
*
- * It should not be used with PIPE_TRANSFER_CPU_READ.
+ * It should not be used with PIPE_TRANSFER_READ.
*
* See also:
* - pipe_context::transfer_flush_region
* - OpenGL's ARB_map_buffer_range extension, MAP_FLUSH_EXPLICIT_BIT flag.
*/
- PIPE_TRANSFER_FLUSH_EXPLICIT = (1 << 11)
+ PIPE_TRANSFER_FLUSH_EXPLICIT = (1 << 11),
+
+ /**
+ * Discards all memory backing the resource.
+ *
+ * It should not be used with PIPE_TRANSFER_READ.
+ *
+ * This is equivalent to:
+ * - OpenGL's ARB_map_buffer_range extension, MAP_INVALIDATE_BUFFER_BIT
+ * - BufferData(NULL) on a GL buffer
+ * - Direct3D's D3DLOCK_DISCARD flag.
+ * - WDDM's D3DDDICB_LOCKFLAGS.Discard flag.
+ * - D3D10 DDI's D3D10_DDI_MAP_WRITE_DISCARD flag
+ * - D3D10's D3D10_MAP_WRITE_DISCARD flag.
+ */
+ PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE = (1 << 12)
};
@@ -322,25 +337,6 @@ enum pipe_transfer_usage {
#define PIPE_USAGE_STAGING 5 /* supports data transfers from the GPU to the CPU */
-/* These are intended to be used in calls to is_format_supported, but
- * no driver actually uses these flags, and only the glx/xlib state
- * tracker issues them.
- *
- * Deprecate?
- */
-#define PIPE_TEXTURE_GEOM_NON_SQUARE 0x1
-#define PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO 0x2
-
-
-/**
- * Flush types:
- */
-#define PIPE_FLUSH_RENDER_CACHE 0x1
-#define PIPE_FLUSH_TEXTURE_CACHE 0x2
-#define PIPE_FLUSH_SWAPBUFFERS 0x4
-#define PIPE_FLUSH_FRAME 0x8 /**< Mark the end of a frame */
-
-
/**
* Shaders
*/
@@ -380,7 +376,8 @@ enum pipe_transfer_usage {
#define PIPE_QUERY_SO_STATISTICS 5
#define PIPE_QUERY_GPU_FINISHED 6
#define PIPE_QUERY_TIMESTAMP_DISJOINT 7
-#define PIPE_QUERY_TYPES 8
+#define PIPE_QUERY_OCCLUSION_PREDICATE 8
+#define PIPE_QUERY_TYPES 9
/**
@@ -410,6 +407,8 @@ enum pipe_transfer_usage {
#define PIPE_SWIZZLE_ONE 5
+#define PIPE_TIMEOUT_INFINITE 0xffffffffffffffffull
+
/**
* Implementation capabilities/limits which are queried through
* pipe_screen::get_param() and pipe_screen::get_paramf().
@@ -462,6 +461,8 @@ enum pipe_cap {
PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER,
PIPE_CAP_DEPTH_CLAMP,
PIPE_CAP_SHADER_STENCIL_EXPORT,
+ PIPE_CAP_TGSI_INSTANCEID,
+ PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR,
};
/* Shader caps not specific to any single stage */
@@ -487,13 +488,6 @@ enum pipe_shader_cap
PIPE_SHADER_CAP_SUBROUTINES, /* BGNSUB, ENDSUB, CAL, RET */
};
-/**
- * Referenced query flags.
- */
-
-#define PIPE_UNREFERENCED 0
-#define PIPE_REFERENCED_FOR_READ (1 << 0)
-#define PIPE_REFERENCED_FOR_WRITE (1 << 1)
/**
* Composite query types
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index 22cc7aa18a..d53af598e7 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -33,6 +33,16 @@
extern "C" {
#endif
+
+enum pipe_type {
+ PIPE_TYPE_UNORM = 0,
+ PIPE_TYPE_SNORM,
+ PIPE_TYPE_SINT,
+ PIPE_TYPE_UINT,
+ PIPE_TYPE_FLOAT,
+ PIPE_TYPE_COUNT
+};
+
/**
* Texture/surface image formats (preliminary)
*/
@@ -190,6 +200,17 @@ enum pipe_format {
PIPE_FORMAT_X24S8_USCALED = 136,
PIPE_FORMAT_S8X24_USCALED = 137,
PIPE_FORMAT_X32_S8X24_USCALED = 138,
+
+ PIPE_FORMAT_B2G3R3_UNORM = 139,
+ PIPE_FORMAT_L16A16_UNORM = 140,
+ PIPE_FORMAT_A16_UNORM = 141,
+ PIPE_FORMAT_I16_UNORM = 142,
+
+ PIPE_FORMAT_LATC1_UNORM = 143,
+ PIPE_FORMAT_LATC1_SNORM = 144,
+ PIPE_FORMAT_LATC2_UNORM = 145,
+ PIPE_FORMAT_LATC2_SNORM = 146,
+
PIPE_FORMAT_COUNT
};
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index 850eb84a3c..a7845dd24d 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -99,14 +99,12 @@ struct pipe_screen {
* Check if the given pipe_format is supported as a texture or
* drawing surface.
* \param bindings bitmask of PIPE_BIND_*
- * \param geom_flags bitmask of PIPE_TEXTURE_GEOM_*
*/
boolean (*is_format_supported)( struct pipe_screen *,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned bindings,
- unsigned geom_flags );
+ unsigned bindings );
/**
* Create a new texture object, using the given template info.
@@ -183,21 +181,17 @@ struct pipe_screen {
/**
* Checks whether the fence has been signalled.
- * \param flags driver-specific meaning
- * \return zero on success.
*/
- int (*fence_signalled)( struct pipe_screen *screen,
- struct pipe_fence_handle *fence,
- unsigned flags );
+ boolean (*fence_signalled)( struct pipe_screen *screen,
+ struct pipe_fence_handle *fence );
/**
* Wait for the fence to finish.
- * \param flags driver-specific meaning
- * \return zero on success.
+ * \param timeout in nanoseconds
*/
- int (*fence_finish)( struct pipe_screen *screen,
- struct pipe_fence_handle *fence,
- unsigned flags );
+ boolean (*fence_finish)( struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ uint64_t timeout );
};
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index ba433b2bd2..d3a3632654 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -75,6 +75,7 @@ enum tgsi_file_type {
TGSI_FILE_SYSTEM_VALUE =9,
TGSI_FILE_IMMEDIATE_ARRAY =10,
TGSI_FILE_TEMPORARY_ARRAY =11,
+ TGSI_FILE_RESOURCE =12,
TGSI_FILE_COUNT /**< how many TGSI_FILE_ types */
};
@@ -153,6 +154,14 @@ struct tgsi_declaration_semantic
unsigned Padding : 8;
};
+struct tgsi_declaration_resource {
+ unsigned Resource : 8; /**< one of TGSI_TEXTURE_ */
+ unsigned ReturnTypeX : 6; /**< one of enum pipe_type */
+ unsigned ReturnTypeY : 6; /**< one of enum pipe_type */
+ unsigned ReturnTypeZ : 6; /**< one of enum pipe_type */
+ unsigned ReturnTypeW : 6; /**< one of enum pipe_type */
+};
+
#define TGSI_IMM_FLOAT32 0
#define TGSI_IMM_UINT32 1
#define TGSI_IMM_INT32 2
@@ -177,7 +186,8 @@ union tgsi_immediate_data
#define TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES 2
#define TGSI_PROPERTY_FS_COORD_ORIGIN 3
#define TGSI_PROPERTY_FS_COORD_PIXEL_CENTER 4
-#define TGSI_PROPERTY_COUNT 5
+#define TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS 5
+#define TGSI_PROPERTY_COUNT 6
struct tgsi_property {
unsigned Type : 4; /**< TGSI_TOKEN_TYPE_PROPERTY */
@@ -338,7 +348,22 @@ struct tgsi_property_data {
#define TGSI_OPCODE_CASE 142
#define TGSI_OPCODE_DEFAULT 143
#define TGSI_OPCODE_ENDSWITCH 144
-#define TGSI_OPCODE_LAST 145
+
+/* resource related opcodes */
+#define TGSI_OPCODE_LOAD 145
+#define TGSI_OPCODE_LOAD_MS 146
+#define TGSI_OPCODE_SAMPLE 147
+#define TGSI_OPCODE_SAMPLE_B 148
+#define TGSI_OPCODE_SAMPLE_C 149
+#define TGSI_OPCODE_SAMPLE_C_LZ 150
+#define TGSI_OPCODE_SAMPLE_D 151
+#define TGSI_OPCODE_SAMPLE_L 152
+#define TGSI_OPCODE_GATHER4 153
+#define TGSI_OPCODE_RESINFO 154
+#define TGSI_OPCODE_SAMPLE_POS 155
+#define TGSI_OPCODE_SAMPLE_INFO 156
+
+#define TGSI_OPCODE_LAST 157
#define TGSI_SAT_NONE 0 /* do not saturate */
#define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */
@@ -405,7 +430,9 @@ struct tgsi_instruction_label
#define TGSI_TEXTURE_SHADOW1D 6
#define TGSI_TEXTURE_SHADOW2D 7
#define TGSI_TEXTURE_SHADOWRECT 8
-#define TGSI_TEXTURE_COUNT 9
+#define TGSI_TEXTURE_1D_ARRAY 9
+#define TGSI_TEXTURE_2D_ARRAY 10
+#define TGSI_TEXTURE_COUNT 11
struct tgsi_instruction_texture
{
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 226ae8667b..cf6c5b5026 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -62,6 +62,7 @@ extern "C" {
#define PIPE_MAX_GEOMETRY_SAMPLERS 16
#define PIPE_MAX_SHADER_INPUTS 32
#define PIPE_MAX_SHADER_OUTPUTS 32
+#define PIPE_MAX_SHADER_RESOURCES 32
#define PIPE_MAX_TEXTURE_LEVELS 16
#define PIPE_MAX_SO_BUFFERS 4
@@ -389,11 +390,11 @@ struct pipe_stream_output_state
struct pipe_transfer
{
struct pipe_resource *resource; /**< resource to transfer to/from */
- unsigned level;
+ unsigned level; /**< texture mipmap level */
enum pipe_transfer_usage usage;
- struct pipe_box box;
- unsigned stride;
- unsigned layer_stride;
+ struct pipe_box box; /**< region of the resource to access */
+ unsigned stride; /**< row stride in bytes */
+ unsigned layer_stride; /**< image/layer stride in bytes */
void *data;
};
@@ -407,7 +408,6 @@ struct pipe_transfer
struct pipe_vertex_buffer
{
unsigned stride; /**< stride to same attrib in next vertex, in bytes */
- unsigned max_index; /**< number of vertices in this buffer */
unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */
struct pipe_resource *buffer; /**< the actual buffer */
};
diff --git a/src/gallium/include/state_tracker/st_api.h b/src/gallium/include/state_tracker/st_api.h
index 1c2148b78f..d4973a10cc 100644
--- a/src/gallium/include/state_tracker/st_api.h
+++ b/src/gallium/include/state_tracker/st_api.h
@@ -121,6 +121,11 @@ enum st_context_resource_type {
};
/**
+ * Flush flags.
+ */
+#define ST_FLUSH_FRONT (1 << 0)
+
+/**
* Value to st_manager->get_param function.
*/
enum st_manager_param {
diff --git a/src/gallium/state_trackers/Android.mk b/src/gallium/state_trackers/Android.mk
index e0ae537505..65588694d6 100644
--- a/src/gallium/state_trackers/Android.mk
+++ b/src/gallium/state_trackers/Android.mk
@@ -28,6 +28,7 @@ LOCAL_C_INCLUDES := \
external/mesa/src/gallium/auxiliary \
external/mesa/src/egl/main \
external/mesa/src/gallium/state_trackers/egl \
+ external/mesa/src/gallium/winsys \
external/mesa/src/gallium/winsys/sw \
external/mesa/src/gralloc \
external/drm \
diff --git a/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp
index a54324a04f..741a97f897 100644
--- a/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp
+++ b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp
@@ -256,11 +256,16 @@ struct GalliumDXGIAdapter
{
this->parent = factory;
+ /* FIXME handler should be static */
handler.invalid_surface = handle_invalid_surface;
handler.new_drm_screen = dxgi_loader_create_drm_screen;
handler.new_sw_screen = dxgi_loader_create_sw_screen;
- display = platform->create_display(dpy, &handler, this);
+ platform->set_event_handler(&handler);
+
+ display = platform->create_display(dpy, FALSE, this);
if(!display)
+ display = platform->create_display(dpy, TRUE, this);
+ if(!display)
throw E_FAIL;
memset(&desc, 0, sizeof(desc));
std::string s = std::string("GalliumD3D on ") + display->screen->get_name(display->screen) + " by " + display->screen->get_vendor(display->screen);
@@ -789,9 +794,9 @@ struct dxgi_blitter
const unsigned semantic_indices[] = { 0, 0 };
vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names, semantic_indices);
- vbuf.buffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER, sizeof(quad_data));
+ vbuf.buffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STREAM, sizeof(quad_data));
vbuf.buffer_offset = 0;
- vbuf.max_index = ~0;
vbuf.stride = 4 * sizeof(float);
pipe_buffer_write(pipe, vbuf.buffer, 0, sizeof(quad_data), quad_data);
@@ -1229,7 +1234,7 @@ struct GalliumDXGISwapChain : public GalliumDXGIObject<IDXGISwapChain, GalliumDX
if(dst_surface)
pipe->surface_destroy(pipe, dst_surface);
- pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, 0);
+ pipe->flush(pipe, 0);
att = (db) ? NATIVE_ATTACHMENT_BACK_LEFT : NATIVE_ATTACHMENT_FRONT_LEFT;
if(!surface->present(surface, att, FALSE, 0))
diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
index e1ba6c184f..12f2aaddc9 100644
--- a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
+++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
@@ -623,7 +623,6 @@ struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe>
vertex_buffers[start + i].buffer = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
vertex_buffers[start + i].buffer_offset = new_offsets[i];
vertex_buffers[start + i].stride = new_strides[i];
- vertex_buffers[start + i].max_index = ~0;
last_different = i;
}
}
@@ -1824,7 +1823,7 @@ changed:
virtual void STDMETHODCALLTYPE Flush(void)
{
SYNCHRONIZED;
- pipe->flush(pipe, PIPE_FLUSH_FRAME, 0);
+ pipe->flush(pipe, 0);
}
/* In Direct3D 10, if the reference count of an object drops to 0, it is automatically
diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_screen.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_screen.h
index 9cfdc837d8..f678ac7f73 100644
--- a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_screen.h
+++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_screen.h
@@ -299,25 +299,25 @@ struct GalliumD3D11ScreenImpl : public GalliumD3D11Screen
sampler_view |= D3D11_FORMAT_SUPPORT_SHADER_SAMPLE_COMPARISON;
/* TODO: do this properly when Gallium drivers actually support index/vertex format queries */
- if(screen->is_format_supported(screen, format, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER, 0)
- || (screen->is_format_supported(screen, format, PIPE_BUFFER, 0, PIPE_BIND_INDEX_BUFFER, 0)
+ if(screen->is_format_supported(screen, format, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER)
+ || (screen->is_format_supported(screen, format, PIPE_BUFFER, 0, PIPE_BIND_INDEX_BUFFER)
|| format == PIPE_FORMAT_R8_UNORM))
support |= buffer;
- if(screen->is_format_supported(screen, format, PIPE_BUFFER, 0, PIPE_BIND_STREAM_OUTPUT, 0))
+ if(screen->is_format_supported(screen, format, PIPE_BUFFER, 0, PIPE_BIND_STREAM_OUTPUT))
support |= buffer | D3D11_FORMAT_SUPPORT_SO_BUFFER;
- if(screen->is_format_supported(screen, format, PIPE_TEXTURE_1D, 0, PIPE_BIND_SAMPLER_VIEW, 0))
+ if(screen->is_format_supported(screen, format, PIPE_TEXTURE_1D, 0, PIPE_BIND_SAMPLER_VIEW))
support |= D3D11_FORMAT_SUPPORT_TEXTURE1D | sampler_view;
- if(screen->is_format_supported(screen, format, PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW, 0))
+ if(screen->is_format_supported(screen, format, PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW))
support |= D3D11_FORMAT_SUPPORT_TEXTURE2D | sampler_view;
- if(screen->is_format_supported(screen, format, PIPE_TEXTURE_CUBE, 0, PIPE_BIND_SAMPLER_VIEW, 0))
+ if(screen->is_format_supported(screen, format, PIPE_TEXTURE_CUBE, 0, PIPE_BIND_SAMPLER_VIEW))
support |= D3D11_FORMAT_SUPPORT_TEXTURE2D | sampler_view;
- if(screen->is_format_supported(screen, format, PIPE_TEXTURE_3D, 0, PIPE_BIND_SAMPLER_VIEW, 0))
+ if(screen->is_format_supported(screen, format, PIPE_TEXTURE_3D, 0, PIPE_BIND_SAMPLER_VIEW))
support |= D3D11_FORMAT_SUPPORT_TEXTURE3D | sampler_view;
- if(screen->is_format_supported(screen, format, PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET, 0))
+ if(screen->is_format_supported(screen, format, PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET))
support |= D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_RENDER_TARGET | D3D11_FORMAT_SUPPORT_BLENDABLE;
- if(screen->is_format_supported(screen, format, PIPE_TEXTURE_2D, 0, PIPE_BIND_DEPTH_STENCIL, 0))
+ if(screen->is_format_supported(screen, format, PIPE_TEXTURE_2D, 0, PIPE_BIND_DEPTH_STENCIL))
support |= D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_DEPTH_STENCIL;
- if(screen->is_format_supported(screen, format, PIPE_TEXTURE_2D, 0, PIPE_BIND_DISPLAY_TARGET, 0))
+ if(screen->is_format_supported(screen, format, PIPE_TEXTURE_2D, 0, PIPE_BIND_DISPLAY_TARGET))
support |= D3D11_FORMAT_SUPPORT_DISPLAY;
format_support[format] = support;
}
diff --git a/src/gallium/state_trackers/d3d1x/progs/bin/d3d10tri.exe b/src/gallium/state_trackers/d3d1x/progs/bin/d3d10tri.exe
deleted file mode 100755
index 77ab03fcce..0000000000
--- a/src/gallium/state_trackers/d3d1x/progs/bin/d3d10tri.exe
+++ /dev/null
Binary files differ
diff --git a/src/gallium/state_trackers/d3d1x/progs/bin/d3d11gears.exe b/src/gallium/state_trackers/d3d1x/progs/bin/d3d11gears.exe
deleted file mode 100755
index c2cd296a40..0000000000
--- a/src/gallium/state_trackers/d3d1x/progs/bin/d3d11gears.exe
+++ /dev/null
Binary files differ
diff --git a/src/gallium/state_trackers/d3d1x/progs/bin/d3d11spikysphere.exe b/src/gallium/state_trackers/d3d1x/progs/bin/d3d11spikysphere.exe
deleted file mode 100755
index c3bc667fb3..0000000000
--- a/src/gallium/state_trackers/d3d1x/progs/bin/d3d11spikysphere.exe
+++ /dev/null
Binary files differ
diff --git a/src/gallium/state_trackers/d3d1x/progs/bin/d3d11tex.exe b/src/gallium/state_trackers/d3d1x/progs/bin/d3d11tex.exe
deleted file mode 100755
index 0be5cb8dd7..0000000000
--- a/src/gallium/state_trackers/d3d1x/progs/bin/d3d11tex.exe
+++ /dev/null
Binary files differ
diff --git a/src/gallium/state_trackers/d3d1x/progs/bin/d3d11tri.exe b/src/gallium/state_trackers/d3d1x/progs/bin/d3d11tri.exe
deleted file mode 100755
index abada5484b..0000000000
--- a/src/gallium/state_trackers/d3d1x/progs/bin/d3d11tri.exe
+++ /dev/null
Binary files differ
diff --git a/src/gallium/state_trackers/dri/common/dri_context.c b/src/gallium/state_trackers/dri/common/dri_context.c
index 3d5d24e692..e23c1bcafa 100644
--- a/src/gallium/state_trackers/dri/common/dri_context.c
+++ b/src/gallium/state_trackers/dri/common/dri_context.c
@@ -141,12 +141,18 @@ GLboolean
dri_unbind_context(__DRIcontext * cPriv)
{
/* dri_util.c ensures cPriv is not null */
+ struct dri_screen *screen = dri_screen(cPriv->driScreenPriv);
struct dri_context *ctx = dri_context(cPriv);
+ struct dri_drawable *draw = dri_drawable(ctx->dPriv);
+ struct dri_drawable *read = dri_drawable(ctx->rPriv);
+ struct st_api *stapi = screen->st_api;
if (--ctx->bind_count == 0) {
if (ctx->st == ctx->stapi->get_current(ctx->stapi)) {
- ctx->st->flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL);
- ctx->stapi->make_current(ctx->stapi, NULL, NULL, NULL);
+ ctx->st->flush(ctx->st, ST_FLUSH_FRONT, NULL);
+ stapi->make_current(stapi, NULL, NULL, NULL);
+ draw->context = NULL;
+ read->context = NULL;
}
}
@@ -165,21 +171,27 @@ dri_make_current(__DRIcontext * cPriv,
struct st_context_iface *old_st = ctx->stapi->get_current(ctx->stapi);
if (old_st && old_st != ctx->st)
- old_st->flush(old_st, PIPE_FLUSH_RENDER_CACHE, NULL);
+ old_st->flush(old_st, ST_FLUSH_FRONT, NULL);
++ctx->bind_count;
+ if (!driDrawPriv && !driReadPriv)
+ return ctx->stapi->make_current(ctx->stapi, ctx->st, NULL, NULL);
+ else if (!driDrawPriv || !driReadPriv)
+ return GL_FALSE;
+
+ draw->context = ctx;
if (ctx->dPriv != driDrawPriv) {
ctx->dPriv = driDrawPriv;
draw->texture_stamp = driDrawPriv->lastStamp - 1;
}
+ read->context = ctx;
if (ctx->rPriv != driReadPriv) {
ctx->rPriv = driReadPriv;
read->texture_stamp = driReadPriv->lastStamp - 1;
}
- ctx->stapi->make_current(ctx->stapi, ctx->st,
- (draw) ? &draw->base : NULL, (read) ? &read->base : NULL);
+ ctx->stapi->make_current(ctx->stapi, ctx->st, &draw->base, &read->base);
return GL_TRUE;
}
diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.c b/src/gallium/state_trackers/dri/common/dri_drawable.c
index 5fd6e7863c..28a33ac7d0 100644
--- a/src/gallium/state_trackers/dri/common/dri_drawable.c
+++ b/src/gallium/state_trackers/dri/common/dri_drawable.c
@@ -132,6 +132,7 @@ dri_create_buffer(__DRIscreen * sPriv,
drawable->base.validate = dri_st_framebuffer_validate;
drawable->base.st_manager_private = (void *) drawable;
+ drawable->screen = screen;
drawable->sPriv = sPriv;
drawable->dPriv = dPriv;
dPriv->driverPrivate = (void *)drawable;
@@ -233,6 +234,7 @@ const __DRItexBufferExtension driTexBufferExtension = {
{ __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
dri_set_tex_buffer,
dri_set_tex_buffer2,
+ NULL,
};
/**
diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.h b/src/gallium/state_trackers/dri/common/dri_drawable.h
index 2ff6b71329..7f1aa512ca 100644
--- a/src/gallium/state_trackers/dri/common/dri_drawable.h
+++ b/src/gallium/state_trackers/dri/common/dri_drawable.h
@@ -41,6 +41,9 @@ struct dri_drawable
struct st_framebuffer_iface base;
struct st_visual stvis;
+ struct dri_screen *screen;
+ struct dri_context *context;
+
/* dri */
__DRIdrawable *dPriv;
__DRIscreen *sPriv;
diff --git a/src/gallium/state_trackers/dri/common/dri_screen.c b/src/gallium/state_trackers/dri/common/dri_screen.c
index f6e22c74b4..5931df993b 100644
--- a/src/gallium/state_trackers/dri/common/dri_screen.c
+++ b/src/gallium/state_trackers/dri/common/dri_screen.c
@@ -81,34 +81,34 @@ dri_fill_in_modes(struct dri_screen *screen,
pf_x8z24 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_Z24X8_UNORM,
PIPE_TEXTURE_2D, 0,
- PIPE_BIND_DEPTH_STENCIL, 0);
+ PIPE_BIND_DEPTH_STENCIL);
pf_z24x8 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_X8Z24_UNORM,
PIPE_TEXTURE_2D, 0,
- PIPE_BIND_DEPTH_STENCIL, 0);
+ PIPE_BIND_DEPTH_STENCIL);
pf_s8z24 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_Z24_UNORM_S8_USCALED,
PIPE_TEXTURE_2D, 0,
- PIPE_BIND_DEPTH_STENCIL, 0);
+ PIPE_BIND_DEPTH_STENCIL);
pf_z24s8 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_S8_USCALED_Z24_UNORM,
PIPE_TEXTURE_2D, 0,
- PIPE_BIND_DEPTH_STENCIL, 0);
+ PIPE_BIND_DEPTH_STENCIL);
pf_a8r8g8b8 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_B8G8R8A8_UNORM,
PIPE_TEXTURE_2D, 0,
- PIPE_BIND_RENDER_TARGET, 0);
+ PIPE_BIND_RENDER_TARGET);
pf_x8r8g8b8 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_B8G8R8X8_UNORM,
PIPE_TEXTURE_2D, 0,
- PIPE_BIND_RENDER_TARGET, 0);
+ PIPE_BIND_RENDER_TARGET);
pf_r5g6b5 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_B5G6R5_UNORM,
PIPE_TEXTURE_2D, 0,
- PIPE_BIND_RENDER_TARGET, 0);
+ PIPE_BIND_RENDER_TARGET);
/* We can only get a 16 or 32 bit depth buffer with getBuffersWithFormat */
if (dri_with_format(screen->sPriv)) {
pf_z16 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_Z16_UNORM,
PIPE_TEXTURE_2D, 0,
- PIPE_BIND_DEPTH_STENCIL, 0);
+ PIPE_BIND_DEPTH_STENCIL);
pf_z32 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_Z32_UNORM,
PIPE_TEXTURE_2D, 0,
- PIPE_BIND_DEPTH_STENCIL, 0);
+ PIPE_BIND_DEPTH_STENCIL);
} else {
pf_z16 = FALSE;
pf_z32 = FALSE;
@@ -142,7 +142,7 @@ dri_fill_in_modes(struct dri_screen *screen,
for (i = 1; i < 5; i++) {
if (p_screen->is_format_supported(p_screen, PIPE_FORMAT_B5G6R5_UNORM,
PIPE_TEXTURE_2D, i*2,
- PIPE_BIND_RENDER_TARGET, 0)) {
+ PIPE_BIND_RENDER_TARGET)) {
msaa_samples_array[msaa_samples_factor] = i * 2;
msaa_samples_factor++;
}
@@ -161,7 +161,7 @@ dri_fill_in_modes(struct dri_screen *screen,
for (i = 1; i < 5; i++) {
if (p_screen->is_format_supported(p_screen, PIPE_FORMAT_B8G8R8A8_UNORM,
PIPE_TEXTURE_2D, i*2,
- PIPE_BIND_RENDER_TARGET, 0)) {
+ PIPE_BIND_RENDER_TARGET)) {
msaa_samples_array[msaa_samples_factor] = i * 2;
msaa_samples_factor++;
}
@@ -183,7 +183,7 @@ dri_fill_in_modes(struct dri_screen *screen,
for (i = 1; i < 5; i++) {
if (p_screen->is_format_supported(p_screen, PIPE_FORMAT_B8G8R8X8_UNORM,
PIPE_TEXTURE_2D, i*2,
- PIPE_BIND_RENDER_TARGET, 0)) {
+ PIPE_BIND_RENDER_TARGET)) {
msaa_samples_array[msaa_samples_factor] = i * 2;
msaa_samples_factor++;
}
@@ -235,7 +235,6 @@ dri_fill_st_visual(struct st_visual *stvis, struct dri_screen *screen,
return;
stvis->samples = mode->samples;
- stvis->render_buffer = ST_ATTACHMENT_INVALID;
if (mode->redBits == 8) {
if (mode->alphaBits == 8)
@@ -274,8 +273,11 @@ dri_fill_st_visual(struct st_visual *stvis, struct dri_screen *screen,
PIPE_FORMAT_R16G16B16A16_SNORM : PIPE_FORMAT_NONE;
stvis->buffer_mask |= ST_ATTACHMENT_FRONT_LEFT_MASK;
- if (mode->doubleBufferMode)
+ stvis->render_buffer = ST_ATTACHMENT_FRONT_LEFT;
+ if (mode->doubleBufferMode) {
stvis->buffer_mask |= ST_ATTACHMENT_BACK_LEFT_MASK;
+ stvis->render_buffer = ST_ATTACHMENT_BACK_LEFT;
+ }
if (mode->stereoMode) {
stvis->buffer_mask |= ST_ATTACHMENT_FRONT_RIGHT_MASK;
if (mode->doubleBufferMode)
diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c
index a9d05a80fb..30326a23d6 100644
--- a/src/gallium/state_trackers/dri/drm/dri2.c
+++ b/src/gallium/state_trackers/dri/drm/dri2.c
@@ -38,6 +38,7 @@
#include "dri_screen.h"
#include "dri_context.h"
#include "dri_drawable.h"
+#include "dri2_buffer.h"
/**
* DRI2 flush extension.
@@ -51,7 +52,7 @@ static void
dri2_invalidate_drawable(__DRIdrawable *dPriv)
{
struct dri_drawable *drawable = dri_drawable(dPriv);
- struct dri_context *ctx = dri_context(dPriv->driContextPriv);
+ struct dri_context *ctx = drawable->context;
dri2InvalidateDrawable(dPriv);
drawable->dPriv->lastStamp = *drawable->dPriv->pStamp;
@@ -259,6 +260,92 @@ dri2_drawable_process_buffers(struct dri_drawable *drawable,
memcpy(drawable->old, buffers, sizeof(__DRIbuffer) * count);
}
+static __DRIbuffer *
+dri2_allocate_buffer(__DRIscreen *sPriv,
+ unsigned attachment, unsigned format,
+ int width, int height)
+{
+ struct dri_screen *screen = dri_screen(sPriv);
+ struct dri2_buffer *buffer;
+ struct pipe_resource templ;
+ enum st_attachment_type statt;
+ enum pipe_format pf;
+ unsigned bind = 0;
+ struct winsys_handle whandle;
+
+ switch (attachment) {
+ case __DRI_BUFFER_FRONT_LEFT:
+ case __DRI_BUFFER_FAKE_FRONT_LEFT:
+ statt = ST_ATTACHMENT_FRONT_LEFT;
+ bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
+ break;
+ case __DRI_BUFFER_BACK_LEFT:
+ statt = ST_ATTACHMENT_BACK_LEFT;
+ bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
+ break;
+ case __DRI_BUFFER_DEPTH:
+ case __DRI_BUFFER_DEPTH_STENCIL:
+ case __DRI_BUFFER_STENCIL:
+ statt = ST_ATTACHMENT_DEPTH_STENCIL;
+ bind = PIPE_BIND_DEPTH_STENCIL; /* XXX sampler? */
+ break;
+ default:
+ statt = ST_ATTACHMENT_INVALID;
+ break;
+ }
+
+ switch (format) {
+ case 32:
+ pf = PIPE_FORMAT_B8G8R8X8_UNORM;
+ break;
+ case 16:
+ pf = PIPE_FORMAT_Z16_UNORM;
+ break;
+ default:
+ return NULL;
+ }
+
+ buffer = CALLOC_STRUCT(dri2_buffer);
+ if (!buffer)
+ return NULL;
+
+ memset(&templ, 0, sizeof(templ));
+ templ.bind = bind;
+ templ.format = pf;
+ templ.target = PIPE_TEXTURE_2D;
+ templ.last_level = 0;
+ templ.width0 = width;
+ templ.height0 = height;
+ templ.depth0 = 1;
+ templ.array_size = 1;
+
+ buffer->resource =
+ screen->base.screen->resource_create(screen->base.screen, &templ);
+ if (!buffer->resource)
+ return NULL;
+
+ memset(&whandle, 0, sizeof(whandle));
+ whandle.type = DRM_API_HANDLE_TYPE_SHARED;
+ screen->base.screen->resource_get_handle(screen->base.screen,
+ buffer->resource, &whandle);
+
+ buffer->base.attachment = attachment;
+ buffer->base.name = whandle.handle;
+ buffer->base.cpp = util_format_get_blocksize(pf);
+ buffer->base.pitch = whandle.stride;
+
+ return &buffer->base;
+}
+
+static void
+dri2_release_buffer(__DRIscreen *sPriv, __DRIbuffer *bPriv)
+{
+ struct dri2_buffer *buffer = dri2_buffer(bPriv);
+
+ pipe_resource_reference(&buffer->resource, NULL);
+ FREE(buffer);
+}
+
/*
* Backend functions for st_framebuffer interface.
*/
@@ -373,7 +460,7 @@ static __DRIimage *
dri2_create_image_from_renderbuffer(__DRIcontext *context,
int renderbuffer, void *loaderPrivate)
{
- struct dri_context *ctx = dri_context(context->driverPrivate);
+ struct dri_context *ctx = dri_context(context);
if (!ctx->st->get_resource_for_egl_image)
return NULL;
@@ -424,6 +511,7 @@ dri2_create_image(__DRIscreen *_screen,
templ.width0 = width;
templ.height0 = height;
templ.depth0 = 1;
+ templ.array_size = 1;
img->texture = screen->base.screen->resource_create(screen->base.screen, &templ);
if (!img->texture) {
@@ -601,6 +689,9 @@ const struct __DriverAPIRec driDriverAPI = {
.SwapBuffers = NULL,
.CopySubBuffer = NULL,
+
+ .AllocateBuffer = dri2_allocate_buffer,
+ .ReleaseBuffer = dri2_release_buffer,
};
/* This is the table of extensions that the loader will dlsym() for. */
diff --git a/src/gallium/state_trackers/dri/drm/dri2_buffer.h b/src/gallium/state_trackers/dri/drm/dri2_buffer.h
new file mode 100644
index 0000000000..1cd8dbbda2
--- /dev/null
+++ b/src/gallium/state_trackers/dri/drm/dri2_buffer.h
@@ -0,0 +1,22 @@
+#ifndef DRI2_BUFFER_H
+#define DRI2_BUFFER_H
+
+#include "dri_wrapper.h"
+
+struct pipe_surface;
+
+struct dri2_buffer
+{
+ __DRIbuffer base;
+ struct pipe_resource *resource;
+};
+
+static INLINE struct dri2_buffer *
+dri2_buffer(__DRIbuffer * driBufferPriv)
+{
+ return (struct dri2_buffer *) driBufferPriv;
+}
+
+#endif
+
+/* vim: set sw=3 ts=8 sts=3 expandtab: */
diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c
index 30088b0968..ac11f7c47f 100644
--- a/src/gallium/state_trackers/dri/sw/drisw.c
+++ b/src/gallium/state_trackers/dri/sw/drisw.c
@@ -139,7 +139,7 @@ drisw_swap_buffers(__DRIdrawable *dPriv)
ptex = drawable->textures[ST_ATTACHMENT_BACK_LEFT];
if (ptex) {
- ctx->st->flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL);
+ ctx->st->flush(ctx->st, ST_FLUSH_FRONT, NULL);
drisw_copy_to_front(dPriv, ptex);
}
@@ -233,6 +233,7 @@ drisw_allocate_textures(struct dri_drawable *drawable,
*/
static const __DRIextension *drisw_screen_extensions[] = {
+ &driTexBufferExtension.base,
NULL
};
diff --git a/src/gallium/state_trackers/egl/Makefile b/src/gallium/state_trackers/egl/Makefile
index 8cfcef968e..53673a78a9 100644
--- a/src/gallium/state_trackers/egl/Makefile
+++ b/src/gallium/state_trackers/egl/Makefile
@@ -23,6 +23,14 @@ x11_SOURCES = $(wildcard x11/*.c) \
$(TOP)/src/glx/dri2.c
x11_OBJECTS = $(x11_SOURCES:.c=.o)
+wayland_INCLUDES = \
+ -I$(TOP)/src/gallium/winsys \
+ -I$(TOP)/src/egl/wayland/wayland-egl \
+ -I$(TOP)/src/egl/wayland/wayland-drm \
+ $(shell pkg-config --cflags-only-I libdrm wayland-client)
+
+wayland_SOURCES = $(wildcard wayland/*.c)
+wayland_OBJECTS = $(wayland_SOURCES:.c=.o)
drm_INCLUDES = -I$(TOP)/src/gallium/winsys $(shell pkg-config --cflags-only-I libdrm)
drm_SOURCES = $(wildcard drm/*.c)
@@ -45,6 +53,10 @@ ifneq ($(findstring x11, $(EGL_PLATFORMS)),)
EGL_OBJECTS += $(x11_OBJECTS)
EGL_CPPFLAGS += -DHAVE_X11_BACKEND
endif
+ifneq ($(findstring wayland, $(EGL_PLATFORMS)),)
+EGL_OBJECTS += $(wayland_OBJECTS)
+EGL_CPPFLAGS += -DHAVE_WAYLAND_BACKEND
+endif
ifneq ($(findstring drm, $(EGL_PLATFORMS)),)
EGL_OBJECTS += $(drm_OBJECTS)
EGL_CPPFLAGS += -DHAVE_DRM_BACKEND
@@ -87,6 +99,9 @@ $(common_OBJECTS): %.o: %.c
$(x11_OBJECTS): %.o: %.c
$(call egl-cc,x11)
+$(wayland_OBJECTS): %.o: %.c
+ $(call egl-cc,wayland)
+
$(drm_OBJECTS): %.o: %.c
$(call egl-cc,drm)
diff --git a/src/gallium/state_trackers/egl/SConscript b/src/gallium/state_trackers/egl/SConscript
index 50c7681995..9ade76ecbb 100644
--- a/src/gallium/state_trackers/egl/SConscript
+++ b/src/gallium/state_trackers/egl/SConscript
@@ -10,11 +10,8 @@ env.Append(CPPPATH = [
'#/src/gallium/winsys/sw',
'.',
])
-env.Append(CPPDEFINES = [
- 'HAVE_GDI_BACKEND',
-])
-common_sources = [
+sources = [
'common/egl_g3d.c',
'common/egl_g3d_api.c',
'common/egl_g3d_image.c',
@@ -23,12 +20,31 @@ common_sources = [
'common/native_helper.c',
]
-gdi_sources = common_sources + [
- 'gdi/native_gdi.c',
-]
+if env['platform'] == 'windows':
+ env.Append(CPPDEFINES = ['HAVE_GDI_BACKEND'])
+ sources.append('gdi/native_gdi.c')
+else:
+ if env['x11']:
+ env.Append(CPPDEFINES = ['HAVE_X11_BACKEND'])
+ env.Prepend(CPPPATH = [
+ '#/src/glx',
+ '#/src/mapi',
+ ])
+ sources.append([
+ 'x11/native_x11.c',
+ 'x11/native_dri2.c',
+ 'x11/native_ximage.c',
+ 'x11/x11_screen.c',
+ 'x11/glxinit.c'])
+ if env['dri']:
+ env.Append(CPPDEFINES = ['GLX_DIRECT_RENDERING'])
+ sources.append(['#/src/glx/dri2.c'])
+ if env['drm']:
+ env.Append(CPPDEFINES = ['HAVE_DRM_BACKEND'])
+ sources.append(['drm/native_drm.c', 'drm/modeset.c'])
-st_egl_gdi = env.ConvenienceLibrary(
- target = 'st_egl_gdi',
- source = gdi_sources,
+st_egl = env.ConvenienceLibrary(
+ target = 'st_egl',
+ source = sources,
)
-Export('st_egl_gdi')
+Export('st_egl')
diff --git a/src/gallium/state_trackers/egl/android/native_android.cpp b/src/gallium/state_trackers/egl/android/native_android.cpp
index 450eae3868..a584d54db4 100644
--- a/src/gallium/state_trackers/egl/android/native_android.cpp
+++ b/src/gallium/state_trackers/egl/android/native_android.cpp
@@ -34,7 +34,13 @@
extern "C" {
#include "egllog.h"
+
+/* see get_drm_screen_name */
+#include <xf86drm.h>
+#include <radeon_drm.h>
+#include "radeon/drm/radeon_drm_public.h"
}
+
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
@@ -157,12 +163,12 @@ import_buffer(struct android_display *adpy, const struct pipe_resource *templ,
if (templ->bind & PIPE_BIND_RENDER_TARGET) {
if (!screen->is_format_supported(screen, templ->format,
- templ->target, 0, PIPE_BIND_RENDER_TARGET, 0))
+ templ->target, 0, PIPE_BIND_RENDER_TARGET))
LOGW("importing unsupported buffer as render target");
}
if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
if (!screen->is_format_supported(screen, templ->format,
- templ->target, 0, PIPE_BIND_SAMPLER_VIEW, 0))
+ templ->target, 0, PIPE_BIND_SAMPLER_VIEW))
LOGW("importing unsupported buffer as sampler view");
}
@@ -399,7 +405,7 @@ android_display_create_window_surface(struct native_display *ndpy,
LOGW("native window format 0x%x != config format 0x%x",
format, nconf->color_format);
if (!adpy->base.screen->is_format_supported(adpy->base.screen,
- format, PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET, 0)) {
+ format, PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET)) {
LOGE("and the native window cannot be used as a render target");
return NULL;
}
@@ -452,7 +458,7 @@ android_display_init_configs(struct native_display *ndpy)
color_format = get_pipe_format(native_formats[i]);
if (color_format == PIPE_FORMAT_NONE ||
!adpy->base.screen->is_format_supported(adpy->base.screen,
- color_format, PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET, 0)) {
+ color_format, PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET)) {
LOGI("skip unsupported native format 0x%x", native_formats[i]);
continue;
}
@@ -461,8 +467,6 @@ android_display_init_configs(struct native_display *ndpy)
aconf->base.buffer_mask = 1 << NATIVE_ATTACHMENT_BACK_LEFT;
aconf->base.color_format = color_format;
aconf->base.window_bit = TRUE;
- if (!adpy->use_drm)
- aconf->base.slow_config = TRUE;
aconf->base.native_visual_type = native_formats[i];
}
@@ -470,66 +474,94 @@ android_display_init_configs(struct native_display *ndpy)
return TRUE;
}
+static const char *
+get_drm_screen_name(int fd, drmVersionPtr version)
+{
+ const char *name = version->name;
+
+ if (name && !strcmp(name, "radeon")) {
+ int chip_id;
+ struct drm_radeon_info info;
+
+ memset(&info, 0, sizeof(info));
+ info.request = RADEON_INFO_DEVICE_ID;
+ info.value = pointer_to_intptr(&chip_id);
+ if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) != 0)
+ return NULL;
+
+ name = is_r3xx(chip_id) ? "r300" : "r600";
+ }
+
+ return name;
+}
+
static boolean
-android_display_init(struct native_display *ndpy)
+android_display_init_drm(struct native_display *ndpy)
{
struct android_display *adpy = android_display(ndpy);
const hw_module_t *mod;
int fd, err;
- boolean force_sw;
- char value[PROPERTY_VALUE_MAX];
-
- if (property_get("debug.mesa.software", value, NULL))
- force_sw = (atoi(value) != 0);
- else
- force_sw = debug_get_bool_option("EGL_SOFTWARE", FALSE);
- /* try DRM first */
- if (!force_sw) {
- err = hw_get_module(GRALLOC_HARDWARE_MODULE_ID, &mod);
- if (!err) {
- const gralloc_module_t *gr = (gralloc_module_t *) mod;
+ err = hw_get_module(GRALLOC_HARDWARE_MODULE_ID, &mod);
+ if (!err) {
+ const gralloc_module_t *gr = (gralloc_module_t *) mod;
- err = -EINVAL;
- if (gr->perform)
- err = gr->perform(gr, GRALLOC_MODULE_PERFORM_GET_DRM_FD, &fd);
+ err = -EINVAL;
+ if (gr->perform)
+ err = gr->perform(gr, GRALLOC_MODULE_PERFORM_GET_DRM_FD, &fd);
+ }
+ if (!err && fd >= 0) {
+ drmVersionPtr version;
+ const char *name;
+
+ version = drmGetVersion(fd);
+ if (version) {
+ name = get_drm_screen_name(fd, version);
+ if (name) {
+ adpy->base.screen =
+ adpy->event_handler->new_drm_screen(&adpy->base, name, fd);
+ }
+ drmFreeVersion(version);
}
- if (!err && fd >= 0) {
- adpy->base.screen =
- adpy->event_handler->new_drm_screen(&adpy->base, "i915", fd);
- if (adpy->base.screen)
- adpy->use_drm = TRUE;
+ else {
+ _eglLog(_EGL_WARNING, "invalid fd %d", fd);
+ err = -EINVAL;
}
if (adpy->base.screen)
- LOGI("using DRM screen");
- else
- LOGE("failed to create DRM screen");
+ adpy->use_drm = TRUE;
}
- /* try SW screen */
- if (!adpy->base.screen) {
- struct sw_winsys *ws = android_create_sw_winsys();
+ if (adpy->base.screen) {
+ LOGI("using DRM screen");
+ return TRUE;
+ }
+ else {
+ LOGE("failed to create DRM screen");
+ return FALSE;
+ }
+}
- if (ws) {
- adpy->base.screen =
- adpy->event_handler->new_sw_screen(&adpy->base, ws);
- }
+static boolean
+android_display_init_sw(struct native_display *ndpy)
+{
+ struct android_display *adpy = android_display(ndpy);
+ struct sw_winsys *ws;
- if (adpy->base.screen)
- LOGI("using SW screen");
- else
- LOGE("failed to create SW screen");
+ ws = android_create_sw_winsys();
+ if (ws) {
+ adpy->base.screen =
+ adpy->event_handler->new_sw_screen(&adpy->base, ws);
}
if (adpy->base.screen) {
- if (!android_display_init_configs(&adpy->base)) {
- adpy->base.screen->destroy(adpy->base.screen);
- adpy->base.screen = NULL;
- }
+ LOGI("using SW screen");
+ return TRUE;
+ }
+ else {
+ LOGE("failed to create SW screen");
+ return FALSE;
}
-
- return (adpy->base.screen != NULL);
}
static void
@@ -603,9 +635,11 @@ static struct native_display_buffer android_display_buffer = {
static struct android_display *
android_display_create(struct native_event_handler *event_handler,
- void *user_data)
+ boolean use_sw, void *user_data)
{
struct android_display *adpy;
+ char value[PROPERTY_VALUE_MAX];
+ boolean force_sw;
adpy = CALLOC_STRUCT(android_display);
if (!adpy)
@@ -614,7 +648,23 @@ android_display_create(struct native_event_handler *event_handler,
adpy->event_handler = event_handler;
adpy->base.user_data = user_data;
- if (!android_display_init(&adpy->base)) {
+ if (property_get("debug.mesa.software", value, NULL))
+ force_sw = (atoi(value) != 0);
+ else
+ force_sw = debug_get_bool_option("EGL_SOFTWARE", FALSE);
+
+ if (force_sw || use_sw)
+ android_display_init_sw(&adpy->base);
+ else
+ android_display_init_drm(&adpy->base);
+
+ if (!adpy->base.screen) {
+ FREE(adpy);
+ return NULL;
+ }
+
+ if (!android_display_init_configs(&adpy->base)) {
+ adpy->base.screen->destroy(adpy->base.screen);
FREE(adpy);
return NULL;
}
@@ -629,19 +679,27 @@ android_display_create(struct native_event_handler *event_handler,
return adpy;
}
+static struct native_event_handler *android_event_handler;
+
+static void
+native_set_event_handler(struct native_event_handler *event_handler)
+{
+ android_event_handler = event_handler;
+}
+
static struct native_display *
-native_create_display(void *dpy, struct native_event_handler *event_handler,
- void *user_data)
+native_create_display(void *dpy, boolean use_sw, void *user_data)
{
struct android_display *adpy;
- adpy = android_display_create(event_handler, user_data);
+ adpy = android_display_create(android_event_handler, use_sw, user_data);
return (adpy) ? &adpy->base : NULL;
}
static const struct native_platform android_platform = {
"Android", /* name */
+ native_set_event_handler,
native_create_display
};
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c
index 604091aac0..e455167473 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d.c
@@ -38,6 +38,46 @@
#include "egl_g3d_loader.h"
#include "native.h"
+static void
+egl_g3d_invalid_surface(struct native_display *ndpy,
+ struct native_surface *nsurf,
+ unsigned int seq_num)
+{
+ /* XXX not thread safe? */
+ struct egl_g3d_surface *gsurf = egl_g3d_surface(nsurf->user_data);
+ struct egl_g3d_context *gctx;
+
+ /*
+ * Some functions such as egl_g3d_copy_buffers create a temporary native
+ * surface. There is no gsurf associated with it.
+ */
+ gctx = (gsurf) ? egl_g3d_context(gsurf->base.CurrentContext) : NULL;
+ if (gctx)
+ gctx->stctxi->notify_invalid_framebuffer(gctx->stctxi, gsurf->stfbi);
+}
+
+static struct pipe_screen *
+egl_g3d_new_drm_screen(struct native_display *ndpy, const char *name, int fd)
+{
+ _EGLDisplay *dpy = (_EGLDisplay *) ndpy->user_data;
+ struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
+ return gdpy->loader->create_drm_screen(name, fd);
+}
+
+static struct pipe_screen *
+egl_g3d_new_sw_screen(struct native_display *ndpy, struct sw_winsys *ws)
+{
+ _EGLDisplay *dpy = (_EGLDisplay *) ndpy->user_data;
+ struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
+ return gdpy->loader->create_sw_screen(ws);
+}
+
+static struct native_event_handler egl_g3d_native_event_handler = {
+ egl_g3d_invalid_surface,
+ egl_g3d_new_drm_screen,
+ egl_g3d_new_sw_screen
+};
+
/**
* Get the native platform.
*/
@@ -62,6 +102,12 @@ egl_g3d_get_platform(_EGLDriver *drv, _EGLPlatformType plat)
#ifdef HAVE_X11_BACKEND
nplat = native_get_x11_platform();
#endif
+ break;
+ case _EGL_PLATFORM_WAYLAND:
+ plat_name = "wayland";
+#ifdef HAVE_WAYLAND_BACKEND
+ nplat = native_get_wayland_platform();
+#endif
break;
case _EGL_PLATFORM_DRM:
plat_name = "DRM";
@@ -85,7 +131,9 @@ egl_g3d_get_platform(_EGLDriver *drv, _EGLPlatformType plat)
break;
}
- if (!nplat)
+ if (nplat)
+ nplat->set_event_handler(&egl_g3d_native_event_handler);
+ else
_eglLog(_EGL_WARNING, "unsupported platform %s", plat_name);
gdrv->platforms[plat] = nplat;
@@ -189,17 +237,21 @@ init_config_attributes(_EGLConfig *conf, const struct native_config *nconf,
}
surface_type = 0x0;
- if (nconf->window_bit)
- surface_type |= EGL_WINDOW_BIT;
- if (nconf->pixmap_bit)
- surface_type |= EGL_PIXMAP_BIT;
+ /* pixmap surfaces should be EGL_SINGLE_BUFFER */
+ if (nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_FRONT_LEFT)) {
+ if (nconf->pixmap_bit)
+ surface_type |= EGL_PIXMAP_BIT;
+ }
+ /* the others surfaces should be EGL_BACK_BUFFER (or settable) */
+ if (nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_BACK_LEFT)) {
+ if (nconf->window_bit)
+ surface_type |= EGL_WINDOW_BIT;
#ifdef EGL_MESA_screen_surface
- if (nconf->scanout_bit)
- surface_type |= EGL_SCREEN_BIT_MESA;
+ if (nconf->scanout_bit)
+ surface_type |= EGL_SCREEN_BIT_MESA;
#endif
-
- if (nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_BACK_LEFT))
surface_type |= EGL_PBUFFER_BIT;
+ }
conf->Conformant = api_mask;
conf->RenderableType = api_mask;
@@ -232,11 +284,6 @@ init_config_attributes(_EGLConfig *conf, const struct native_config *nconf,
}
conf->Level = nconf->level;
- conf->Samples = nconf->samples;
- conf->SampleBuffers = 0;
-
- if (nconf->slow_config)
- conf->ConfigCaveat = EGL_SLOW_CONFIG;
if (nconf->transparent_rgb) {
conf->TransparentType = EGL_TRANSPARENT_RGB;
@@ -263,13 +310,9 @@ egl_g3d_init_config(_EGLDriver *drv, _EGLDisplay *dpy,
int preserve_buffer, int max_swap_interval)
{
struct egl_g3d_config *gconf = egl_g3d_config(conf);
- EGLint buffer_mask, api_mask;
+ EGLint buffer_mask;
EGLBoolean valid;
- /* skip single-buffered configs */
- if (!(nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_BACK_LEFT)))
- return EGL_FALSE;
-
buffer_mask = 0x0;
if (nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_FRONT_LEFT))
buffer_mask |= ST_ATTACHMENT_FRONT_LEFT_MASK;
@@ -284,24 +327,14 @@ egl_g3d_init_config(_EGLDriver *drv, _EGLDisplay *dpy,
gconf->stvis.color_format = nconf->color_format;
gconf->stvis.depth_stencil_format = depth_stencil_format;
gconf->stvis.accum_format = PIPE_FORMAT_NONE;
- gconf->stvis.samples = nconf->samples;
+ gconf->stvis.samples = 0;
+ /* will be overridden per surface */
gconf->stvis.render_buffer = (buffer_mask & ST_ATTACHMENT_BACK_LEFT_MASK) ?
ST_ATTACHMENT_BACK_LEFT : ST_ATTACHMENT_FRONT_LEFT;
- api_mask = dpy->ClientAPIsMask;
- /* this is required by EGL, not by OpenGL ES */
- if (nconf->window_bit &&
- gconf->stvis.render_buffer != ST_ATTACHMENT_BACK_LEFT)
- api_mask &= ~(EGL_OPENGL_ES_BIT | EGL_OPENGL_ES2_BIT);
-
- if (!api_mask) {
- _eglLog(_EGL_DEBUG, "no state tracker supports config 0x%x",
- nconf->native_visual_id);
- }
-
valid = init_config_attributes(&gconf->base,
- nconf, api_mask, depth_stencil_format,
+ nconf, dpy->ClientAPIs, depth_stencil_format,
preserve_buffer, max_swap_interval);
if (!valid) {
_eglLog(_EGL_DEBUG, "skip invalid config 0x%x", nconf->native_visual_id);
@@ -341,7 +374,7 @@ egl_g3d_fill_depth_stencil_formats(_EGLDisplay *dpy,
/* pick the first supported format */
for (i = 0; i < n; i++) {
if (screen->is_format_supported(screen, fmt[i],
- PIPE_TEXTURE_2D, 0, PIPE_BIND_DEPTH_STENCIL, 0)) {
+ PIPE_TEXTURE_2D, 0, PIPE_BIND_DEPTH_STENCIL)) {
formats[count++] = fmt[i];
break;
}
@@ -405,46 +438,6 @@ egl_g3d_add_configs(_EGLDriver *drv, _EGLDisplay *dpy, EGLint id)
}
static void
-egl_g3d_invalid_surface(struct native_display *ndpy,
- struct native_surface *nsurf,
- unsigned int seq_num)
-{
- /* XXX not thread safe? */
- struct egl_g3d_surface *gsurf = egl_g3d_surface(nsurf->user_data);
- struct egl_g3d_context *gctx;
-
- /*
- * Some functions such as egl_g3d_copy_buffers create a temporary native
- * surface. There is no gsurf associated with it.
- */
- gctx = (gsurf) ? egl_g3d_context(gsurf->base.CurrentContext) : NULL;
- if (gctx)
- gctx->stctxi->notify_invalid_framebuffer(gctx->stctxi, gsurf->stfbi);
-}
-
-static struct pipe_screen *
-egl_g3d_new_drm_screen(struct native_display *ndpy, const char *name, int fd)
-{
- _EGLDisplay *dpy = (_EGLDisplay *) ndpy->user_data;
- struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
- return gdpy->loader->create_drm_screen(name, fd);
-}
-
-static struct pipe_screen *
-egl_g3d_new_sw_screen(struct native_display *ndpy, struct sw_winsys *ws)
-{
- _EGLDisplay *dpy = (_EGLDisplay *) ndpy->user_data;
- struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
- return gdpy->loader->create_sw_screen(ws);
-}
-
-static struct native_event_handler egl_g3d_native_event_handler = {
- egl_g3d_invalid_surface,
- egl_g3d_new_drm_screen,
- egl_g3d_new_sw_screen
-};
-
-static void
egl_g3d_free_config(void *conf)
{
struct egl_g3d_config *gconf = egl_g3d_config((_EGLConfig *) conf);
@@ -468,9 +461,6 @@ egl_g3d_terminate(_EGLDriver *drv, _EGLDisplay *dpy)
_eglReleaseDisplayResources(drv, dpy);
- if (gdpy->pipe)
- gdpy->pipe->destroy(gdpy->pipe);
-
if (dpy->Configs) {
_eglDestroyArray(dpy->Configs, egl_g3d_free_config);
dpy->Configs = NULL;
@@ -495,8 +485,7 @@ egl_g3d_terminate(_EGLDriver *drv, _EGLDisplay *dpy)
}
static EGLBoolean
-egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy,
- EGLint *major, EGLint *minor)
+egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy)
{
struct egl_g3d_driver *gdrv = egl_g3d_driver(drv);
struct egl_g3d_display *gdpy;
@@ -506,6 +495,9 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy,
if (!nplat)
return EGL_FALSE;
+ if (dpy->Options.TestOnly)
+ return EGL_TRUE;
+
gdpy = CALLOC_STRUCT(egl_g3d_display);
if (!gdpy) {
_eglError(EGL_BAD_ALLOC, "eglInitialize");
@@ -516,20 +508,20 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy,
_eglLog(_EGL_INFO, "use %s for display %p", nplat->name, dpy->PlatformDisplay);
gdpy->native = nplat->create_display(dpy->PlatformDisplay,
- &egl_g3d_native_event_handler, (void *) dpy);
+ dpy->Options.UseFallback, (void *) dpy);
if (!gdpy->native) {
_eglError(EGL_NOT_INITIALIZED, "eglInitialize(no usable display)");
goto fail;
}
if (gdpy->loader->profile_masks[ST_API_OPENGL] & ST_PROFILE_DEFAULT_MASK)
- dpy->ClientAPIsMask |= EGL_OPENGL_BIT;
+ dpy->ClientAPIs |= EGL_OPENGL_BIT;
if (gdpy->loader->profile_masks[ST_API_OPENGL] & ST_PROFILE_OPENGL_ES1_MASK)
- dpy->ClientAPIsMask |= EGL_OPENGL_ES_BIT;
+ dpy->ClientAPIs |= EGL_OPENGL_ES_BIT;
if (gdpy->loader->profile_masks[ST_API_OPENGL] & ST_PROFILE_OPENGL_ES2_MASK)
- dpy->ClientAPIsMask |= EGL_OPENGL_ES2_BIT;
+ dpy->ClientAPIs |= EGL_OPENGL_ES2_BIT;
if (gdpy->loader->profile_masks[ST_API_OPENVG] & ST_PROFILE_DEFAULT_MASK)
- dpy->ClientAPIsMask |= EGL_OPENVG_BIT;
+ dpy->ClientAPIs |= EGL_OPENVG_BIT;
gdpy->smapi = egl_g3d_create_st_manager(dpy);
if (!gdpy->smapi) {
@@ -563,6 +555,9 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy,
dpy->Extensions.MESA_drm_image = EGL_TRUE;
}
+ if (dpy->Platform == _EGL_PLATFORM_WAYLAND && gdpy->native->buffer)
+ dpy->Extensions.MESA_drm_image = EGL_TRUE;
+
#ifdef EGL_ANDROID_image_native_buffer
if (dpy->Platform == _EGL_PLATFORM_ANDROID && gdpy->native->buffer)
dpy->Extensions.ANDROID_image_native_buffer = EGL_TRUE;
@@ -573,8 +568,8 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy,
goto fail;
}
- *major = 1;
- *minor = 4;
+ dpy->VersionMajor = 1;
+ dpy->VersionMinor = 4;
return EGL_TRUE;
@@ -599,12 +594,6 @@ egl_g3d_get_proc_address(_EGLDriver *drv, const char *procname)
stapi->get_proc_address(stapi, procname) : NULL);
}
-static EGLint
-egl_g3d_probe(_EGLDriver *drv, _EGLDisplay *dpy)
-{
- return (egl_g3d_get_platform(drv, dpy->Platform)) ? 90 : 0;
-}
-
_EGLDriver *
egl_g3d_create_driver(const struct egl_g3d_loader *loader)
{
@@ -621,8 +610,6 @@ egl_g3d_create_driver(const struct egl_g3d_loader *loader)
gdrv->base.API.Terminate = egl_g3d_terminate;
gdrv->base.API.GetProcAddress = egl_g3d_get_proc_address;
- gdrv->base.Probe = egl_g3d_probe;
-
/* to be filled by the caller */
gdrv->base.Name = NULL;
gdrv->base.Unload = NULL;
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.h b/src/gallium/state_trackers/egl/common/egl_g3d.h
index a5850635b2..2292d611fc 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d.h
+++ b/src/gallium/state_trackers/egl/common/egl_g3d.h
@@ -56,7 +56,6 @@ struct egl_g3d_display {
const struct egl_g3d_loader *loader;
struct st_manager *smapi;
- struct pipe_context *pipe;
};
struct egl_g3d_context {
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_api.c b/src/gallium/state_trackers/egl/common/egl_g3d_api.c
index 8e53e1dccb..f1568329ec 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d_api.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_api.c
@@ -158,17 +158,17 @@ egl_g3d_choose_config(_EGLDriver *drv, _EGLDisplay *dpy, const EGLint *attribs,
(_EGLArrayForEach) egl_g3d_match_config, (void *) &criteria);
/* perform sorting of configs */
- if (tmp_configs && tmp_size) {
+ if (configs && tmp_size) {
_eglSortConfigs((const _EGLConfig **) tmp_configs, tmp_size,
egl_g3d_compare_config, (void *) &criteria);
- size = MIN2(tmp_size, size);
- for (i = 0; i < size; i++)
+ tmp_size = MIN2(tmp_size, size);
+ for (i = 0; i < tmp_size; i++)
configs[i] = _eglGetConfigHandle(tmp_configs[i]);
}
FREE(tmp_configs);
- *num_configs = size;
+ *num_configs = tmp_size;
return EGL_TRUE;
}
@@ -324,7 +324,8 @@ egl_g3d_create_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf,
}
gsurf->stvis = gconf->stvis;
- if (gsurf->base.RenderBuffer == EGL_SINGLE_BUFFER)
+ if (gsurf->base.RenderBuffer == EGL_SINGLE_BUFFER &&
+ gconf->stvis.buffer_mask & ST_ATTACHMENT_FRONT_LEFT_MASK)
gsurf->stvis.render_buffer = ST_ATTACHMENT_FRONT_LEFT;
gsurf->stfbi = egl_g3d_create_st_framebuffer(&gsurf->base);
@@ -402,7 +403,6 @@ egl_g3d_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *dpy,
_EGLConfig *conf, const EGLint *attribs)
{
struct egl_g3d_surface *gsurf;
- struct pipe_resource *ptex = NULL;
gsurf = create_pbuffer_surface(dpy, conf, attribs,
"eglCreatePbufferSurface");
@@ -411,13 +411,6 @@ egl_g3d_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *dpy,
gsurf->client_buffer_type = EGL_NONE;
- if (!gsurf->stfbi->validate(gsurf->stfbi,
- &gsurf->stvis.render_buffer, 1, &ptex)) {
- egl_g3d_destroy_st_framebuffer(gsurf->stfbi);
- FREE(gsurf);
- return NULL;
- }
-
return &gsurf->base;
}
@@ -477,12 +470,14 @@ egl_g3d_create_pbuffer_from_client_buffer(_EGLDriver *drv, _EGLDisplay *dpy,
gsurf->client_buffer_type = buftype;
gsurf->client_buffer = buffer;
+ /* validate now so that it fails if the client buffer is invalid */
if (!gsurf->stfbi->validate(gsurf->stfbi,
&gsurf->stvis.render_buffer, 1, &ptex)) {
egl_g3d_destroy_st_framebuffer(gsurf->stfbi);
FREE(gsurf);
return NULL;
}
+ pipe_resource_reference(&ptex, NULL);
return &gsurf->base;
}
@@ -533,8 +528,7 @@ egl_g3d_make_current(_EGLDriver *drv, _EGLDisplay *dpy,
old_gctx = egl_g3d_context(old_ctx);
if (old_gctx) {
/* flush old context */
- old_gctx->stctxi->flush(old_gctx->stctxi,
- PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, NULL);
+ old_gctx->stctxi->flush(old_gctx->stctxi, ST_FLUSH_FRONT, NULL);
}
if (gctx) {
@@ -611,8 +605,7 @@ egl_g3d_swap_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf)
/* flush if the surface is current */
if (gctx) {
- gctx->stctxi->flush(gctx->stctxi,
- PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, NULL);
+ gctx->stctxi->flush(gctx->stctxi, ST_FLUSH_FRONT, NULL);
}
return gsurf->native->present(gsurf->native,
@@ -643,47 +636,36 @@ egl_g3d_copy_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf,
struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
struct egl_g3d_surface *gsurf = egl_g3d_surface(surf);
_EGLContext *ctx = _eglGetCurrentContext();
- struct egl_g3d_config *gconf;
struct native_surface *nsurf;
struct pipe_resource *ptex;
+ struct pipe_context *pipe;
if (!gsurf->render_texture)
return EGL_TRUE;
- gconf = egl_g3d_config(egl_g3d_find_pixmap_config(dpy, target));
- if (!gconf)
- return _eglError(EGL_BAD_NATIVE_PIXMAP, "eglCopyBuffers");
-
- nsurf = gdpy->native->create_pixmap_surface(gdpy->native,
- target, gconf->native);
+ nsurf = gdpy->native->create_pixmap_surface(gdpy->native, target, NULL);
if (!nsurf)
return _eglError(EGL_BAD_NATIVE_PIXMAP, "eglCopyBuffers");
/* flush if the surface is current */
if (ctx && ctx->DrawSurface == &gsurf->base) {
struct egl_g3d_context *gctx = egl_g3d_context(ctx);
- gctx->stctxi->flush(gctx->stctxi,
- PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, NULL);
+ gctx->stctxi->flush(gctx->stctxi, ST_FLUSH_FRONT, NULL);
}
- /* create a pipe context to copy surfaces */
- if (!gdpy->pipe) {
- gdpy->pipe =
- gdpy->native->screen->context_create(gdpy->native->screen, NULL);
- if (!gdpy->pipe)
- return EGL_FALSE;
- }
+ pipe = ndpy_get_copy_context(gdpy->native);
+ if (!pipe)
+ return EGL_FALSE;
ptex = get_pipe_resource(gdpy->native, nsurf, NATIVE_ATTACHMENT_FRONT_LEFT);
if (ptex) {
- struct pipe_resource *psrc = gsurf->render_texture;
struct pipe_box src_box;
+
u_box_origin_2d(ptex->width0, ptex->height0, &src_box);
- if (psrc) {
- gdpy->pipe->resource_copy_region(gdpy->pipe, ptex, 0, 0, 0, 0,
- gsurf->render_texture, 0, &src_box);
- nsurf->present(nsurf, NATIVE_ATTACHMENT_FRONT_LEFT, FALSE, 0);
- }
+ pipe->resource_copy_region(pipe, ptex, 0, 0, 0, 0,
+ gsurf->render_texture, 0, &src_box);
+ pipe->flush(pipe, NULL);
+ nsurf->present(nsurf, NATIVE_ATTACHMENT_FRONT_LEFT, FALSE, 0);
pipe_resource_reference(&ptex, NULL);
}
@@ -701,10 +683,9 @@ egl_g3d_wait_client(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx)
struct pipe_screen *screen = gdpy->native->screen;
struct pipe_fence_handle *fence = NULL;
- gctx->stctxi->flush(gctx->stctxi,
- PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, &fence);
+ gctx->stctxi->flush(gctx->stctxi, ST_FLUSH_FRONT, &fence);
if (fence) {
- screen->fence_finish(screen, fence, 0);
+ screen->fence_finish(screen, fence, PIPE_TIMEOUT_INFINITE);
screen->fence_reference(screen, &fence, NULL);
}
@@ -773,8 +754,7 @@ egl_g3d_bind_tex_image(_EGLDriver *drv, _EGLDisplay *dpy,
/* flush properly if the surface is bound */
if (gsurf->base.CurrentContext) {
gctx = egl_g3d_context(gsurf->base.CurrentContext);
- gctx->stctxi->flush(gctx->stctxi,
- PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, NULL);
+ gctx->stctxi->flush(gctx->stctxi, ST_FLUSH_FRONT, NULL);
}
gctx = egl_g3d_context(es1);
@@ -888,25 +868,6 @@ egl_g3d_show_screen_surface(_EGLDriver *drv, _EGLDisplay *dpy,
#endif /* EGL_MESA_screen_surface */
-/**
- * Find a config that supports the pixmap.
- */
-_EGLConfig *
-egl_g3d_find_pixmap_config(_EGLDisplay *dpy, EGLNativePixmapType pix)
-{
- struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
- struct egl_g3d_config *gconf;
- EGLint i;
-
- for (i = 0; i < dpy->Configs->Size; i++) {
- gconf = egl_g3d_config((_EGLConfig *) dpy->Configs->Elements[i]);
- if (gdpy->native->is_pixmap_supported(gdpy->native, pix, gconf->native))
- break;
- }
-
- return (i < dpy->Configs->Size) ? &gconf->base : NULL;
-}
-
void
egl_g3d_init_driver_api(_EGLDriver *drv)
{
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_api.h b/src/gallium/state_trackers/egl/common/egl_g3d_api.h
index d5196c12fe..17fd7958aa 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d_api.h
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_api.h
@@ -31,7 +31,4 @@
void
egl_g3d_init_driver_api(_EGLDriver *drv);
-_EGLConfig *
-egl_g3d_find_pixmap_config(_EGLDisplay *dpy, EGLNativePixmapType pix);
-
#endif /* _EGL_G3D_API_H_ */
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_image.c b/src/gallium/state_trackers/egl/common/egl_g3d_image.c
index b53b121005..1d23305402 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d_image.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_image.c
@@ -35,7 +35,6 @@
#include "native.h"
#include "egl_g3d.h"
-#include "egl_g3d_api.h"
#include "egl_g3d_image.h"
/* for struct winsys_handle */
@@ -53,17 +52,11 @@ static struct pipe_resource *
egl_g3d_reference_native_pixmap(_EGLDisplay *dpy, EGLNativePixmapType pix)
{
struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
- struct egl_g3d_config *gconf;
struct native_surface *nsurf;
struct pipe_resource *textures[NUM_NATIVE_ATTACHMENTS];
enum native_attachment natt;
- gconf = egl_g3d_config(egl_g3d_find_pixmap_config(dpy, pix));
- if (!gconf)
- return NULL;
-
- nsurf = gdpy->native->create_pixmap_surface(gdpy->native,
- pix, gconf->native);
+ nsurf = gdpy->native->create_pixmap_surface(gdpy->native, pix, NULL);
if (!nsurf)
return NULL;
@@ -123,6 +116,7 @@ egl_g3d_create_drm_buffer(_EGLDisplay *dpy, _EGLImage *img,
templ.width0 = attrs.Width;
templ.height0 = attrs.Height;
templ.depth0 = 1;
+ templ.array_size = 1;
/*
* XXX fix apps (e.g. wayland) and pipe drivers (e.g. i915) and remove the
@@ -147,7 +141,7 @@ egl_g3d_reference_drm_buffer(_EGLDisplay *dpy, EGLint name,
_EGLImageAttribs attrs;
EGLint format;
- if (dpy->Platform != _EGL_PLATFORM_DRM)
+ if (!dpy->Extensions.MESA_drm_image)
return NULL;
if (_eglParseImageAttribList(&attrs, dpy, attribs) != EGL_SUCCESS)
@@ -236,10 +230,12 @@ egl_g3d_reference_android_native_buffer(_EGLDisplay *dpy,
memset(&templ, 0, sizeof(templ));
templ.target = PIPE_TEXTURE_2D;
templ.format = format;
- templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
+ /* assume for texturing only */
+ templ.bind = PIPE_BIND_SAMPLER_VIEW;
templ.width0 = buf->width;
templ.height0 = buf->height;
templ.depth0 = 1;
+ templ.array_size = 1;
res = gdpy->native->buffer->import_buffer(gdpy->native,
&templ, (void *) buf);
@@ -385,7 +381,7 @@ egl_g3d_export_drm_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLImage *img,
struct egl_g3d_image *gimg = egl_g3d_image(img);
struct winsys_handle wsh;
- if (dpy->Platform != _EGL_PLATFORM_DRM)
+ if (!dpy->Extensions.MESA_drm_image)
return EGL_FALSE;
/* get shared handle */
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_sync.c b/src/gallium/state_trackers/egl/common/egl_g3d_sync.c
index 4e6d944c15..dd07af140a 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d_sync.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_sync.c
@@ -109,7 +109,7 @@ egl_g3d_wait_fence_sync(struct egl_g3d_sync *gsync, EGLTimeKHR timeout)
_eglUnlockMutex(&dpy->Mutex);
/* no timed finish? */
- screen->fence_finish(screen, fence, 0x0);
+ screen->fence_finish(screen, fence, PIPE_TIMEOUT_INFINITE);
ret = EGL_CONDITION_SATISFIED_KHR;
_eglLockMutex(&dpy->Mutex);
@@ -234,7 +234,7 @@ egl_g3d_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
struct egl_g3d_context *gctx = egl_g3d_context(ctx);
if (gctx)
- gctx->stctxi->flush(gctx->stctxi, PIPE_FLUSH_RENDER_CACHE , NULL);
+ gctx->stctxi->flush(gctx->stctxi, ST_FLUSH_FRONT, NULL);
}
if (timeout) {
diff --git a/src/gallium/state_trackers/egl/common/native.h b/src/gallium/state_trackers/egl/common/native.h
index ca8cb97f71..9e9cd3fd05 100644
--- a/src/gallium/state_trackers/egl/common/native.h
+++ b/src/gallium/state_trackers/egl/common/native.h
@@ -126,8 +126,6 @@ struct native_config {
int native_visual_id;
int native_visual_type;
int level;
- int samples;
- boolean slow_config;
boolean transparent_rgb;
int transparent_rgb_values[3];
};
@@ -144,6 +142,11 @@ struct native_display {
struct pipe_screen *screen;
/**
+ * Context used for copy operations.
+ */
+ struct pipe_context *pipe;
+
+ /**
* Available for caller's use.
*/
void *user_data;
@@ -185,7 +188,9 @@ struct native_display {
const struct native_config *nconf);
/**
- * Create a pixmap surface. Required unless no config has pixmap_bit set.
+ * Create a pixmap surface. The native config may be NULL. In that case, a
+ * "best config" will be picked. Required unless no config has pixmap_bit
+ * set.
*/
struct native_surface *(*create_pixmap_surface)(struct native_display *ndpy,
EGLNativePixmapType pix,
@@ -223,11 +228,35 @@ native_attachment_mask_test(uint mask, enum native_attachment att)
return !!(mask & (1 << att));
}
+/**
+ * Get the display copy context
+ */
+static INLINE struct pipe_context *
+ndpy_get_copy_context(struct native_display *ndpy)
+{
+ if (!ndpy->pipe)
+ ndpy->pipe = ndpy->screen->context_create(ndpy->screen, NULL);
+ return ndpy->pipe;
+}
+
+/**
+ * Free display screen and context resources
+ */
+static INLINE void
+ndpy_uninit(struct native_display *ndpy)
+{
+ if (ndpy->pipe)
+ ndpy->pipe->destroy(ndpy->pipe);
+ if (ndpy->screen)
+ ndpy->screen->destroy(ndpy->screen);
+}
+
struct native_platform {
const char *name;
+ void (*set_event_handler)(struct native_event_handler *handler);
struct native_display *(*create_display)(void *dpy,
- struct native_event_handler *handler,
+ boolean use_sw,
void *user_data);
};
@@ -238,6 +267,9 @@ const struct native_platform *
native_get_x11_platform(void);
const struct native_platform *
+native_get_wayland_platform(void);
+
+const struct native_platform *
native_get_drm_platform(void);
const struct native_platform *
diff --git a/src/gallium/state_trackers/egl/common/native_helper.c b/src/gallium/state_trackers/egl/common/native_helper.c
index 0f2d02032b..ee18cb2025 100644
--- a/src/gallium/state_trackers/egl/common/native_helper.c
+++ b/src/gallium/state_trackers/egl/common/native_helper.c
@@ -3,6 +3,7 @@
* Version: 7.9
*
* Copyright (C) 2010 LunarG Inc.
+ * Copyright (C) 2011 VMware Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -24,6 +25,7 @@
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
+ * Thomas Hellstrom <thellstrom@vmware.com>
*/
#include "util/u_inlines.h"
@@ -34,6 +36,14 @@
#include "native_helper.h"
+/**
+ * Number of swap fences and mask
+ */
+
+#define EGL_SWAP_FENCES_MAX 4
+#define EGL_SWAP_FENCES_MASK 3
+#define EGL_SWAP_FENCES_DEFAULT 1
+
struct resource_surface {
struct pipe_screen *screen;
enum pipe_format format;
@@ -42,6 +52,15 @@ struct resource_surface {
struct pipe_resource *resources[NUM_NATIVE_ATTACHMENTS];
uint resource_mask;
uint width, height;
+
+ /**
+ * Swap fences.
+ */
+ struct pipe_fence_handle *swap_fences[EGL_SWAP_FENCES_MAX];
+ unsigned int cur_fences;
+ unsigned int head;
+ unsigned int tail;
+ unsigned int desired_fences;
};
struct resource_surface *
@@ -49,11 +68,16 @@ resource_surface_create(struct pipe_screen *screen,
enum pipe_format format, uint bind)
{
struct resource_surface *rsurf = CALLOC_STRUCT(resource_surface);
+ char *swap_fences = getenv("EGL_THROTTLE_FENCES");
if (rsurf) {
rsurf->screen = screen;
rsurf->format = format;
rsurf->bind = bind;
+ rsurf->desired_fences = (swap_fences) ? atoi(swap_fences) :
+ EGL_SWAP_FENCES_DEFAULT;
+ if (rsurf->desired_fences > EGL_SWAP_FENCES_MAX)
+ rsurf->desired_fences = EGL_SWAP_FENCES_MAX;
}
return rsurf;
@@ -143,6 +167,14 @@ resource_surface_add_resources(struct resource_surface *rsurf,
return ((rsurf->resource_mask & resource_mask) == resource_mask);
}
+void
+resource_surface_import_resource(struct resource_surface *rsurf,
+ enum native_attachment which,
+ struct pipe_resource *pres)
+{
+ pipe_resource_reference(&rsurf->resources[which], pres);
+ rsurf->resource_mask |= 1 << which;
+}
void
resource_surface_get_resources(struct resource_surface *rsurf,
@@ -217,3 +249,121 @@ resource_surface_present(struct resource_surface *rsurf,
return TRUE;
}
+
+/**
+ * Schedule a copy swap from the back to the front buffer using the
+ * native display's copy context.
+ */
+boolean
+resource_surface_copy_swap(struct resource_surface *rsurf,
+ struct native_display *ndpy)
+{
+ struct pipe_resource *ftex;
+ struct pipe_resource *btex;
+ struct pipe_context *pipe;
+ struct pipe_box src_box;
+ boolean ret = FALSE;
+
+ pipe = ndpy_get_copy_context(ndpy);
+ if (!pipe)
+ return FALSE;
+
+ ftex = resource_surface_get_single_resource(rsurf,
+ NATIVE_ATTACHMENT_FRONT_LEFT);
+ if (!ftex)
+ goto out_no_ftex;
+ btex = resource_surface_get_single_resource(rsurf,
+ NATIVE_ATTACHMENT_BACK_LEFT);
+ if (!btex)
+ goto out_no_btex;
+
+ u_box_origin_2d(ftex->width0, ftex->height0, &src_box);
+ pipe->resource_copy_region(pipe, ftex, 0, 0, 0, 0,
+ btex, 0, &src_box);
+ ret = TRUE;
+
+ out_no_ftex:
+ pipe_resource_reference(&btex, NULL);
+ out_no_btex:
+ pipe_resource_reference(&ftex, NULL);
+
+ return ret;
+}
+
+static struct pipe_fence_handle *
+swap_fences_pop_front(struct resource_surface *rsurf)
+{
+ struct pipe_screen *screen = rsurf->screen;
+ struct pipe_fence_handle *fence = NULL;
+
+ if (rsurf->desired_fences == 0)
+ return NULL;
+
+ if (rsurf->cur_fences >= rsurf->desired_fences) {
+ screen->fence_reference(screen, &fence, rsurf->swap_fences[rsurf->tail]);
+ screen->fence_reference(screen, &rsurf->swap_fences[rsurf->tail++], NULL);
+ rsurf->tail &= EGL_SWAP_FENCES_MASK;
+ --rsurf->cur_fences;
+ }
+ return fence;
+}
+
+static void
+swap_fences_push_back(struct resource_surface *rsurf,
+ struct pipe_fence_handle *fence)
+{
+ struct pipe_screen *screen = rsurf->screen;
+
+ if (!fence || rsurf->desired_fences == 0)
+ return;
+
+ while(rsurf->cur_fences == rsurf->desired_fences)
+ swap_fences_pop_front(rsurf);
+
+ rsurf->cur_fences++;
+ screen->fence_reference(screen, &rsurf->swap_fences[rsurf->head++],
+ fence);
+ rsurf->head &= EGL_SWAP_FENCES_MASK;
+}
+
+boolean
+resource_surface_throttle(struct resource_surface *rsurf)
+{
+ struct pipe_screen *screen = rsurf->screen;
+ struct pipe_fence_handle *fence = swap_fences_pop_front(rsurf);
+
+ if (fence) {
+ (void) screen->fence_finish(screen, fence, PIPE_TIMEOUT_INFINITE);
+ screen->fence_reference(screen, &fence, NULL);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+boolean
+resource_surface_flush(struct resource_surface *rsurf,
+ struct native_display *ndpy)
+{
+ struct pipe_fence_handle *fence = NULL;
+ struct pipe_screen *screen = rsurf->screen;
+ struct pipe_context *pipe= ndpy_get_copy_context(ndpy);
+
+ if (!pipe)
+ return FALSE;
+
+ pipe->flush(pipe, &fence);
+ if (fence == NULL)
+ return FALSE;
+
+ swap_fences_push_back(rsurf, fence);
+ screen->fence_reference(screen, &fence, NULL);
+
+ return TRUE;
+}
+
+void
+resource_surface_wait(struct resource_surface *rsurf)
+{
+ while (resource_surface_throttle(rsurf));
+}
diff --git a/src/gallium/state_trackers/egl/common/native_helper.h b/src/gallium/state_trackers/egl/common/native_helper.h
index d1569ac3ea..39564a0436 100644
--- a/src/gallium/state_trackers/egl/common/native_helper.h
+++ b/src/gallium/state_trackers/egl/common/native_helper.h
@@ -3,6 +3,7 @@
* Version: 7.9
*
* Copyright (C) 2010 LunarG Inc.
+ * Copyright (C) 2011 VMware Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -24,6 +25,7 @@
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
+ * Thomas Hellstrom <thellstrom@vmware.com>
*/
#include "native.h"
@@ -51,6 +53,11 @@ resource_surface_add_resources(struct resource_surface *rsurf,
uint resource_mask);
void
+resource_surface_import_resource(struct resource_surface *rsurf,
+ enum native_attachment which,
+ struct pipe_resource *pres);
+
+void
resource_surface_get_resources(struct resource_surface *rsurf,
struct pipe_resource **resources,
uint resource_mask);
@@ -69,3 +76,32 @@ boolean
resource_surface_present(struct resource_surface *rsurf,
enum native_attachment which,
void *winsys_drawable_handle);
+
+/**
+ * Perform a gallium copy blit between the back left and front left
+ * surfaces. Needs to be followed by a call to resource_surface_flush.
+ */
+boolean
+resource_surface_copy_swap(struct resource_surface *rsurf,
+ struct native_display *ndpy);
+
+/**
+ * Throttle on outstanding rendering using the copy context. For example
+ * copy swaps.
+ */
+boolean
+resource_surface_throttle(struct resource_surface *rsurf);
+
+/**
+ * Flush pending rendering using the copy context. This function saves a
+ * marker for upcoming throttles.
+ */
+boolean
+resource_surface_flush(struct resource_surface *rsurf,
+ struct native_display *ndpy);
+/**
+ * Wait for all rendering using the copy context to be complete. Frees all
+ * throttle markers saved using resource_surface_flush.
+ */
+void
+resource_surface_wait(struct resource_surface *rsurf);
diff --git a/src/gallium/state_trackers/egl/drm/modeset.c b/src/gallium/state_trackers/egl/drm/modeset.c
index 0cc06caa2a..3fff954090 100644
--- a/src/gallium/state_trackers/egl/drm/modeset.c
+++ b/src/gallium/state_trackers/egl/drm/modeset.c
@@ -3,6 +3,7 @@
* Version: 7.9
*
* Copyright (C) 2010 LunarG Inc.
+ * Copyright (C) 2011 VMware Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -24,6 +25,7 @@
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
+ * Thomas Hellstrom <thellstrom@vmware.com>
*/
#include "util/u_memory.h"
@@ -131,6 +133,25 @@ drm_surface_flush_frontbuffer(struct native_surface *nsurf)
}
static boolean
+drm_surface_copy_swap(struct native_surface *nsurf)
+{
+ struct drm_surface *drmsurf = drm_surface(nsurf);
+ struct drm_display *drmdpy = drmsurf->drmdpy;
+
+ (void) resource_surface_throttle(drmsurf->rsurf);
+ if (!resource_surface_copy_swap(drmsurf->rsurf, &drmdpy->base))
+ return FALSE;
+
+ (void) resource_surface_flush(drmsurf->rsurf, &drmdpy->base);
+ if (!drm_surface_flush_frontbuffer(nsurf))
+ return FALSE;
+
+ drmsurf->sequence_number++;
+
+ return TRUE;
+}
+
+static boolean
drm_surface_swap_buffers(struct native_surface *nsurf)
{
struct drm_surface *drmsurf = drm_surface(nsurf);
@@ -139,17 +160,21 @@ drm_surface_swap_buffers(struct native_surface *nsurf)
struct drm_framebuffer tmp_fb;
int err;
+ if (!drmsurf->have_pageflip)
+ return drm_surface_copy_swap(nsurf);
+
if (!drmsurf->back_fb.buffer_id) {
if (!drm_surface_init_framebuffers(&drmsurf->base, TRUE))
return FALSE;
}
if (drmsurf->is_shown && drmcrtc->crtc) {
- err = drmModeSetCrtc(drmdpy->fd, drmcrtc->crtc->crtc_id,
- drmsurf->back_fb.buffer_id, drmcrtc->crtc->x, drmcrtc->crtc->y,
- drmcrtc->connectors, drmcrtc->num_connectors, &drmcrtc->crtc->mode);
- if (err)
- return FALSE;
+ err = drmModePageFlip(drmdpy->fd, drmcrtc->crtc->crtc_id,
+ drmsurf->back_fb.buffer_id, 0, NULL);
+ if (err) {
+ drmsurf->have_pageflip = FALSE;
+ return drm_surface_copy_swap(nsurf);
+ }
}
/* swap the buffers */
@@ -175,7 +200,7 @@ drm_surface_present(struct native_surface *nsurf,
{
boolean ret;
- if (preserve || swap_interval)
+ if (swap_interval)
return FALSE;
switch (natt) {
@@ -183,7 +208,10 @@ drm_surface_present(struct native_surface *nsurf,
ret = drm_surface_flush_frontbuffer(nsurf);
break;
case NATIVE_ATTACHMENT_BACK_LEFT:
- ret = drm_surface_swap_buffers(nsurf);
+ if (preserve)
+ ret = drm_surface_copy_swap(nsurf);
+ else
+ ret = drm_surface_swap_buffers(nsurf);
break;
default:
ret = FALSE;
@@ -196,7 +224,9 @@ drm_surface_present(struct native_surface *nsurf,
static void
drm_surface_wait(struct native_surface *nsurf)
{
- /* no-op */
+ struct drm_surface *drmsurf = drm_surface(nsurf);
+
+ resource_surface_wait(drmsurf->rsurf);
}
static void
@@ -204,6 +234,7 @@ drm_surface_destroy(struct native_surface *nsurf)
{
struct drm_surface *drmsurf = drm_surface(nsurf);
+ resource_surface_wait(drmsurf->rsurf);
if (drmsurf->current_crtc.crtc)
drmModeFreeCrtc(drmsurf->current_crtc.crtc);
@@ -236,6 +267,7 @@ drm_display_create_surface(struct native_display *ndpy,
drmsurf->color_format = drmconf->base.color_format;
drmsurf->width = width;
drmsurf->height = height;
+ drmsurf->have_pageflip = TRUE;
drmsurf->rsurf = resource_surface_create(drmdpy->base.screen,
drmsurf->color_format,
diff --git a/src/gallium/state_trackers/egl/drm/native_drm.c b/src/gallium/state_trackers/egl/drm/native_drm.c
index 2441b43fd8..9863329ff4 100644
--- a/src/gallium/state_trackers/egl/drm/native_drm.c
+++ b/src/gallium/state_trackers/egl/drm/native_drm.c
@@ -43,7 +43,7 @@ drm_display_is_format_supported(struct native_display *ndpy,
return ndpy->screen->is_format_supported(ndpy->screen,
fmt, PIPE_TEXTURE_2D, 0,
(is_color) ? PIPE_BIND_RENDER_TARGET :
- PIPE_BIND_DEPTH_STENCIL, 0);
+ PIPE_BIND_DEPTH_STENCIL);
}
static const struct native_config **
@@ -124,8 +124,7 @@ drm_display_destroy(struct native_display *ndpy)
drm_display_fini_modeset(&drmdpy->base);
- if (drmdpy->base.screen)
- drmdpy->base.screen->destroy(drmdpy->base.screen);
+ ndpy_uninit(ndpy);
if (drmdpy->fd >= 0)
close(drmdpy->fd);
@@ -178,7 +177,7 @@ drm_display_init_screen(struct native_display *ndpy)
drmFreeVersion(version);
if (!drmdpy->base.screen) {
- _eglLog(_EGL_WARNING, "failed to create DRM screen");
+ _eglLog(_EGL_DEBUG, "failed to create DRM screen");
return FALSE;
}
@@ -237,9 +236,16 @@ drm_create_display(int fd, struct native_event_handler *event_handler,
return &drmdpy->base;
}
+static struct native_event_handler *drm_event_handler;
+
+static void
+native_set_event_handler(struct native_event_handler *event_handler)
+{
+ drm_event_handler = event_handler;
+}
+
static struct native_display *
-native_create_display(void *dpy, struct native_event_handler *event_handler,
- void *user_data)
+native_create_display(void *dpy, boolean use_sw, void *user_data)
{
int fd;
@@ -252,11 +258,12 @@ native_create_display(void *dpy, struct native_event_handler *event_handler,
if (fd < 0)
return NULL;
- return drm_create_display(fd, event_handler, user_data);
+ return drm_create_display(fd, drm_event_handler, user_data);
}
static const struct native_platform drm_platform = {
"DRM", /* name */
+ native_set_event_handler,
native_create_display
};
diff --git a/src/gallium/state_trackers/egl/drm/native_drm.h b/src/gallium/state_trackers/egl/drm/native_drm.h
index 03c4fe01dc..7da9b45f23 100644
--- a/src/gallium/state_trackers/egl/drm/native_drm.h
+++ b/src/gallium/state_trackers/egl/drm/native_drm.h
@@ -91,6 +91,8 @@ struct drm_surface {
boolean is_shown;
struct drm_crtc current_crtc;
+
+ boolean have_pageflip;
};
struct drm_connector {
diff --git a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c
index 1b5ea8bf9d..e2fde00e97 100644
--- a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c
+++ b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c
@@ -320,7 +320,7 @@ fbdev_display_destroy(struct native_display *ndpy)
{
struct fbdev_display *fbdpy = fbdev_display(ndpy);
- fbdpy->base.screen->destroy(fbdpy->base.screen);
+ ndpy_uninit(&fbdpy->base);
close(fbdpy->fd);
FREE(fbdpy);
}
@@ -422,7 +422,7 @@ fbdev_display_init(struct native_display *ndpy)
if (fbdpy->base.screen) {
if (!fbdpy->base.screen->is_format_supported(fbdpy->base.screen,
fbdpy->config.color_format, PIPE_TEXTURE_2D, 0,
- PIPE_BIND_RENDER_TARGET, 0)) {
+ PIPE_BIND_RENDER_TARGET)) {
fbdpy->base.screen->destroy(fbdpy->base.screen);
fbdpy->base.screen = NULL;
}
@@ -459,9 +459,16 @@ fbdev_display_create(int fd, struct native_event_handler *event_handler,
return &fbdpy->base;
}
+static struct native_event_handler *fbdev_event_handler;
+
+static void
+native_set_event_handler(struct native_event_handler *event_handler)
+{
+ fbdev_event_handler = event_handler;
+}
+
static struct native_display *
-native_create_display(void *dpy, struct native_event_handler *event_handler,
- void *user_data)
+native_create_display(void *dpy, boolean use_sw, void *user_data)
{
struct native_display *ndpy;
int fd;
@@ -476,7 +483,7 @@ native_create_display(void *dpy, struct native_event_handler *event_handler,
if (fd < 0)
return NULL;
- ndpy = fbdev_display_create(fd, event_handler, user_data);
+ ndpy = fbdev_display_create(fd, fbdev_event_handler, user_data);
if (!ndpy)
close(fd);
@@ -485,6 +492,7 @@ native_create_display(void *dpy, struct native_event_handler *event_handler,
static const struct native_platform fbdev_platform = {
"FBDEV", /* name */
+ native_set_event_handler,
native_create_display
};
diff --git a/src/gallium/state_trackers/egl/gdi/native_gdi.c b/src/gallium/state_trackers/egl/gdi/native_gdi.c
index d259e6edc8..5d0045f92e 100644
--- a/src/gallium/state_trackers/egl/gdi/native_gdi.c
+++ b/src/gallium/state_trackers/egl/gdi/native_gdi.c
@@ -285,7 +285,7 @@ fill_color_formats(struct native_display *ndpy, enum pipe_format formats[8])
for (i = 0; i < Elements(candidates); i++) {
if (screen->is_format_supported(screen, candidates[i],
- PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET, 0))
+ PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET))
formats[count++] = candidates[i];
}
@@ -319,7 +319,6 @@ gdi_display_get_configs(struct native_display *ndpy, int *num_configs)
nconf->color_format = formats[i];
nconf->window_bit = TRUE;
- nconf->slow_config = TRUE;
}
gdpy->num_configs = count;
@@ -364,7 +363,7 @@ gdi_display_destroy(struct native_display *ndpy)
if (gdpy->configs)
FREE(gdpy->configs);
- gdpy->base.screen->destroy(gdpy->base.screen);
+ ndpy_uninit(ndpy);
FREE(gdpy);
}
@@ -407,15 +406,23 @@ gdi_create_display(HDC hDC, struct native_event_handler *event_handler,
return &gdpy->base;
}
+static struct native_event_handler *gdi_event_handler;
+
+static void
+native_set_event_handler(struct native_event_handler *event_handler)
+{
+ gdi_event_handler = event_handler;
+}
+
static struct native_display *
-native_create_display(void *dpy, struct native_event_handler *event_handler,
- void *user_data)
+native_create_display(void *dpy, boolean use_sw, void *user_data)
{
- return gdi_create_display((HDC) dpy, event_handler, user_data);
+ return gdi_create_display((HDC) dpy, gdi_event_handler, user_data);
}
static const struct native_platform gdi_platform = {
"GDI", /* name */
+ native_set_event_handler,
native_create_display
};
diff --git a/src/gallium/state_trackers/egl/wayland/native_wayland.c b/src/gallium/state_trackers/egl/wayland/native_wayland.c
new file mode 100644
index 0000000000..068c3cd7c8
--- /dev/null
+++ b/src/gallium/state_trackers/egl/wayland/native_wayland.c
@@ -0,0 +1,626 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.11
+ *
+ * Copyright (C) 2011 Benjamin Franzke <benjaminfranzke@googlemail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "state_tracker/drm_driver.h"
+
+#include "egllog.h"
+
+#include "native_wayland.h"
+
+/* see get_drm_screen_name */
+#include <radeon_drm.h>
+#include "radeon/drm/radeon_drm_public.h"
+
+#include <wayland-client.h>
+#include "wayland-drm-client-protocol.h"
+#include "wayland-egl-priv.h"
+
+#include <xf86drm.h>
+
+static struct native_event_handler *wayland_event_handler;
+
+static void
+sync_callback(void *data)
+{
+ int *done = data;
+
+ *done = 1;
+}
+
+static void
+force_roundtrip(struct wl_display *display)
+{
+ int done = 0;
+
+ wl_display_sync_callback(display, sync_callback, &done);
+ wl_display_iterate(display, WL_DISPLAY_WRITABLE);
+ while (!done)
+ wl_display_iterate(display, WL_DISPLAY_READABLE);
+}
+
+static const struct native_config **
+wayland_display_get_configs (struct native_display *ndpy, int *num_configs)
+{
+ struct wayland_display *display = wayland_display(ndpy);
+ const struct native_config **configs;
+
+ if (!display->config) {
+ struct native_config *nconf;
+ enum pipe_format format;
+ display->config = CALLOC(1, sizeof(*display->config));
+ if (!display->config)
+ return NULL;
+ nconf = &display->config->base;
+
+ nconf->buffer_mask =
+ (1 << NATIVE_ATTACHMENT_FRONT_LEFT) |
+ (1 << NATIVE_ATTACHMENT_BACK_LEFT);
+
+ format = PIPE_FORMAT_B8G8R8A8_UNORM;
+
+ nconf->color_format = format;
+ nconf->window_bit = TRUE;
+ nconf->pixmap_bit = TRUE;
+ }
+
+ configs = MALLOC(sizeof(*configs));
+ if (configs) {
+ configs[0] = &display->config->base;
+ if (num_configs)
+ *num_configs = 1;
+ }
+
+ return configs;
+}
+
+static int
+wayland_display_get_param(struct native_display *ndpy,
+ enum native_param_type param)
+{
+ int val;
+
+ switch (param) {
+ case NATIVE_PARAM_USE_NATIVE_BUFFER:
+ case NATIVE_PARAM_PRESERVE_BUFFER:
+ case NATIVE_PARAM_MAX_SWAP_INTERVAL:
+ default:
+ val = 0;
+ break;
+ }
+
+ return val;
+}
+
+static boolean
+wayland_display_is_pixmap_supported(struct native_display *ndpy,
+ EGLNativePixmapType pix,
+ const struct native_config *nconf)
+{
+ /* all wl_egl_pixmaps are supported */
+
+ return TRUE;
+}
+
+static void
+wayland_display_destroy(struct native_display *ndpy)
+{
+ struct wayland_display *display = wayland_display(ndpy);
+
+ if (display->config)
+ FREE(display->config);
+
+ ndpy_uninit(ndpy);
+
+ FREE(display);
+}
+
+
+static struct wl_buffer *
+wayland_create_buffer(struct wayland_surface *surface,
+ enum native_attachment attachment)
+{
+ struct wayland_display *display = surface->display;
+ struct pipe_resource *resource;
+ struct winsys_handle wsh;
+ uint width, height;
+
+ resource = resource_surface_get_single_resource(surface->rsurf, attachment);
+ resource_surface_get_size(surface->rsurf, &width, &height);
+
+ wsh.type = DRM_API_HANDLE_TYPE_SHARED;
+ display->base.screen->resource_get_handle(display->base.screen, resource, &wsh);
+
+ pipe_resource_reference(&resource, NULL);
+
+ return wl_drm_create_buffer(display->dpy->drm, wsh.handle,
+ width, height,
+ wsh.stride, surface->win->visual);
+}
+
+static void
+wayland_pixmap_destroy(struct wl_egl_pixmap *egl_pixmap)
+{
+ struct pipe_resource *resource = egl_pixmap->driver_private;
+
+ assert(resource);
+
+ pipe_resource_reference(&resource, NULL);
+
+ egl_pixmap->driver_private = NULL;
+ egl_pixmap->destroy = NULL;
+ egl_pixmap->name = 0;
+}
+
+static void
+wayland_pixmap_surface_intialize(struct wayland_surface *surface)
+{
+ struct native_display *ndpy = &surface->display->base;
+ struct pipe_resource *resource;
+ struct winsys_handle wsh;
+ const enum native_attachment front_natt = NATIVE_ATTACHMENT_FRONT_LEFT;
+
+ if (surface->pix->name > 0)
+ return;
+
+ resource = resource_surface_get_single_resource(surface->rsurf, front_natt);
+
+ wsh.type = DRM_API_HANDLE_TYPE_SHARED;
+ ndpy->screen->resource_get_handle(ndpy->screen, resource, &wsh);
+
+ surface->pix->name = wsh.handle;
+ surface->pix->stride = wsh.stride;
+ surface->pix->destroy = wayland_pixmap_destroy;
+ surface->pix->driver_private = resource;
+}
+
+static void
+wayland_release_pending_resource(void *data)
+{
+ struct wayland_surface *surface = data;
+
+ /* FIXME: print internal error */
+ if (!surface->pending_resource)
+ return;
+
+ pipe_resource_reference(&surface->pending_resource, NULL);
+}
+
+static void
+wayland_window_surface_handle_resize(struct wayland_surface *surface)
+{
+ struct wayland_display *display = surface->display;
+ struct pipe_resource *front_resource;
+ const enum native_attachment front_natt = NATIVE_ATTACHMENT_FRONT_LEFT;
+ int i;
+
+ front_resource = resource_surface_get_single_resource(surface->rsurf,
+ front_natt);
+ if (resource_surface_set_size(surface->rsurf,
+ surface->win->width, surface->win->height)) {
+
+ if (surface->pending_resource)
+ force_roundtrip(display->dpy->display);
+
+ if (front_resource) {
+ surface->pending_resource = front_resource;
+ front_resource = NULL;
+ wl_display_sync_callback(display->dpy->display,
+ wayland_release_pending_resource, surface);
+ }
+
+ for (i = 0; i < WL_BUFFER_COUNT; ++i) {
+ if (surface->buffer[i])
+ wl_buffer_destroy(surface->buffer[i]);
+ surface->buffer[i] = NULL;
+ }
+ }
+ pipe_resource_reference(&front_resource, NULL);
+
+ surface->dx = surface->win->dx;
+ surface->dy = surface->win->dy;
+ surface->win->dx = 0;
+ surface->win->dy = 0;
+}
+
+static boolean
+wayland_surface_validate(struct native_surface *nsurf, uint attachment_mask,
+ unsigned int *seq_num, struct pipe_resource **textures,
+ int *width, int *height)
+{
+ struct wayland_surface *surface = wayland_surface(nsurf);
+
+ if (surface->type == WL_WINDOW_SURFACE)
+ wayland_window_surface_handle_resize(surface);
+
+ if (!resource_surface_add_resources(surface->rsurf, attachment_mask |
+ surface->attachment_mask))
+ return FALSE;
+
+ if (textures)
+ resource_surface_get_resources(surface->rsurf, textures, attachment_mask);
+
+ if (seq_num)
+ *seq_num = surface->sequence_number;
+
+ resource_surface_get_size(surface->rsurf, (uint *) width, (uint *) height);
+
+ if (surface->type == WL_PIXMAP_SURFACE)
+ wayland_pixmap_surface_intialize(surface);
+
+ return TRUE;
+}
+
+static void
+wayland_frame_callback(void *data, uint32_t time)
+{
+ struct wayland_surface *surface = data;
+
+ surface->block_swap_buffers = FALSE;
+}
+
+static INLINE void
+wayland_buffers_swap(struct wl_buffer **buffer,
+ enum wayland_buffer_type buf1,
+ enum wayland_buffer_type buf2)
+{
+ struct wl_buffer *tmp = buffer[buf1];
+ buffer[buf1] = buffer[buf2];
+ buffer[buf2] = tmp;
+}
+
+static boolean
+wayland_surface_swap_buffers(struct native_surface *nsurf)
+{
+ struct wayland_surface *surface = wayland_surface(nsurf);
+ struct wayland_display *display = surface->display;
+
+ while (surface->block_swap_buffers)
+ wl_display_iterate(display->dpy->display, WL_DISPLAY_READABLE);
+
+ surface->block_swap_buffers = TRUE;
+ wl_display_frame_callback(display->dpy->display, wayland_frame_callback,
+ surface);
+
+ if (surface->type == WL_WINDOW_SURFACE) {
+ resource_surface_swap_buffers(surface->rsurf,
+ NATIVE_ATTACHMENT_FRONT_LEFT, NATIVE_ATTACHMENT_BACK_LEFT, FALSE);
+
+ wayland_buffers_swap(surface->buffer, WL_BUFFER_FRONT, WL_BUFFER_BACK);
+
+ if (surface->buffer[WL_BUFFER_FRONT] == NULL)
+ surface->buffer[WL_BUFFER_FRONT] =
+ wayland_create_buffer(surface, NATIVE_ATTACHMENT_FRONT_LEFT);
+
+ wl_surface_attach(surface->win->surface, surface->buffer[WL_BUFFER_FRONT],
+ surface->dx, surface->dy);
+
+ resource_surface_get_size(surface->rsurf,
+ (uint *) &surface->win->attached_width,
+ (uint *) &surface->win->attached_height);
+ surface->dx = 0;
+ surface->dy = 0;
+ }
+
+ surface->sequence_number++;
+ wayland_event_handler->invalid_surface(&display->base,
+ &surface->base, surface->sequence_number);
+
+ return TRUE;
+}
+
+static boolean
+wayland_surface_present(struct native_surface *nsurf,
+ enum native_attachment natt,
+ boolean preserve,
+ uint swap_interval)
+{
+ struct wayland_surface *surface = wayland_surface(nsurf);
+ uint width, height;
+ boolean ret;
+
+ if (preserve || swap_interval)
+ return FALSE;
+
+ switch (natt) {
+ case NATIVE_ATTACHMENT_FRONT_LEFT:
+ ret = TRUE;
+ break;
+ case NATIVE_ATTACHMENT_BACK_LEFT:
+ ret = wayland_surface_swap_buffers(nsurf);
+ break;
+ default:
+ ret = FALSE;
+ break;
+ }
+
+ if (surface->type == WL_WINDOW_SURFACE) {
+ resource_surface_get_size(surface->rsurf, &width, &height);
+ wl_surface_damage(surface->win->surface, 0, 0, width, height);
+ }
+
+ return ret;
+}
+
+static void
+wayland_surface_wait(struct native_surface *nsurf)
+{
+ /* no-op */
+}
+
+static void
+wayland_surface_destroy(struct native_surface *nsurf)
+{
+ struct wayland_surface *surface = wayland_surface(nsurf);
+ enum wayland_buffer_type buffer;
+
+ for (buffer = 0; buffer < WL_BUFFER_COUNT; ++buffer) {
+ if (surface->buffer[buffer])
+ wl_buffer_destroy(surface->buffer[buffer]);
+ }
+
+ resource_surface_destroy(surface->rsurf);
+ FREE(surface);
+}
+
+static struct native_surface *
+wayland_create_pixmap_surface(struct native_display *ndpy,
+ EGLNativePixmapType pix,
+ const struct native_config *nconf)
+{
+ struct wayland_display *display = wayland_display(ndpy);
+ struct wayland_config *config = wayland_config(nconf);
+ struct wayland_surface *surface;
+ struct wl_egl_pixmap *egl_pixmap = (struct wl_egl_pixmap *) pix;
+ enum native_attachment natt = NATIVE_ATTACHMENT_FRONT_LEFT;
+
+ surface = CALLOC_STRUCT(wayland_surface);
+ if (!surface)
+ return NULL;
+
+ surface->display = display;
+
+ surface->pending_resource = NULL;
+ surface->type = WL_PIXMAP_SURFACE;
+ surface->pix = egl_pixmap;
+
+ if (surface->pix->visual == wl_display_get_rgb_visual(display->dpy->display))
+ surface->color_format = PIPE_FORMAT_B8G8R8X8_UNORM;
+ else
+ surface->color_format = PIPE_FORMAT_B8G8R8A8_UNORM;
+
+ surface->attachment_mask = (1 << NATIVE_ATTACHMENT_FRONT_LEFT);
+
+ surface->rsurf = resource_surface_create(display->base.screen,
+ surface->color_format,
+ PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW |
+ PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT);
+
+ if (!surface->rsurf) {
+ FREE(surface);
+ return NULL;
+ }
+
+ resource_surface_set_size(surface->rsurf,
+ egl_pixmap->width, egl_pixmap->height);
+
+ /* the pixmap is already allocated, so import it */
+ if (surface->pix->name > 0)
+ resource_surface_import_resource(surface->rsurf, natt,
+ surface->pix->driver_private);
+
+ surface->base.destroy = wayland_surface_destroy;
+ surface->base.present = wayland_surface_present;
+ surface->base.validate = wayland_surface_validate;
+ surface->base.wait = wayland_surface_wait;
+
+ return &surface->base;
+}
+
+static struct native_surface *
+wayland_create_window_surface(struct native_display *ndpy,
+ EGLNativeWindowType win,
+ const struct native_config *nconf)
+{
+ struct wayland_display *display = wayland_display(ndpy);
+ struct wayland_config *config = wayland_config(nconf);
+ struct wayland_surface *surface;
+
+ surface = CALLOC_STRUCT(wayland_surface);
+ if (!surface)
+ return NULL;
+
+ surface->display = display;
+ surface->color_format = config->base.color_format;
+
+ surface->win = (struct wl_egl_window *) win;
+
+ surface->pending_resource = NULL;
+ surface->block_swap_buffers = FALSE;
+ surface->type = WL_WINDOW_SURFACE;
+
+ surface->buffer[WL_BUFFER_FRONT] = NULL;
+ surface->buffer[WL_BUFFER_BACK] = NULL;
+ surface->attachment_mask = (1 << NATIVE_ATTACHMENT_FRONT_LEFT) |
+ (1 << NATIVE_ATTACHMENT_BACK_LEFT);
+
+ surface->rsurf = resource_surface_create(display->base.screen,
+ surface->color_format,
+ PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW |
+ PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT);
+
+ if (!surface->rsurf) {
+ FREE(surface);
+ return NULL;
+ }
+
+ surface->base.destroy = wayland_surface_destroy;
+ surface->base.present = wayland_surface_present;
+ surface->base.validate = wayland_surface_validate;
+ surface->base.wait = wayland_surface_wait;
+
+ return &surface->base;
+}
+
+static const char *
+get_drm_screen_name(int fd, drmVersionPtr version)
+{
+ const char *name = version->name;
+
+ if (name && !strcmp(name, "radeon")) {
+ int chip_id;
+ struct drm_radeon_info info;
+
+ memset(&info, 0, sizeof(info));
+ info.request = RADEON_INFO_DEVICE_ID;
+ info.value = pointer_to_intptr(&chip_id);
+ if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) != 0)
+ return NULL;
+
+ name = is_r3xx(chip_id) ? "r300" : "r600";
+ }
+
+ return name;
+}
+
+static boolean
+wayland_display_init_screen(struct native_display *ndpy)
+{
+ struct wayland_display *display = wayland_display(ndpy);
+ drmVersionPtr version;
+ const char *driver_name;
+
+ if (display->dpy->fd == -1)
+ force_roundtrip(display->dpy->display);
+ if (display->dpy->fd == -1)
+ return FALSE;
+
+ if (!display->dpy->authenticated)
+ force_roundtrip(display->dpy->display);
+ if (!display->dpy->authenticated)
+ return FALSE;
+
+ version = drmGetVersion(display->dpy->fd);
+ if (!version) {
+ _eglLog(_EGL_WARNING, "invalid fd %d", display->dpy->fd);
+ return FALSE;
+ }
+
+ /* FIXME: share this with native_drm or egl_dri2 */
+ driver_name = get_drm_screen_name(display->dpy->fd, version);
+
+ display->base.screen =
+ wayland_event_handler->new_drm_screen(&display->base,
+ driver_name, display->dpy->fd);
+ drmFreeVersion(version);
+
+ if (!display->base.screen) {
+ _eglLog(_EGL_WARNING, "failed to create DRM screen");
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
+static void
+wayland_set_event_handler(struct native_event_handler *event_handler)
+{
+ wayland_event_handler = event_handler;
+}
+
+static struct pipe_resource *
+wayland_display_import_buffer(struct native_display *ndpy,
+ const struct pipe_resource *templ,
+ void *buf)
+{
+ return ndpy->screen->resource_from_handle(ndpy->screen,
+ templ, (struct winsys_handle *) buf);
+}
+
+static boolean
+wayland_display_export_buffer(struct native_display *ndpy,
+ struct pipe_resource *res,
+ void *buf)
+{
+ return ndpy->screen->resource_get_handle(ndpy->screen,
+ res, (struct winsys_handle *) buf);
+}
+
+static struct native_display_buffer wayland_display_buffer = {
+ wayland_display_import_buffer,
+ wayland_display_export_buffer
+};
+
+static struct native_display *
+wayland_display_create(void *dpy, boolean use_sw, void *user_data)
+{
+ struct wayland_display *display;
+
+ display = CALLOC_STRUCT(wayland_display);
+ if (!display)
+ return NULL;
+
+ display->base.user_data = user_data;
+
+ display->dpy = dpy;
+ if (!display->dpy->display) {
+ wayland_display_destroy(&display->base);
+ return NULL;
+ }
+
+ if (!wayland_display_init_screen(&display->base)) {
+ wayland_display_destroy(&display->base);
+ return NULL;
+ }
+
+ display->base.destroy = wayland_display_destroy;
+ display->base.get_param = wayland_display_get_param;
+ display->base.get_configs = wayland_display_get_configs;
+ display->base.is_pixmap_supported = wayland_display_is_pixmap_supported;
+ display->base.create_window_surface = wayland_create_window_surface;
+ display->base.create_pixmap_surface = wayland_create_pixmap_surface;
+ display->base.buffer = &wayland_display_buffer;
+
+ return &display->base;
+}
+
+static const struct native_platform wayland_platform = {
+ "wayland", /* name */
+ wayland_set_event_handler,
+ wayland_display_create
+};
+
+const struct native_platform *
+native_get_wayland_platform(void)
+{
+ return &wayland_platform;
+}
diff --git a/src/gallium/state_trackers/egl/wayland/native_wayland.h b/src/gallium/state_trackers/egl/wayland/native_wayland.h
new file mode 100644
index 0000000000..271c10dc11
--- /dev/null
+++ b/src/gallium/state_trackers/egl/wayland/native_wayland.h
@@ -0,0 +1,97 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.11
+ *
+ * Copyright (C) 2011 Benjamin Franzke <benjaminfranzke@googlemail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _NATIVE_WAYLAND_H_
+#define _NATIVE_WAYLAND_H_
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+
+#include "common/native.h"
+#include "common/native_helper.h"
+
+#include "wayland-egl-priv.h"
+
+struct wayland_display {
+ struct native_display base;
+
+ struct wayland_config *config;
+ struct wl_egl_display *dpy;
+};
+
+enum wayland_buffer_type {
+ WL_BUFFER_FRONT,
+ WL_BUFFER_BACK,
+ WL_BUFFER_COUNT
+};
+
+enum wayland_surface_type {
+ WL_WINDOW_SURFACE,
+ WL_PIXMAP_SURFACE,
+ WL_PBUFFER_SURFACE
+};
+
+struct wayland_surface {
+ struct native_surface base;
+ struct wayland_display *display;
+
+ struct wl_egl_window *win;
+ struct wl_egl_pixmap *pix;
+ enum wayland_surface_type type;
+ int dx, dy;
+ struct resource_surface *rsurf;
+ struct pipe_resource *pending_resource;
+ enum pipe_format color_format;
+
+ unsigned int sequence_number;
+ struct wl_buffer *buffer[WL_BUFFER_COUNT];
+ unsigned int attachment_mask;
+
+ boolean block_swap_buffers;
+};
+
+struct wayland_config {
+ struct native_config base;
+};
+
+static INLINE struct wayland_display *
+wayland_display(const struct native_display *ndpy)
+{
+ return (struct wayland_display *) ndpy;
+}
+
+static INLINE struct wayland_surface *
+wayland_surface(const struct native_surface *nsurf)
+{
+ return (struct wayland_surface *) nsurf;
+}
+
+static INLINE struct wayland_config *
+wayland_config(const struct native_config *nconf)
+{
+ return (struct wayland_config *) nconf;
+}
+
+#endif /* _NATIVE_WAYLAND_H_ */
diff --git a/src/gallium/state_trackers/egl/x11/native_dri2.c b/src/gallium/state_trackers/egl/x11/native_dri2.c
index 8108ce4586..5afca67a4d 100644
--- a/src/gallium/state_trackers/egl/x11/native_dri2.c
+++ b/src/gallium/state_trackers/egl/x11/native_dri2.c
@@ -40,11 +40,6 @@
#ifdef GLX_DIRECT_RENDERING
-enum dri2_surface_type {
- DRI2_SURFACE_TYPE_WINDOW,
- DRI2_SURFACE_TYPE_PIXMAP,
-};
-
struct dri2_display {
struct native_display base;
Display *dpy;
@@ -66,7 +61,6 @@ struct dri2_display {
struct dri2_surface {
struct native_surface base;
Drawable drawable;
- enum dri2_surface_type type;
enum pipe_format color_format;
struct dri2_display *dri2dpy;
@@ -439,12 +433,10 @@ dri2_surface_destroy(struct native_surface *nsurf)
static struct dri2_surface *
dri2_display_create_surface(struct native_display *ndpy,
- enum dri2_surface_type type,
Drawable drawable,
- const struct native_config *nconf)
+ enum pipe_format color_format)
{
struct dri2_display *dri2dpy = dri2_display(ndpy);
- struct dri2_config *dri2conf = dri2_config(nconf);
struct dri2_surface *dri2surf;
dri2surf = CALLOC_STRUCT(dri2_surface);
@@ -452,9 +444,8 @@ dri2_display_create_surface(struct native_display *ndpy,
return NULL;
dri2surf->dri2dpy = dri2dpy;
- dri2surf->type = type;
dri2surf->drawable = drawable;
- dri2surf->color_format = dri2conf->base.color_format;
+ dri2surf->color_format = color_format;
dri2surf->base.destroy = dri2_surface_destroy;
dri2surf->base.present = dri2_surface_present;
@@ -480,8 +471,8 @@ dri2_display_create_window_surface(struct native_display *ndpy,
{
struct dri2_surface *dri2surf;
- dri2surf = dri2_display_create_surface(ndpy, DRI2_SURFACE_TYPE_WINDOW,
- (Drawable) win, nconf);
+ dri2surf = dri2_display_create_surface(ndpy,
+ (Drawable) win, nconf->color_format);
return (dri2surf) ? &dri2surf->base : NULL;
}
@@ -492,8 +483,29 @@ dri2_display_create_pixmap_surface(struct native_display *ndpy,
{
struct dri2_surface *dri2surf;
- dri2surf = dri2_display_create_surface(ndpy, DRI2_SURFACE_TYPE_PIXMAP,
- (Drawable) pix, nconf);
+ if (!nconf) {
+ struct dri2_display *dri2dpy = dri2_display(ndpy);
+ uint depth, nconf_depth;
+ int i;
+
+ depth = x11_drawable_get_depth(dri2dpy->xscr, (Drawable) pix);
+ for (i = 0; i < dri2dpy->num_configs; i++) {
+ nconf_depth = util_format_get_blocksizebits(
+ dri2dpy->configs[i].base.color_format);
+ /* simple depth match for now */
+ if (depth == nconf_depth ||
+ (depth == 24 && depth + 8 == nconf_depth)) {
+ nconf = &dri2dpy->configs[i].base;
+ break;
+ }
+ }
+
+ if (!nconf)
+ return NULL;
+ }
+
+ dri2surf = dri2_display_create_surface(ndpy,
+ (Drawable) pix, nconf->color_format);
return (dri2surf) ? &dri2surf->base : NULL;
}
@@ -529,7 +541,7 @@ is_format_supported(struct pipe_screen *screen,
{
return screen->is_format_supported(screen, fmt, PIPE_TEXTURE_2D, sample_count,
(is_color) ? PIPE_BIND_RENDER_TARGET :
- PIPE_BIND_DEPTH_STENCIL, 0);
+ PIPE_BIND_DEPTH_STENCIL);
}
static boolean
@@ -548,6 +560,10 @@ dri2_display_convert_config(struct native_display *ndpy,
if (!mode->xRenderable || !mode->drawableType)
return FALSE;
+ /* fast/slow configs are probably not relevant */
+ if (mode->visualRating == GLX_SLOW_CONFIG)
+ return FALSE;
+
nconf->buffer_mask = 1 << NATIVE_ATTACHMENT_FRONT_LEFT;
if (mode->doubleBufferMode)
nconf->buffer_mask |= 1 << NATIVE_ATTACHMENT_BACK_LEFT;
@@ -568,17 +584,33 @@ dri2_display_convert_config(struct native_display *ndpy,
if (nconf->color_format == PIPE_FORMAT_NONE)
return FALSE;
- if (mode->drawableType & GLX_WINDOW_BIT)
+ if ((mode->drawableType & GLX_WINDOW_BIT) && mode->visualID)
nconf->window_bit = TRUE;
if (mode->drawableType & GLX_PIXMAP_BIT)
nconf->pixmap_bit = TRUE;
nconf->native_visual_id = mode->visualID;
- nconf->native_visual_type = mode->visualType;
+ switch (mode->visualType) {
+ case GLX_TRUE_COLOR:
+ nconf->native_visual_type = TrueColor;
+ break;
+ case GLX_DIRECT_COLOR:
+ nconf->native_visual_type = DirectColor;
+ break;
+ case GLX_PSEUDO_COLOR:
+ nconf->native_visual_type = PseudoColor;
+ break;
+ case GLX_STATIC_COLOR:
+ nconf->native_visual_type = StaticColor;
+ break;
+ case GLX_GRAY_SCALE:
+ nconf->native_visual_type = GrayScale;
+ break;
+ case GLX_STATIC_GRAY:
+ nconf->native_visual_type = StaticGray;
+ break;
+ }
nconf->level = mode->level;
- nconf->samples = mode->samples;
-
- nconf->slow_config = (mode->visualRating == GLX_SLOW_CONFIG);
if (mode->transparentPixel == GLX_TRANSPARENT_RGB) {
nconf->transparent_rgb = TRUE;
@@ -614,8 +646,17 @@ dri2_display_get_configs(struct native_display *ndpy, int *num_configs)
count = 0;
for (i = 0; i < num_modes; i++) {
struct native_config *nconf = &dri2dpy->configs[count].base;
- if (dri2_display_convert_config(&dri2dpy->base, modes, nconf))
- count++;
+
+ if (dri2_display_convert_config(&dri2dpy->base, modes, nconf)) {
+ int j;
+ /* look for duplicates */
+ for (j = 0; j < count; j++) {
+ if (memcmp(&dri2dpy->configs[j], nconf, sizeof(*nconf)) == 0)
+ break;
+ }
+ if (j == count)
+ count++;
+ }
modes = modes->next;
}
@@ -741,7 +782,7 @@ dri2_display_init_screen(struct native_display *ndpy)
dri2dpy->event_handler->new_drm_screen(&dri2dpy->base,
dri2dpy->dri_driver, fd);
if (!dri2dpy->base.screen) {
- _eglLog(_EGL_WARNING, "failed to create DRM screen");
+ _eglLog(_EGL_DEBUG, "failed to create DRM screen");
return FALSE;
}
@@ -758,7 +799,7 @@ dri2_display_hash_table_hash(void *key)
static int
dri2_display_hash_table_compare(void *key1, void *key2)
{
- return (key1 - key2);
+ return ((char *) key1 - (char *) key2);
}
struct native_display *
diff --git a/src/gallium/state_trackers/egl/x11/native_x11.c b/src/gallium/state_trackers/egl/x11/native_x11.c
index 37c8b01541..a0bcad4c73 100644
--- a/src/gallium/state_trackers/egl/x11/native_x11.c
+++ b/src/gallium/state_trackers/egl/x11/native_x11.c
@@ -30,25 +30,30 @@
#include "native_x11.h"
+static struct native_event_handler *x11_event_handler;
+
+static void
+native_set_event_handler(struct native_event_handler *event_handler)
+{
+ x11_event_handler = event_handler;
+}
+
static struct native_display *
-native_create_display(void *dpy, struct native_event_handler *event_handler,
- void *user_data)
+native_create_display(void *dpy, boolean use_sw, void *user_data)
{
struct native_display *ndpy = NULL;
boolean force_sw;
force_sw = debug_get_bool_option("EGL_SOFTWARE", FALSE);
- if (!force_sw) {
- ndpy = x11_create_dri2_display((Display *) dpy,
- event_handler, user_data);
- }
-
- if (!ndpy) {
- EGLint level = (force_sw) ? _EGL_INFO : _EGL_WARNING;
- _eglLog(level, "use software fallback");
+ if (force_sw || use_sw) {
+ _eglLog(_EGL_INFO, "use software fallback");
ndpy = x11_create_ximage_display((Display *) dpy,
- event_handler, user_data);
+ x11_event_handler, user_data);
+ }
+ else {
+ ndpy = x11_create_dri2_display((Display *) dpy,
+ x11_event_handler, user_data);
}
return ndpy;
@@ -56,6 +61,7 @@ native_create_display(void *dpy, struct native_event_handler *event_handler,
static const struct native_platform x11_platform = {
"X11", /* name */
+ native_set_event_handler,
native_create_display
};
diff --git a/src/gallium/state_trackers/egl/x11/native_ximage.c b/src/gallium/state_trackers/egl/x11/native_ximage.c
index 84811fb6e1..8e32c6ff0c 100644
--- a/src/gallium/state_trackers/egl/x11/native_ximage.c
+++ b/src/gallium/state_trackers/egl/x11/native_ximage.c
@@ -38,11 +38,6 @@
#include "native_x11.h"
#include "x11_screen.h"
-enum ximage_surface_type {
- XIMAGE_SURFACE_TYPE_WINDOW,
- XIMAGE_SURFACE_TYPE_PIXMAP,
-};
-
struct ximage_display {
struct native_display base;
Display *dpy;
@@ -60,7 +55,6 @@ struct ximage_display {
struct ximage_surface {
struct native_surface base;
Drawable drawable;
- enum ximage_surface_type type;
enum pipe_format color_format;
XVisualInfo visual;
struct ximage_display *xdpy;
@@ -245,7 +239,6 @@ ximage_surface_destroy(struct native_surface *nsurf)
static struct ximage_surface *
ximage_display_create_surface(struct native_display *ndpy,
- enum ximage_surface_type type,
Drawable drawable,
const struct native_config *nconf)
{
@@ -258,7 +251,6 @@ ximage_display_create_surface(struct native_display *ndpy,
return NULL;
xsurf->xdpy = xdpy;
- xsurf->type = type;
xsurf->color_format = xconf->base.color_format;
xsurf->drawable = drawable;
@@ -297,11 +289,37 @@ ximage_display_create_window_surface(struct native_display *ndpy,
{
struct ximage_surface *xsurf;
- xsurf = ximage_display_create_surface(ndpy, XIMAGE_SURFACE_TYPE_WINDOW,
- (Drawable) win, nconf);
+ xsurf = ximage_display_create_surface(ndpy, (Drawable) win, nconf);
return (xsurf) ? &xsurf->base : NULL;
}
+static enum pipe_format
+get_pixmap_format(struct native_display *ndpy, EGLNativePixmapType pix)
+{
+ struct ximage_display *xdpy = ximage_display(ndpy);
+ enum pipe_format fmt;
+ uint depth;
+
+ depth = x11_drawable_get_depth(xdpy->xscr, (Drawable) pix);
+
+ switch (depth) {
+ case 32:
+ fmt = PIPE_FORMAT_B8G8R8A8_UNORM;
+ break;
+ case 24:
+ fmt = PIPE_FORMAT_B8G8R8X8_UNORM;
+ break;
+ case 16:
+ fmt = PIPE_FORMAT_B5G6R5_UNORM;
+ break;
+ default:
+ fmt = PIPE_FORMAT_NONE;
+ break;
+ }
+
+ return fmt;
+}
+
static struct native_surface *
ximage_display_create_pixmap_surface(struct native_display *ndpy,
EGLNativePixmapType pix,
@@ -309,8 +327,26 @@ ximage_display_create_pixmap_surface(struct native_display *ndpy,
{
struct ximage_surface *xsurf;
- xsurf = ximage_display_create_surface(ndpy, XIMAGE_SURFACE_TYPE_PIXMAP,
- (Drawable) pix, nconf);
+ /* find the config */
+ if (!nconf) {
+ struct ximage_display *xdpy = ximage_display(ndpy);
+ enum pipe_format fmt = get_pixmap_format(&xdpy->base, pix);
+ int i;
+
+ if (fmt != PIPE_FORMAT_NONE) {
+ for (i = 0; i < xdpy->num_configs; i++) {
+ if (xdpy->configs[i].base.color_format == fmt) {
+ nconf = &xdpy->configs[i].base;
+ break;
+ }
+ }
+ }
+
+ if (!nconf)
+ return NULL;
+ }
+
+ xsurf = ximage_display_create_surface(ndpy, (Drawable) pix, nconf);
return (xsurf) ? &xsurf->base : NULL;
}
@@ -384,8 +420,6 @@ ximage_display_get_configs(struct native_display *ndpy, int *num_configs)
xconf->base.native_visual_type = xconf->visual->class;
#endif
- xconf->base.slow_config = TRUE;
-
count++;
}
@@ -408,24 +442,7 @@ ximage_display_is_pixmap_supported(struct native_display *ndpy,
const struct native_config *nconf)
{
struct ximage_display *xdpy = ximage_display(ndpy);
- enum pipe_format fmt;
- uint depth;
-
- depth = x11_drawable_get_depth(xdpy->xscr, (Drawable) pix);
- switch (depth) {
- case 32:
- fmt = PIPE_FORMAT_B8G8R8A8_UNORM;
- break;
- case 24:
- fmt = PIPE_FORMAT_B8G8R8X8_UNORM;
- break;
- case 16:
- fmt = PIPE_FORMAT_B5G6R5_UNORM;
- break;
- default:
- fmt = PIPE_FORMAT_NONE;
- break;
- }
+ enum pipe_format fmt = get_pixmap_format(&xdpy->base, pix);
return (fmt == nconf->color_format);
}
@@ -459,7 +476,7 @@ ximage_display_destroy(struct native_display *ndpy)
if (xdpy->configs)
FREE(xdpy->configs);
- xdpy->base.screen->destroy(xdpy->base.screen);
+ ndpy_uninit(ndpy);
x11_screen_destroy(xdpy->xscr);
if (xdpy->own_dpy)
diff --git a/src/gallium/state_trackers/glx/xlib/glx_api.c b/src/gallium/state_trackers/glx/xlib/glx_api.c
index 205a7e3c19..351267bae4 100644
--- a/src/gallium/state_trackers/glx/xlib/glx_api.c
+++ b/src/gallium/state_trackers/glx/xlib/glx_api.c
@@ -1367,8 +1367,8 @@ glXSwapBuffers( Display *dpy, GLXDrawable drawable )
/*** GLX_MESA_copy_sub_buffer ***/
PUBLIC void
-glXCopySubBufferMESA( Display *dpy, GLXDrawable drawable,
- int x, int y, int width, int height )
+glXCopySubBufferMESA(Display *dpy, GLXDrawable drawable,
+ int x, int y, int width, int height)
{
XMesaBuffer buffer = XMesaFindBuffer( dpy, drawable );
if (buffer) {
@@ -1718,8 +1718,8 @@ glXGetClientString( Display *dpy, int name )
PUBLIC int
-glXGetFBConfigAttrib( Display *dpy, GLXFBConfig config,
- int attribute, int *value )
+glXGetFBConfigAttrib(Display *dpy, GLXFBConfig config,
+ int attribute, int *value)
{
XMesaVisual v = (XMesaVisual) config;
(void) dpy;
@@ -1763,8 +1763,8 @@ glXGetFBConfigs( Display *dpy, int screen, int *nelements )
PUBLIC GLXFBConfig *
-glXChooseFBConfig( Display *dpy, int screen,
- const int *attribList, int *nitems )
+glXChooseFBConfig(Display *dpy, int screen,
+ const int *attribList, int *nitems)
{
XMesaVisual xmvis;
@@ -1814,8 +1814,8 @@ glXGetVisualFromFBConfig( Display *dpy, GLXFBConfig config )
PUBLIC GLXWindow
-glXCreateWindow( Display *dpy, GLXFBConfig config, Window win,
- const int *attribList )
+glXCreateWindow(Display *dpy, GLXFBConfig config, Window win,
+ const int *attribList)
{
XMesaVisual xmvis = (XMesaVisual) config;
XMesaBuffer xmbuf;
@@ -1845,8 +1845,8 @@ glXDestroyWindow( Display *dpy, GLXWindow window )
/* XXX untested */
PUBLIC GLXPixmap
-glXCreatePixmap( Display *dpy, GLXFBConfig config, Pixmap pixmap,
- const int *attribList )
+glXCreatePixmap(Display *dpy, GLXFBConfig config, Pixmap pixmap,
+ const int *attribList)
{
XMesaVisual v = (XMesaVisual) config;
XMesaBuffer b;
@@ -1965,8 +1965,7 @@ glXDestroyPixmap( Display *dpy, GLXPixmap pixmap )
PUBLIC GLXPbuffer
-glXCreatePbuffer( Display *dpy, GLXFBConfig config,
- const int *attribList )
+glXCreatePbuffer(Display *dpy, GLXFBConfig config, const int *attribList)
{
XMesaVisual xmvis = (XMesaVisual) config;
XMesaBuffer xmbuf;
@@ -2038,8 +2037,8 @@ glXDestroyPbuffer( Display *dpy, GLXPbuffer pbuf )
PUBLIC void
-glXQueryDrawable( Display *dpy, GLXDrawable draw, int attribute,
- unsigned int *value )
+glXQueryDrawable(Display *dpy, GLXDrawable draw, int attribute,
+ unsigned int *value)
{
GLuint width, height;
XMesaBuffer xmbuf = XMesaFindBuffer(dpy, draw);
@@ -2085,7 +2084,7 @@ glXQueryDrawable( Display *dpy, GLXDrawable draw, int attribute,
PUBLIC GLXContext
glXCreateNewContext( Display *dpy, GLXFBConfig config,
- int renderType, GLXContext shareList, Bool direct )
+ int renderType, GLXContext shareList, Bool direct )
{
GLXContext glxCtx;
GLXContext shareCtx = shareList;
@@ -2156,8 +2155,7 @@ glXSelectEvent( Display *dpy, GLXDrawable drawable, unsigned long mask )
PUBLIC void
-glXGetSelectedEvent( Display *dpy, GLXDrawable drawable,
- unsigned long *mask )
+glXGetSelectedEvent(Display *dpy, GLXDrawable drawable, unsigned long *mask)
{
XMesaBuffer xmbuf = XMesaFindBuffer(dpy, drawable);
if (xmbuf)
@@ -2209,7 +2207,8 @@ glXWaitVideoSyncSGI(int divisor, int remainder, unsigned int *count)
/*** GLX_SGI_make_current_read ***/
PUBLIC Bool
-glXMakeCurrentReadSGI(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx)
+glXMakeCurrentReadSGI(Display *dpy, GLXDrawable draw, GLXDrawable read,
+ GLXContext ctx)
{
return glXMakeContextCurrent( dpy, draw, read, ctx );
}
@@ -2227,7 +2226,8 @@ glXGetCurrentReadDrawableSGI(void)
#if defined(_VL_H)
PUBLIC GLXVideoSourceSGIX
-glXCreateGLXVideoSourceSGIX(Display *dpy, int screen, VLServer server, VLPath path, int nodeClass, VLNode drainNode)
+glXCreateGLXVideoSourceSGIX(Display *dpy, int screen, VLServer server,
+ VLPath path, int nodeClass, VLNode drainNode)
{
(void) dpy;
(void) screen;
@@ -2273,7 +2273,8 @@ glXImportContextEXT(Display *dpy, GLXContextID contextID)
}
PUBLIC int
-glXQueryContextInfoEXT(Display *dpy, GLXContext context, int attribute, int *value)
+glXQueryContextInfoEXT(Display *dpy, GLXContext context, int attribute,
+ int *value)
{
(void) dpy;
(void) context;
@@ -2287,20 +2288,24 @@ glXQueryContextInfoEXT(Display *dpy, GLXContext context, int attribute, int *val
/*** GLX_SGIX_fbconfig ***/
PUBLIC int
-glXGetFBConfigAttribSGIX(Display *dpy, GLXFBConfigSGIX config, int attribute, int *value)
+glXGetFBConfigAttribSGIX(Display *dpy, GLXFBConfigSGIX config,
+ int attribute, int *value)
{
return glXGetFBConfigAttrib(dpy, config, attribute, value);
}
PUBLIC GLXFBConfigSGIX *
-glXChooseFBConfigSGIX(Display *dpy, int screen, int *attrib_list, int *nelements)
+glXChooseFBConfigSGIX(Display *dpy, int screen, int *attrib_list,
+ int *nelements)
{
- return (GLXFBConfig *) glXChooseFBConfig(dpy, screen, attrib_list, nelements);
+ return (GLXFBConfig *) glXChooseFBConfig(dpy, screen,
+ attrib_list, nelements);
}
PUBLIC GLXPixmap
-glXCreateGLXPixmapWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, Pixmap pixmap)
+glXCreateGLXPixmapWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config,
+ Pixmap pixmap)
{
XMesaVisual xmvis = (XMesaVisual) config;
XMesaBuffer xmbuf = XMesaCreatePixmapBuffer(xmvis, pixmap, 0);
@@ -2309,7 +2314,9 @@ glXCreateGLXPixmapWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, Pixmap pi
PUBLIC GLXContext
-glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, int render_type, GLXContext share_list, Bool direct)
+glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config,
+ int render_type, GLXContext share_list,
+ Bool direct)
{
XMesaVisual xmvis = (XMesaVisual) config;
GLXContext glxCtx;
@@ -2362,8 +2369,8 @@ glXGetFBConfigFromVisualSGIX(Display *dpy, XVisualInfo *vis)
PUBLIC GLXPbufferSGIX
glXCreateGLXPbufferSGIX(Display *dpy, GLXFBConfigSGIX config,
- unsigned int width, unsigned int height,
- int *attribList)
+ unsigned int width, unsigned int height,
+ int *attribList)
{
XMesaVisual xmvis = (XMesaVisual) config;
XMesaBuffer xmbuf;
@@ -2410,7 +2417,8 @@ glXDestroyGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf)
PUBLIC int
-glXQueryGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf, int attribute, unsigned int *value)
+glXQueryGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf, int attribute,
+ unsigned int *value)
{
const XMesaBuffer xmbuf = XMesaFindBuffer(dpy, pbuf);
@@ -2454,7 +2462,8 @@ glXSelectEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long mask)
PUBLIC void
-glXGetSelectedEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long *mask)
+glXGetSelectedEventSGIX(Display *dpy, GLXDrawable drawable,
+ unsigned long *mask)
{
XMesaBuffer xmbuf = XMesaFindBuffer(dpy, drawable);
if (xmbuf) {
@@ -2482,7 +2491,8 @@ glXCushionSGI(Display *dpy, Window win, float cushion)
/*** GLX_SGIX_video_resize ***/
PUBLIC int
-glXBindChannelToWindowSGIX(Display *dpy, int screen, int channel , Window window)
+glXBindChannelToWindowSGIX(Display *dpy, int screen, int channel,
+ Window window)
{
(void) dpy;
(void) screen;
@@ -2492,7 +2502,8 @@ glXBindChannelToWindowSGIX(Display *dpy, int screen, int channel , Window window
}
PUBLIC int
-glXChannelRectSGIX(Display *dpy, int screen, int channel, int x, int y, int w, int h)
+glXChannelRectSGIX(Display *dpy, int screen, int channel,
+ int x, int y, int w, int h)
{
(void) dpy;
(void) screen;
@@ -2505,7 +2516,8 @@ glXChannelRectSGIX(Display *dpy, int screen, int channel, int x, int y, int w, i
}
PUBLIC int
-glXQueryChannelRectSGIX(Display *dpy, int screen, int channel, int *x, int *y, int *w, int *h)
+glXQueryChannelRectSGIX(Display *dpy, int screen, int channel,
+ int *x, int *y, int *w, int *h)
{
(void) dpy;
(void) screen;
@@ -2518,7 +2530,8 @@ glXQueryChannelRectSGIX(Display *dpy, int screen, int channel, int *x, int *y, i
}
PUBLIC int
-glXQueryChannelDeltasSGIX(Display *dpy, int screen, int channel, int *dx, int *dy, int *dw, int *dh)
+glXQueryChannelDeltasSGIX(Display *dpy, int screen, int channel,
+ int *dx, int *dy, int *dw, int *dh)
{
(void) dpy;
(void) screen;
@@ -2546,7 +2559,8 @@ glXChannelRectSyncSGIX(Display *dpy, int screen, int channel, GLenum synctype)
#if defined(_DM_BUFFER_H_)
PUBLIC Bool
-glXAssociateDMPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuffer, DMparams *params, DMbuffer dmbuffer)
+glXAssociateDMPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuffer,
+ DMparams *params, DMbuffer dmbuffer)
{
(void) dpy;
(void) pbuffer;
@@ -2593,7 +2607,8 @@ glXQueryMaxSwapBarriersSGIX(Display *dpy, int screen, int *max)
/*** GLX_SUN_get_transparent_index ***/
PUBLIC Status
-glXGetTransparentIndexSUN(Display *dpy, Window overlay, Window underlay, long *pTransparent)
+glXGetTransparentIndexSUN(Display *dpy, Window overlay, Window underlay,
+ long *pTransparent)
{
(void) dpy;
(void) overlay;
diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c
index 7c47a25ceb..3b35cbc890 100644
--- a/src/gallium/state_trackers/glx/xlib/xm_api.c
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.c
@@ -376,8 +376,6 @@ choose_depth_stencil_format(XMesaDisplay xmdpy, int depth, int stencil)
{
const enum pipe_texture_target target = PIPE_TEXTURE_2D;
const unsigned tex_usage = PIPE_BIND_DEPTH_STENCIL;
- const unsigned geom_flags = (PIPE_TEXTURE_GEOM_NON_SQUARE |
- PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO);
const unsigned sample_count = 0;
enum pipe_format formats[8], fmt;
int count, i;
@@ -403,7 +401,7 @@ choose_depth_stencil_format(XMesaDisplay xmdpy, int depth, int stencil)
for (i = 0; i < count; i++) {
if (xmdpy->screen->is_format_supported(xmdpy->screen, formats[i],
target, sample_count,
- tex_usage, geom_flags)) {
+ tex_usage)) {
fmt = formats[i];
break;
}
@@ -1195,11 +1193,7 @@ void XMesaSwapBuffers( XMesaBuffer b )
XMesaContext xmctx = XMesaGetCurrentContext();
if (xmctx && xmctx->xm_buffer == b) {
- xmctx->st->flush( xmctx->st,
- PIPE_FLUSH_RENDER_CACHE |
- PIPE_FLUSH_SWAPBUFFERS |
- PIPE_FLUSH_FRAME,
- NULL);
+ xmctx->st->flush( xmctx->st, ST_FLUSH_FRONT, NULL);
}
xmesa_swap_st_framebuffer(b->stfb);
@@ -1225,9 +1219,10 @@ void XMesaFlush( XMesaContext c )
XMesaDisplay xmdpy = xmesa_init_display(c->xm_visual->display);
struct pipe_fence_handle *fence = NULL;
- c->st->flush(c->st, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, &fence);
+ c->st->flush(c->st, ST_FLUSH_FRONT, &fence);
if (fence) {
- xmdpy->screen->fence_finish(xmdpy->screen, fence, 0);
+ xmdpy->screen->fence_finish(xmdpy->screen, fence,
+ PIPE_TIMEOUT_INFINITE);
xmdpy->screen->fence_reference(xmdpy->screen, &fence, NULL);
}
XFlush( c->xm_visual->display );
diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i
index 5bdeaa8e4e..17d25ad0aa 100644
--- a/src/gallium/state_trackers/python/p_context.i
+++ b/src/gallium/state_trackers/python/p_context.i
@@ -268,7 +268,6 @@ struct st_context {
void set_vertex_buffer(unsigned index,
unsigned stride,
- unsigned max_index,
unsigned buffer_offset,
struct pipe_resource *buffer)
{
@@ -277,7 +276,6 @@ struct st_context {
memset(&state, 0, sizeof(state));
state.stride = stride;
- state.max_index = max_index;
state.buffer_offset = buffer_offset;
state.buffer = buffer;
@@ -352,6 +350,7 @@ struct st_context {
vbuf = pipe_buffer_create(screen,
PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STATIC,
size);
if(!vbuf)
goto error1;
@@ -360,7 +359,7 @@ struct st_context {
if (!map)
goto error2;
memcpy(map, vertices, size);
- pipe_buffer_unmap(pipe, vbuf, transfer);
+ pipe_buffer_unmap(pipe, transfer);
cso_save_vertex_elements($self->cso);
@@ -378,7 +377,6 @@ struct st_context {
vbuffer.buffer = vbuf;
vbuffer.stride = num_attribs * 4 * sizeof(float); /* vertex size */
vbuffer.buffer_offset = 0;
- vbuffer.max_index = num_verts - 1;
pipe->set_vertex_buffers(pipe, 1, &vbuffer);
/* draw */
@@ -402,10 +400,10 @@ error1:
void
flush(unsigned flags = 0) {
struct pipe_fence_handle *fence = NULL;
- $self->pipe->flush($self->pipe, flags | PIPE_FLUSH_RENDER_CACHE, &fence);
+ $self->pipe->flush($self->pipe, &fence);
if(fence) {
/* TODO: allow asynchronous operation */
- $self->pipe->screen->fence_finish( $self->pipe->screen, fence, 0 );
+ $self->pipe->screen->fence_finish( $self->pipe->screen, fence, PIPE_TIMEOUT_INFINITE );
$self->pipe->screen->fence_reference( $self->pipe->screen, &fence, NULL );
}
}
diff --git a/src/gallium/state_trackers/python/p_device.i b/src/gallium/state_trackers/python/p_device.i
index d55086fefd..61f6b3bb73 100644
--- a/src/gallium/state_trackers/python/p_device.i
+++ b/src/gallium/state_trackers/python/p_device.i
@@ -86,8 +86,7 @@ struct st_device {
int is_format_supported( enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned bind,
- unsigned geom_flags ) {
+ unsigned bind ) {
/* We can't really display surfaces with the python statetracker so mask
* out that usage */
bind &= ~PIPE_BIND_DISPLAY_TARGET;
@@ -96,8 +95,7 @@ struct st_device {
format,
target,
sample_count,
- bind,
- geom_flags );
+ bind );
}
struct st_context *
@@ -134,7 +132,7 @@ struct st_device {
}
struct pipe_resource *
- buffer_create(unsigned size, unsigned bind = 0) {
- return pipe_buffer_create($self->screen, bind, size);
+ buffer_create(unsigned size, unsigned usage, unsigned bind = 0) {
+ return pipe_buffer_create($self->screen, bind, usage, size);
}
};
diff --git a/src/gallium/state_trackers/vega/Makefile b/src/gallium/state_trackers/vega/Makefile
index 7342c124c2..3e8ad49b76 100644
--- a/src/gallium/state_trackers/vega/Makefile
+++ b/src/gallium/state_trackers/vega/Makefile
@@ -24,23 +24,25 @@ C_SOURCES = \
api_path.c \
api_text.c \
api_transform.c \
- vgu.c \
- vg_context.c \
- vg_manager.c \
- vg_state.c \
- vg_translate.c \
- polygon.c \
- bezier.c \
- path.c \
- paint.c \
arc.c \
+ bezier.c \
+ handle.c \
image.c \
+ mask.c \
+ paint.c \
+ path.c \
+ polygon.c \
renderer.c \
+ shader.c \
+ shaders_cache.c \
stroker.c \
- mask.c \
text.c \
- shader.c \
- shaders_cache.c
+ vg_context.c \
+ vg_manager.c \
+ vg_state.c \
+ vg_translate.c \
+ vgu.c
+
GENERATED_SOURCES := api_tmp.h
diff --git a/src/gallium/state_trackers/vega/SConscript b/src/gallium/state_trackers/vega/SConscript
index 4900135a1c..f3732aa157 100644
--- a/src/gallium/state_trackers/vega/SConscript
+++ b/src/gallium/state_trackers/vega/SConscript
@@ -26,23 +26,24 @@ vega_sources = [
'api_path.c',
'api_text.c',
'api_transform.c',
- 'vgu.c',
- 'vg_context.c',
- 'vg_manager.c',
- 'vg_state.c',
- 'vg_translate.c',
- 'polygon.c',
- 'bezier.c',
- 'path.c',
- 'paint.c',
'arc.c',
+ 'bezier.c',
+ 'handle.c',
'image.c',
- 'renderer.c',
- 'stroker.c',
'mask.c',
+ 'paint.c',
+ 'path.c',
+ 'polygon.c',
+ 'renderer.c',
'shader.c',
'shaders_cache.c',
+ 'stroker.c',
'text.c',
+ 'vg_context.c',
+ 'vg_manager.c',
+ 'vg_state.c',
+ 'vg_translate.c',
+ 'vgu.c'
]
api_tmp = env.CodeGenerate(
diff --git a/src/gallium/state_trackers/vega/api_context.c b/src/gallium/state_trackers/vega/api_context.c
index d6bbda5e07..19e42dd156 100644
--- a/src/gallium/state_trackers/vega/api_context.c
+++ b/src/gallium/state_trackers/vega/api_context.c
@@ -56,7 +56,7 @@ void vegaFlush(void)
return;
pipe = ctx->pipe;
- pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+ pipe->flush(pipe, NULL);
vg_manager_flush_frontbuffer(ctx);
}
@@ -72,9 +72,10 @@ void vegaFinish(void)
pipe = ctx->pipe;
- pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, &fence);
+ pipe->flush(pipe, &fence);
if (fence) {
- pipe->screen->fence_finish(pipe->screen, fence, 0);
+ pipe->screen->fence_finish(pipe->screen, fence,
+ PIPE_TIMEOUT_INFINITE);
pipe->screen->fence_reference(pipe->screen, &fence, NULL);
}
}
diff --git a/src/gallium/state_trackers/vega/api_filters.c b/src/gallium/state_trackers/vega/api_filters.c
index 724e38241b..6be460c495 100644
--- a/src/gallium/state_trackers/vega/api_filters.c
+++ b/src/gallium/state_trackers/vega/api_filters.c
@@ -29,6 +29,7 @@
#include "vg_context.h"
#include "image.h"
#include "api.h"
+#include "handle.h"
#include "renderer.h"
#include "shaders_cache.h"
@@ -251,8 +252,8 @@ void vegaColorMatrix(VGImage dst, VGImage src,
return;
}
- d = (struct vg_image*)dst;
- s = (struct vg_image*)src;
+ d = handle_to_image(dst);
+ s = handle_to_image(src);
if (vg_image_overlaps(d, s)) {
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
@@ -293,7 +294,7 @@ void vegaConvolve(VGImage dst, VGImage src,
struct vg_image *d, *s;
VGint kernel_size = kernelWidth * kernelHeight;
struct filter_info info;
- const VGint max_kernel_size = vgGeti(VG_MAX_KERNEL_SIZE);
+ const VGint max_kernel_size = vegaGeti(VG_MAX_KERNEL_SIZE);
if (dst == VG_INVALID_HANDLE || src == VG_INVALID_HANDLE) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
@@ -317,8 +318,8 @@ void vegaConvolve(VGImage dst, VGImage src,
return;
}
- d = (struct vg_image*)dst;
- s = (struct vg_image*)src;
+ d = handle_to_image(dst);
+ s = handle_to_image(src);
if (vg_image_overlaps(d, s)) {
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
@@ -395,7 +396,7 @@ void vegaSeparableConvolve(VGImage dst, VGImage src,
struct vg_context *ctx = vg_current_context();
VGshort *kernel;
VGint i, j, idx = 0;
- const VGint max_kernel_size = vgGeti(VG_MAX_SEPARABLE_KERNEL_SIZE);
+ const VGint max_kernel_size = vegaGeti(VG_MAX_SEPARABLE_KERNEL_SIZE);
if (dst == VG_INVALID_HANDLE || src == VG_INVALID_HANDLE) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
@@ -425,8 +426,8 @@ void vegaSeparableConvolve(VGImage dst, VGImage src,
++idx;
}
}
- vgConvolve(dst, src, kernelWidth, kernelHeight, shiftX, shiftY,
- kernel, scale, bias, tilingMode);
+ vegaConvolve(dst, src, kernelWidth, kernelHeight, shiftX, shiftY,
+ kernel, scale, bias, tilingMode);
free(kernel);
}
@@ -502,8 +503,8 @@ void vegaGaussianBlur(VGImage dst, VGImage src,
return;
}
- d = (struct vg_image*)dst;
- s = (struct vg_image*)src;
+ d = handle_to_image(dst);
+ s = handle_to_image(src);
if (vg_image_overlaps(d, s)) {
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
@@ -599,8 +600,8 @@ void vegaLookup(VGImage dst, VGImage src,
return;
}
- d = (struct vg_image*)dst;
- s = (struct vg_image*)src;
+ d = handle_to_image(dst);
+ s = handle_to_image(src);
if (vg_image_overlaps(d, s)) {
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
@@ -662,8 +663,8 @@ void vegaLookupSingle(VGImage dst, VGImage src,
return;
}
- d = (struct vg_image*)dst;
- s = (struct vg_image*)src;
+ d = handle_to_image(dst);
+ s = handle_to_image(src);
if (vg_image_overlaps(d, s)) {
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
diff --git a/src/gallium/state_trackers/vega/api_images.c b/src/gallium/state_trackers/vega/api_images.c
index ad95409cd0..2cb5622f81 100644
--- a/src/gallium/state_trackers/vega/api_images.c
+++ b/src/gallium/state_trackers/vega/api_images.c
@@ -32,6 +32,7 @@
#include "vg_translate.h"
#include "api_consts.h"
#include "api.h"
+#include "handle.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
@@ -105,12 +106,12 @@ VGImage vegaCreateImage(VGImageFormat format,
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
return VG_INVALID_HANDLE;
}
- if (width > vgGeti(VG_MAX_IMAGE_WIDTH) ||
- height > vgGeti(VG_MAX_IMAGE_HEIGHT)) {
+ if (width > vegaGeti(VG_MAX_IMAGE_WIDTH) ||
+ height > vegaGeti(VG_MAX_IMAGE_HEIGHT)) {
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
return VG_INVALID_HANDLE;
}
- if (width * height > vgGeti(VG_MAX_IMAGE_PIXELS)) {
+ if (width * height > vegaGeti(VG_MAX_IMAGE_PIXELS)) {
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
return VG_INVALID_HANDLE;
}
@@ -122,19 +123,19 @@ VGImage vegaCreateImage(VGImageFormat format,
return VG_INVALID_HANDLE;
}
- return (VGImage)image_create(format, width, height);
+ return image_to_handle(image_create(format, width, height));
}
void vegaDestroyImage(VGImage image)
{
struct vg_context *ctx = vg_current_context();
- struct vg_image *img = (struct vg_image *)image;
+ struct vg_image *img = handle_to_image(image);
if (image == VG_INVALID_HANDLE) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
- if (!vg_object_is_valid((void*)image, VG_OBJECT_IMAGE)) {
+ if (!vg_object_is_valid(image, VG_OBJECT_IMAGE)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
@@ -157,7 +158,7 @@ void vegaClearImage(VGImage image,
return;
}
- img = (struct vg_image*)image;
+ img = handle_to_image(image);
if (x + width < 0 || y + height < 0)
return;
@@ -189,7 +190,7 @@ void vegaImageSubData(VGImage image,
return;
}
- img = (struct vg_image*)(image);
+ img = handle_to_image(image);
image_sub_data(img, data, dataStride, dataFormat,
x, y, width, height);
}
@@ -216,7 +217,7 @@ void vegaGetImageSubData(VGImage image,
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
return;
}
- img = (struct vg_image*)image;
+ img = handle_to_image(image);
image_get_sub_data(img, data, dataStride, dataFormat,
x, y, width, height);
}
@@ -229,8 +230,8 @@ VGImage vegaChildImage(VGImage parent,
struct vg_image *p;
if (parent == VG_INVALID_HANDLE ||
- !vg_context_is_object_valid(ctx, VG_OBJECT_IMAGE, (void*)parent) ||
- !vg_object_is_valid((void*)parent, VG_OBJECT_IMAGE)) {
+ !vg_context_is_object_valid(ctx, VG_OBJECT_IMAGE, parent) ||
+ !vg_object_is_valid(parent, VG_OBJECT_IMAGE)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return VG_INVALID_HANDLE;
}
@@ -238,7 +239,7 @@ VGImage vegaChildImage(VGImage parent,
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
return VG_INVALID_HANDLE;
}
- p = (struct vg_image *)parent;
+ p = handle_to_image(parent);
if (x > p->width || y > p->height) {
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
return VG_INVALID_HANDLE;
@@ -248,7 +249,7 @@ VGImage vegaChildImage(VGImage parent,
return VG_INVALID_HANDLE;
}
- return (VGImage)image_child_image(p, x, y, width, height);
+ return image_to_handle(image_child_image(p, x, y, width, height));
}
VGImage vegaGetParent(VGImage image)
@@ -261,9 +262,9 @@ VGImage vegaGetParent(VGImage image)
return VG_INVALID_HANDLE;
}
- img = (struct vg_image*)image;
+ img = handle_to_image(image);
if (img->parent)
- return (VGImage)img->parent;
+ return image_to_handle(img->parent);
else
return image;
}
@@ -285,8 +286,8 @@ void vegaCopyImage(VGImage dst, VGint dx, VGint dy,
return;
}
vg_validate_state(ctx);
- image_copy((struct vg_image*)dst, dx, dy,
- (struct vg_image*)src, sx, sy,
+ image_copy(handle_to_image(dst), dx, dy,
+ handle_to_image(src), sx, sy,
width, height, dither);
}
@@ -303,7 +304,7 @@ void vegaDrawImage(VGImage image)
}
vg_validate_state(ctx);
- image_draw((struct vg_image*)image,
+ image_draw(handle_to_image(image),
&ctx->state.vg.image_user_to_surface_matrix);
}
@@ -323,7 +324,7 @@ void vegaSetPixels(VGint dx, VGint dy,
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
return;
}
- image_set_pixels(dx, dy, (struct vg_image*)src, sx, sy, width,
+ image_set_pixels(dx, dy, handle_to_image(src), sx, sy, width,
height);
}
@@ -343,7 +344,7 @@ void vegaGetPixels(VGImage dst, VGint dx, VGint dy,
return;
}
- img = (struct vg_image*)dst;
+ img = handle_to_image(dst);
image_get_pixels(img, dx, dy,
sx, sy, width, height);
@@ -355,7 +356,6 @@ void vegaWritePixels(const void * data, VGint dataStride,
VGint width, VGint height)
{
struct vg_context *ctx = vg_current_context();
- struct pipe_context *pipe = ctx->pipe;
if (!supported_image_format(dataFormat)) {
vg_set_error(ctx, VG_UNSUPPORTED_IMAGE_FORMAT_ERROR);
@@ -386,8 +386,6 @@ void vegaWritePixels(const void * data, VGint dataStride,
#endif
image_destroy(img);
}
- /* make sure rendering has completed */
- pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
}
void vegaReadPixels(void * data, VGint dataStride,
@@ -420,8 +418,6 @@ void vegaReadPixels(void * data, VGint dataStride,
return;
}
- /* make sure rendering has completed */
- pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
if (sx < 0) {
xoffset = -sx;
xoffset *= _vega_size_for_format(dataFormat);
@@ -487,7 +483,7 @@ void vegaCopyPixels(VGint dx, VGint dy,
vg_validate_state(ctx);
/* make sure rendering has completed */
- vgFinish();
+ vegaFinish();
vg_copy_surface(ctx, strb->surface, dx, dy,
strb->surface, sx, sy, width, height);
diff --git a/src/gallium/state_trackers/vega/api_masks.c b/src/gallium/state_trackers/vega/api_masks.c
index beb15c33a5..cdbf0026e8 100644
--- a/src/gallium/state_trackers/vega/api_masks.c
+++ b/src/gallium/state_trackers/vega/api_masks.c
@@ -28,14 +28,11 @@
#include "mask.h"
#include "api.h"
+#include "handle.h"
#include "renderer.h"
#include "vg_context.h"
#include "pipe/p_context.h"
-#include "util/u_inlines.h"
-
-#include "util/u_pack_color.h"
-#include "util/u_draw_quad.h"
void vegaMask(VGHandle mask, VGMaskOperation operation,
VGint x, VGint y,
@@ -60,11 +57,11 @@ void vegaMask(VGHandle mask, VGMaskOperation operation,
mask_fill(x, y, width, height, 0.f);
} else if (operation == VG_FILL_MASK) {
mask_fill(x, y, width, height, 1.f);
- } else if (vg_object_is_valid((void*)mask, VG_OBJECT_IMAGE)) {
- struct vg_image *image = (struct vg_image *)mask;
+ } else if (vg_object_is_valid(mask, VG_OBJECT_IMAGE)) {
+ struct vg_image *image = handle_to_image(mask);
mask_using_image(image, operation, x, y, width, height);
- } else if (vg_object_is_valid((void*)mask, VG_OBJECT_MASK)) {
- struct vg_mask_layer *layer = (struct vg_mask_layer *)mask;
+ } else if (vg_object_is_valid(mask, VG_OBJECT_MASK)) {
+ struct vg_mask_layer *layer = handle_to_masklayer(mask);
mask_using_layer(layer, operation, x, y, width, height);
} else {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
@@ -127,14 +124,14 @@ void vegaRenderToMask(VGPath path,
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
return;
}
- if (!vg_object_is_valid((void*)path, VG_OBJECT_PATH)) {
+ if (!vg_object_is_valid(path, VG_OBJECT_PATH)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
vg_validate_state(ctx);
- mask_render_to((struct path *)path, paintModes, operation);
+ mask_render_to(handle_to_path(path), paintModes, operation);
}
VGMaskLayer vegaCreateMaskLayer(VGint width, VGint height)
@@ -142,13 +139,13 @@ VGMaskLayer vegaCreateMaskLayer(VGint width, VGint height)
struct vg_context *ctx = vg_current_context();
if (width <= 0 || height <= 0 ||
- width > vgGeti(VG_MAX_IMAGE_WIDTH) ||
- height > vgGeti(VG_MAX_IMAGE_HEIGHT)) {
+ width > vegaGeti(VG_MAX_IMAGE_WIDTH) ||
+ height > vegaGeti(VG_MAX_IMAGE_HEIGHT)) {
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
return VG_INVALID_HANDLE;
}
- return (VGMaskLayer)mask_layer_create(width, height);
+ return masklayer_to_handle(mask_layer_create(width, height));
}
void vegaDestroyMaskLayer(VGMaskLayer maskLayer)
@@ -160,12 +157,12 @@ void vegaDestroyMaskLayer(VGMaskLayer maskLayer)
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
- if (!vg_object_is_valid((void*)maskLayer, VG_OBJECT_MASK)) {
+ if (!vg_object_is_valid(maskLayer, VG_OBJECT_MASK)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
- mask = (struct vg_mask_layer *)maskLayer;
+ mask = handle_to_masklayer(maskLayer);
mask_layer_destroy(mask);
}
@@ -196,12 +193,12 @@ void vegaFillMaskLayer(VGMaskLayer maskLayer,
return;
}
- if (!vg_object_is_valid((void*)maskLayer, VG_OBJECT_MASK)) {
+ if (!vg_object_is_valid(maskLayer, VG_OBJECT_MASK)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
- mask = (struct vg_mask_layer*)maskLayer;
+ mask = handle_to_masklayer(maskLayer);
if (x + width > mask_layer_width(mask) ||
y + height > mask_layer_height(mask)) {
@@ -230,14 +227,14 @@ void vegaCopyMask(VGMaskLayer maskLayer,
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
return;
}
- if (!vg_object_is_valid((void*)maskLayer, VG_OBJECT_MASK)) {
+ if (!vg_object_is_valid(maskLayer, VG_OBJECT_MASK)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
vg_validate_state(ctx);
- mask = (struct vg_mask_layer*)maskLayer;
+ mask = handle_to_masklayer(maskLayer);
mask_copy(mask, sx, sy, dx, dy, width, height);
}
diff --git a/src/gallium/state_trackers/vega/api_paint.c b/src/gallium/state_trackers/vega/api_paint.c
index 1411806455..2610ebe057 100644
--- a/src/gallium/state_trackers/vega/api_paint.c
+++ b/src/gallium/state_trackers/vega/api_paint.c
@@ -29,24 +29,24 @@
#include "vg_context.h"
#include "paint.h"
#include "api.h"
+#include "handle.h"
+
VGPaint vegaCreatePaint(void)
{
- return (VGPaint) paint_create(vg_current_context());
+ return paint_to_handle(paint_create(vg_current_context()));
}
void vegaDestroyPaint(VGPaint p)
{
struct vg_context *ctx = vg_current_context();
- struct vg_paint *paint;
if (p == VG_INVALID_HANDLE) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
- paint = (struct vg_paint *)p;
- paint_destroy(paint);
+ paint_destroy(handle_to_paint(p));
}
void vegaSetPaint(VGPaint paint, VGbitfield paintModes)
@@ -55,8 +55,8 @@ void vegaSetPaint(VGPaint paint, VGbitfield paintModes)
if (paint == VG_INVALID_HANDLE) {
/* restore the default */
- paint = (VGPaint)ctx->default_paint;
- } else if (!vg_object_is_valid((void*)paint, VG_OBJECT_PAINT)) {
+ paint = paint_to_handle(ctx->default_paint);
+ } else if (!vg_object_is_valid(paint, VG_OBJECT_PAINT)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
@@ -67,11 +67,13 @@ void vegaSetPaint(VGPaint paint, VGbitfield paintModes)
}
if (paintModes & VG_FILL_PATH) {
- ctx->state.vg.fill_paint = (struct vg_paint *)paint;
+ ctx->state.vg.fill_paint = handle_to_paint(paint);
}
if (paintModes & VG_STROKE_PATH) {
- ctx->state.vg.stroke_paint = (struct vg_paint *)paint;
+ ctx->state.vg.stroke_paint = handle_to_paint(paint);
}
+
+ ctx->state.dirty |= PAINT_DIRTY;
}
VGPaint vegaGetPaint(VGPaintMode paintMode)
@@ -85,11 +87,11 @@ VGPaint vegaGetPaint(VGPaintMode paintMode)
}
if (paintMode == VG_FILL_PATH)
- paint = (VGPaint)ctx->state.vg.fill_paint;
+ paint = paint_to_handle(ctx->state.vg.fill_paint);
else if (paintMode == VG_STROKE_PATH)
- paint = (VGPaint)ctx->state.vg.stroke_paint;
+ paint = paint_to_handle(ctx->state.vg.stroke_paint);
- if (paint == (VGPaint)ctx->default_paint)
+ if (paint == paint_to_handle(ctx->default_paint))
paint = VG_INVALID_HANDLE;
return paint;
@@ -98,20 +100,24 @@ VGPaint vegaGetPaint(VGPaintMode paintMode)
void vegaSetColor(VGPaint paint, VGuint rgba)
{
struct vg_context *ctx = vg_current_context();
+ struct vg_paint *p;
if (paint == VG_INVALID_HANDLE) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
- if (!vg_object_is_valid((void*)paint, VG_OBJECT_PAINT)) {
+ if (!vg_object_is_valid(paint, VG_OBJECT_PAINT)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
- {
- struct vg_paint *p = (struct vg_paint *)paint;
- paint_set_colori(p, rgba);
- }
+
+ p = handle_to_paint(paint);
+ paint_set_colori(p, rgba);
+
+ if (ctx->state.vg.fill_paint == p ||
+ ctx->state.vg.stroke_paint == p)
+ ctx->state.dirty |= PAINT_DIRTY;
}
VGuint vegaGetColor(VGPaint paint)
@@ -125,11 +131,11 @@ VGuint vegaGetColor(VGPaint paint)
return rgba;
}
- if (!vg_object_is_valid((void*)paint, VG_OBJECT_PAINT)) {
+ if (!vg_object_is_valid(paint, VG_OBJECT_PAINT)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return rgba;
}
- p = (struct vg_paint *)paint;
+ p = handle_to_paint(paint);
return paint_colori(p);
}
@@ -139,28 +145,28 @@ void vegaPaintPattern(VGPaint paint, VGImage pattern)
struct vg_context *ctx = vg_current_context();
if (paint == VG_INVALID_HANDLE ||
- !vg_context_is_object_valid(ctx, VG_OBJECT_PAINT, (void *)paint)) {
+ !vg_context_is_object_valid(ctx, VG_OBJECT_PAINT, paint)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
if (pattern == VG_INVALID_HANDLE) {
- paint_set_type((struct vg_paint*)paint, VG_PAINT_TYPE_COLOR);
+ paint_set_type(handle_to_paint(paint), VG_PAINT_TYPE_COLOR);
return;
}
- if (!vg_context_is_object_valid(ctx, VG_OBJECT_IMAGE, (void *)pattern)) {
+ if (!vg_context_is_object_valid(ctx, VG_OBJECT_IMAGE, pattern)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
- if (!vg_object_is_valid((void*)paint, VG_OBJECT_PAINT) ||
- !vg_object_is_valid((void*)pattern, VG_OBJECT_IMAGE)) {
+ if (!vg_object_is_valid(paint, VG_OBJECT_PAINT) ||
+ !vg_object_is_valid(pattern, VG_OBJECT_IMAGE)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
- paint_set_pattern((struct vg_paint*)paint,
- (struct vg_image*)pattern);
+ paint_set_pattern(handle_to_paint(paint),
+ handle_to_image(pattern));
}
diff --git a/src/gallium/state_trackers/vega/api_params.c b/src/gallium/state_trackers/vega/api_params.c
index a73b6c3eff..aa1e5dd280 100644
--- a/src/gallium/state_trackers/vega/api_params.c
+++ b/src/gallium/state_trackers/vega/api_params.c
@@ -29,6 +29,7 @@
#include "vg_context.h"
#include "paint.h"
#include "path.h"
+#include "handle.h"
#include "image.h"
#include "text.h"
#include "matrix.h"
@@ -60,7 +61,7 @@ static INLINE VGboolean count_in_bounds(VGParamType type, VGint count)
else if (type == VG_STROKE_DASH_PATTERN) {
return count <= VEGA_MAX_DASH_COUNT;
} else {
- VGint real_count = vgGetVectorSize(type);
+ VGint real_count = vegaGetVectorSize(type);
return count == real_count;
}
}
@@ -103,7 +104,7 @@ void vegaSetf (VGParamType type, VGfloat value)
case VG_MAX_IMAGE_BYTES:
case VG_MAX_GAUSSIAN_STD_DEVIATION:
case VG_MAX_FLOAT:
- vgSeti(type, floor(value));
+ vegaSeti(type, floor(value));
return;
break;
case VG_STROKE_LINE_WIDTH:
@@ -289,7 +290,7 @@ void vegaSetfv(VGParamType type, VGint count,
case VG_FILTER_FORMAT_LINEAR:
case VG_FILTER_FORMAT_PREMULTIPLIED:
case VG_FILTER_CHANNEL_MASK:
- vgSeti(type, floor(values[0]));
+ vegaSeti(type, floor(values[0]));
return;
break;
case VG_SCISSOR_RECTS: {
@@ -416,7 +417,7 @@ void vegaSetiv(VGParamType type, VGint count,
case VG_FILTER_FORMAT_LINEAR:
case VG_FILTER_FORMAT_PREMULTIPLIED:
case VG_FILTER_CHANNEL_MASK:
- vgSeti(type, values[0]);
+ vegaSeti(type, values[0]);
return;
break;
case VG_SCISSOR_RECTS: {
@@ -536,7 +537,7 @@ VGfloat vegaGetf(VGParamType type)
case VG_FILTER_FORMAT_LINEAR:
case VG_FILTER_FORMAT_PREMULTIPLIED:
case VG_FILTER_CHANNEL_MASK:
- return vgGeti(type);
+ return vegaGeti(type);
break;
case VG_STROKE_LINE_WIDTH:
value = state->stroke.line_width.f;
@@ -558,7 +559,7 @@ VGfloat vegaGetf(VGParamType type)
case VG_MAX_IMAGE_PIXELS:
case VG_MAX_IMAGE_BYTES:
case VG_MAX_GAUSSIAN_STD_DEVIATION:
- return vgGeti(type);
+ return vegaGeti(type);
break;
case VG_MAX_FLOAT:
value = 1e+10;/*must be at least 1e+10*/
@@ -674,7 +675,7 @@ VGint vegaGeti(VGParamType type)
break;
case VG_MAX_FLOAT: {
- VGfloat val = vgGetf(type);
+ VGfloat val = vegaGetf(type);
value = float_to_int_floor(*((VGuint*)&val));
}
break;
@@ -765,7 +766,7 @@ void vegaGetfv(VGParamType type, VGint count,
{
const struct vg_state *state = current_state();
struct vg_context *ctx = vg_current_context();
- VGint real_count = vgGetVectorSize(type);
+ VGint real_count = vegaGetVectorSize(type);
if (!values || count <= 0 || count > real_count || !is_aligned(values)) {
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
@@ -802,10 +803,10 @@ void vegaGetfv(VGParamType type, VGint count,
case VG_MAX_IMAGE_PIXELS:
case VG_MAX_IMAGE_BYTES:
case VG_MAX_GAUSSIAN_STD_DEVIATION:
- values[0] = vgGeti(type);
+ values[0] = vegaGeti(type);
break;
case VG_MAX_FLOAT:
- values[0] = vgGetf(type);
+ values[0] = vegaGetf(type);
break;
case VG_SCISSOR_RECTS: {
VGint i;
@@ -866,7 +867,7 @@ void vegaGetiv(VGParamType type, VGint count,
{
const struct vg_state *state = current_state();
struct vg_context *ctx = vg_current_context();
- VGint real_count = vgGetVectorSize(type);
+ VGint real_count = vegaGetVectorSize(type);
if (!values || count <= 0 || count > real_count || !is_aligned(values)) {
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
@@ -903,10 +904,10 @@ void vegaGetiv(VGParamType type, VGint count,
case VG_MAX_IMAGE_PIXELS:
case VG_MAX_IMAGE_BYTES:
case VG_MAX_GAUSSIAN_STD_DEVIATION:
- values[0] = vgGeti(type);
+ values[0] = vegaGeti(type);
break;
case VG_MAX_FLOAT: {
- VGfloat val = vgGetf(type);
+ VGfloat val = vegaGetf(type);
values[0] = float_to_int_floor(*((VGuint*)&val));
}
break;
@@ -972,9 +973,9 @@ void vegaSetParameterf(VGHandle object,
VGfloat value)
{
struct vg_context *ctx = vg_current_context();
- void *ptr = (void*)object;
+ void *ptr = handle_to_pointer(object);
- if (!object || object == VG_INVALID_HANDLE || !is_aligned(ptr)) {
+ if (object == VG_INVALID_HANDLE || !is_aligned(ptr)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
@@ -983,7 +984,7 @@ void vegaSetParameterf(VGHandle object,
case VG_PAINT_TYPE:
case VG_PAINT_COLOR_RAMP_SPREAD_MODE:
case VG_PAINT_PATTERN_TILING_MODE:
- vgSetParameteri(object, paramType, floor(value));
+ vegaSetParameteri(object, paramType, floor(value));
return;
break;
case VG_PAINT_COLOR:
@@ -994,7 +995,7 @@ void vegaSetParameterf(VGHandle object,
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
break;
case VG_PAINT_COLOR_RAMP_PREMULTIPLIED: {
- struct vg_paint *p = (struct vg_paint *)object;
+ struct vg_paint *p = handle_to_paint(object);
paint_set_color_ramp_premultiplied(p, value);
}
break;
@@ -1026,9 +1027,9 @@ void vegaSetParameteri(VGHandle object,
VGint value)
{
struct vg_context *ctx = vg_current_context();
- void *ptr = (void*)object;
+ void *ptr = handle_to_pointer(object);
- if (!object || object == VG_INVALID_HANDLE || !is_aligned(ptr)) {
+ if (object == VG_INVALID_HANDLE || !is_aligned(ptr)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
@@ -1039,7 +1040,7 @@ void vegaSetParameteri(VGHandle object,
value > VG_PAINT_TYPE_PATTERN)
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
else {
- struct vg_paint *paint = (struct vg_paint *)ptr;
+ struct vg_paint *paint = handle_to_paint(object);
paint_set_type(paint, value);
}
break;
@@ -1055,12 +1056,12 @@ void vegaSetParameteri(VGHandle object,
value > VG_COLOR_RAMP_SPREAD_REFLECT)
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
else {
- struct vg_paint *paint = (struct vg_paint *)ptr;
+ struct vg_paint *paint = handle_to_paint(object);
paint_set_spread_mode(paint, value);
}
break;
case VG_PAINT_COLOR_RAMP_PREMULTIPLIED: {
- struct vg_paint *p = (struct vg_paint *)object;
+ struct vg_paint *p = handle_to_paint(object);
paint_set_color_ramp_premultiplied(p, value);
}
break;
@@ -1069,7 +1070,7 @@ void vegaSetParameteri(VGHandle object,
value > VG_TILE_REFLECT)
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
else {
- struct vg_paint *paint = (struct vg_paint *)ptr;
+ struct vg_paint *paint = handle_to_paint(object);
paint_set_pattern_tiling(paint, value);
}
break;
@@ -1102,10 +1103,10 @@ void vegaSetParameterfv(VGHandle object,
const VGfloat * values)
{
struct vg_context *ctx = vg_current_context();
- void *ptr = (void*)object;
- VGint real_count = vgGetParameterVectorSize(object, paramType);
+ void *ptr = handle_to_pointer(object);
+ VGint real_count = vegaGetParameterVectorSize(object, paramType);
- if (!object || object == VG_INVALID_HANDLE || !is_aligned(ptr)) {
+ if (object == VG_INVALID_HANDLE || !is_aligned(ptr)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
@@ -1125,15 +1126,18 @@ void vegaSetParameterfv(VGHandle object,
if (count != 1)
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
else
- vgSetParameterf(object, paramType, values[0]);
+ vegaSetParameterf(object, paramType, values[0]);
return;
break;
case VG_PAINT_COLOR: {
if (count != 4)
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
else {
- struct vg_paint *paint = (struct vg_paint *)object;
+ struct vg_paint *paint = handle_to_paint(object);
paint_set_color(paint, values);
+ if (ctx->state.vg.fill_paint == paint ||
+ ctx->state.vg.stroke_paint == paint)
+ ctx->state.dirty |= PAINT_DIRTY;
}
}
break;
@@ -1141,7 +1145,7 @@ void vegaSetParameterfv(VGHandle object,
if (count && count < 4)
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
else {
- struct vg_paint *paint = (struct vg_paint *)object;
+ struct vg_paint *paint = handle_to_paint(object);
count = MIN2(count, VEGA_MAX_COLOR_RAMP_STOPS);
paint_set_ramp_stops(paint, values, count);
{
@@ -1159,7 +1163,7 @@ void vegaSetParameterfv(VGHandle object,
if (count != 4)
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
else {
- struct vg_paint *paint = (struct vg_paint *)object;
+ struct vg_paint *paint = handle_to_paint(object);
paint_set_linear_gradient(paint, values);
{
VGint vals[4];
@@ -1176,7 +1180,7 @@ void vegaSetParameterfv(VGHandle object,
if (count != 5)
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
else {
- struct vg_paint *paint = (struct vg_paint *)object;
+ struct vg_paint *paint = handle_to_paint(object);
paint_set_radial_gradient(paint, values);
{
VGint vals[5];
@@ -1215,10 +1219,10 @@ void vegaSetParameteriv(VGHandle object,
const VGint * values)
{
struct vg_context *ctx = vg_current_context();
- void *ptr = (void*)object;
- VGint real_count = vgGetParameterVectorSize(object, paramType);
+ void *ptr = handle_to_pointer(object);
+ VGint real_count = vegaGetParameterVectorSize(object, paramType);
- if (!object || object == VG_INVALID_HANDLE || !is_aligned(ptr)) {
+ if (object == VG_INVALID_HANDLE || !is_aligned(ptr)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
@@ -1238,15 +1242,18 @@ void vegaSetParameteriv(VGHandle object,
if (count != 1)
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
else
- vgSetParameteri(object, paramType, values[0]);
+ vegaSetParameteri(object, paramType, values[0]);
return;
break;
case VG_PAINT_COLOR: {
if (count != 4)
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
else {
- struct vg_paint *paint = (struct vg_paint *)object;
+ struct vg_paint *paint = handle_to_paint(object);
paint_set_coloriv(paint, values);
+ if (ctx->state.vg.fill_paint == paint ||
+ ctx->state.vg.stroke_paint == paint)
+ ctx->state.dirty |= PAINT_DIRTY;
}
}
break;
@@ -1256,7 +1263,7 @@ void vegaSetParameteriv(VGHandle object,
else {
VGfloat *vals = 0;
int i;
- struct vg_paint *paint = (struct vg_paint *)object;
+ struct vg_paint *paint = handle_to_paint(object);
if (count) {
vals = malloc(sizeof(VGfloat)*count);
for (i = 0; i < count; ++i)
@@ -1274,7 +1281,7 @@ void vegaSetParameteriv(VGHandle object,
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
else {
VGfloat vals[4];
- struct vg_paint *paint = (struct vg_paint *)object;
+ struct vg_paint *paint = handle_to_paint(object);
vals[0] = values[0];
vals[1] = values[1];
vals[2] = values[2];
@@ -1289,7 +1296,7 @@ void vegaSetParameteriv(VGHandle object,
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
else {
VGfloat vals[5];
- struct vg_paint *paint = (struct vg_paint *)object;
+ struct vg_paint *paint = handle_to_paint(object);
vals[0] = values[0];
vals[1] = values[1];
vals[2] = values[2];
@@ -1318,9 +1325,8 @@ VGint vegaGetParameterVectorSize(VGHandle object,
VGint paramType)
{
struct vg_context *ctx = vg_current_context();
- void *ptr = (void*)object;
- if (!ptr || object == VG_INVALID_HANDLE) {
+ if (object == VG_INVALID_HANDLE) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return 0;
}
@@ -1334,7 +1340,7 @@ VGint vegaGetParameterVectorSize(VGHandle object,
case VG_PAINT_COLOR:
return 4;
case VG_PAINT_COLOR_RAMP_STOPS: {
- struct vg_paint *p = (struct vg_paint *)object;
+ struct vg_paint *p = handle_to_paint(object);
return paint_num_ramp_stops(p);
}
break;
@@ -1374,9 +1380,8 @@ VGfloat vegaGetParameterf(VGHandle object,
VGint paramType)
{
struct vg_context *ctx = vg_current_context();
- void *ptr = (void*)object;
- if (!ptr || object == VG_INVALID_HANDLE) {
+ if (object == VG_INVALID_HANDLE) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return 0;
}
@@ -1386,7 +1391,7 @@ VGfloat vegaGetParameterf(VGHandle object,
case VG_PAINT_COLOR_RAMP_SPREAD_MODE:
case VG_PAINT_COLOR_RAMP_PREMULTIPLIED:
case VG_PAINT_PATTERN_TILING_MODE:
- return vgGetParameteri(object, paramType);
+ return vegaGetParameteri(object, paramType);
break;
case VG_PAINT_COLOR:
case VG_PAINT_COLOR_RAMP_STOPS:
@@ -1398,17 +1403,17 @@ VGfloat vegaGetParameterf(VGHandle object,
case VG_PATH_FORMAT:
return VG_PATH_FORMAT_STANDARD;
case VG_PATH_SCALE: {
- struct path *p = (struct path*)object;
+ struct path *p = handle_to_path(object);
return path_scale(p);
}
case VG_PATH_BIAS: {
- struct path *p = (struct path*)object;
+ struct path *p = handle_to_path(object);
return path_bias(p);
}
case VG_PATH_DATATYPE:
case VG_PATH_NUM_SEGMENTS:
case VG_PATH_NUM_COORDS:
- return vgGetParameteri(object, paramType);
+ return vegaGetParameteri(object, paramType);
break;
case VG_IMAGE_FORMAT:
@@ -1416,7 +1421,7 @@ VGfloat vegaGetParameterf(VGHandle object,
case VG_IMAGE_HEIGHT:
#ifdef OPENVG_VERSION_1_1
case VG_FONT_NUM_GLYPHS:
- return vgGetParameteri(object, paramType);
+ return vegaGetParameteri(object, paramType);
break;
#endif
@@ -1431,30 +1436,29 @@ VGint vegaGetParameteri(VGHandle object,
VGint paramType)
{
struct vg_context *ctx = vg_current_context();
- void *ptr = (void*)object;
- if (!ptr || object == VG_INVALID_HANDLE) {
+ if (object == VG_INVALID_HANDLE) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return 0;
}
switch(paramType) {
case VG_PAINT_TYPE: {
- struct vg_paint *paint = (struct vg_paint *)ptr;
+ struct vg_paint *paint = handle_to_paint(object);
return paint_type(paint);
}
break;
case VG_PAINT_COLOR_RAMP_SPREAD_MODE: {
- struct vg_paint *p = (struct vg_paint *)object;
+ struct vg_paint *p = handle_to_paint(object);
return paint_spread_mode(p);
}
case VG_PAINT_COLOR_RAMP_PREMULTIPLIED: {
- struct vg_paint *p = (struct vg_paint *)object;
+ struct vg_paint *p = handle_to_paint(object);
return paint_color_ramp_premultiplied(p);
}
break;
case VG_PAINT_PATTERN_TILING_MODE: {
- struct vg_paint *p = (struct vg_paint *)object;
+ struct vg_paint *p = handle_to_paint(object);
return paint_pattern_tiling(p);
}
break;
@@ -1469,40 +1473,40 @@ VGint vegaGetParameteri(VGHandle object,
return VG_PATH_FORMAT_STANDARD;
case VG_PATH_SCALE:
case VG_PATH_BIAS:
- return vgGetParameterf(object, paramType);
+ return vegaGetParameterf(object, paramType);
case VG_PATH_DATATYPE: {
- struct path *p = (struct path*)object;
+ struct path *p = handle_to_path(object);
return path_datatype(p);
}
case VG_PATH_NUM_SEGMENTS: {
- struct path *p = (struct path*)object;
+ struct path *p = handle_to_path(object);
return path_num_segments(p);
}
case VG_PATH_NUM_COORDS: {
- struct path *p = (struct path*)object;
+ struct path *p = handle_to_path(object);
return path_num_coords(p);
}
break;
case VG_IMAGE_FORMAT: {
- struct vg_image *img = (struct vg_image*)object;
+ struct vg_image *img = handle_to_image(object);
return img->format;
}
break;
case VG_IMAGE_WIDTH: {
- struct vg_image *img = (struct vg_image*)object;
+ struct vg_image *img = handle_to_image(object);
return img->width;
}
break;
case VG_IMAGE_HEIGHT: {
- struct vg_image *img = (struct vg_image*)object;
+ struct vg_image *img = handle_to_image(object);
return img->height;
}
break;
#ifdef OPENVG_VERSION_1_1
case VG_FONT_NUM_GLYPHS: {
- struct vg_font *font = (struct vg_font*)object;
+ struct vg_font *font = handle_to_font(object);
return font_num_glyphs(font);
}
break;
@@ -1521,10 +1525,9 @@ void vegaGetParameterfv(VGHandle object,
VGfloat * values)
{
struct vg_context *ctx = vg_current_context();
- void *ptr = (void*)object;
- VGint real_count = vgGetParameterVectorSize(object, paramType);
+ VGint real_count = vegaGetParameterVectorSize(object, paramType);
- if (!ptr || object == VG_INVALID_HANDLE) {
+ if (object == VG_INVALID_HANDLE) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
@@ -1537,41 +1540,41 @@ void vegaGetParameterfv(VGHandle object,
switch(paramType) {
case VG_PAINT_TYPE: {
- struct vg_paint *p = (struct vg_paint *)object;
+ struct vg_paint *p = handle_to_paint(object);
values[0] = paint_type(p);
}
break;
case VG_PAINT_COLOR_RAMP_SPREAD_MODE: {
- struct vg_paint *p = (struct vg_paint *)object;
+ struct vg_paint *p = handle_to_paint(object);
values[0] = paint_spread_mode(p);
}
break;
case VG_PAINT_COLOR_RAMP_PREMULTIPLIED: {
- struct vg_paint *p = (struct vg_paint *)object;
+ struct vg_paint *p = handle_to_paint(object);
values[0] = paint_color_ramp_premultiplied(p);
}
break;
case VG_PAINT_PATTERN_TILING_MODE: {
- values[0] = vgGetParameterf(object, paramType);
+ values[0] = vegaGetParameterf(object, paramType);
}
break;
case VG_PAINT_COLOR: {
- struct vg_paint *paint = (struct vg_paint *)object;
+ struct vg_paint *paint = handle_to_paint(object);
paint_get_color(paint, values);
}
break;
case VG_PAINT_COLOR_RAMP_STOPS: {
- struct vg_paint *paint = (struct vg_paint *)object;
+ struct vg_paint *paint = handle_to_paint(object);
paint_ramp_stops(paint, values, count);
}
break;
case VG_PAINT_LINEAR_GRADIENT: {
- struct vg_paint *paint = (struct vg_paint *)object;
+ struct vg_paint *paint = handle_to_paint(object);
paint_linear_gradient(paint, values);
}
break;
case VG_PAINT_RADIAL_GRADIENT: {
- struct vg_paint *paint = (struct vg_paint *)object;
+ struct vg_paint *paint = handle_to_paint(object);
paint_radial_gradient(paint, values);
}
break;
@@ -1580,11 +1583,11 @@ void vegaGetParameterfv(VGHandle object,
case VG_PATH_DATATYPE:
case VG_PATH_NUM_SEGMENTS:
case VG_PATH_NUM_COORDS:
- values[0] = vgGetParameteri(object, paramType);
+ values[0] = vegaGetParameteri(object, paramType);
break;
case VG_PATH_SCALE:
case VG_PATH_BIAS:
- values[0] = vgGetParameterf(object, paramType);
+ values[0] = vegaGetParameterf(object, paramType);
break;
case VG_IMAGE_FORMAT:
@@ -1592,7 +1595,7 @@ void vegaGetParameterfv(VGHandle object,
case VG_IMAGE_HEIGHT:
#ifdef OPENVG_VERSION_1_1
case VG_FONT_NUM_GLYPHS:
- values[0] = vgGetParameteri(object, paramType);
+ values[0] = vegaGetParameteri(object, paramType);
break;
#endif
@@ -1608,10 +1611,9 @@ void vegaGetParameteriv(VGHandle object,
VGint * values)
{
struct vg_context *ctx = vg_current_context();
- void *ptr = (void*)object;
- VGint real_count = vgGetParameterVectorSize(object, paramType);
+ VGint real_count = vegaGetParameterVectorSize(object, paramType);
- if (!ptr || object == VG_INVALID_HANDLE) {
+ if (object || object == VG_INVALID_HANDLE) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
@@ -1629,45 +1631,45 @@ void vegaGetParameteriv(VGHandle object,
case VG_PAINT_PATTERN_TILING_MODE:
#ifdef OPENVG_VERSION_1_1
case VG_FONT_NUM_GLYPHS:
- values[0] = vgGetParameteri(object, paramType);
+ values[0] = vegaGetParameteri(object, paramType);
break;
#endif
case VG_PAINT_COLOR: {
- struct vg_paint *paint = (struct vg_paint *)object;
+ struct vg_paint *paint = handle_to_paint(object);
paint_get_coloriv(paint, values);
}
break;
case VG_PAINT_COLOR_RAMP_STOPS: {
- struct vg_paint *paint = (struct vg_paint *)object;
+ struct vg_paint *paint = handle_to_paint(object);
paint_ramp_stopsi(paint, values, count);
}
break;
case VG_PAINT_LINEAR_GRADIENT: {
- struct vg_paint *paint = (struct vg_paint *)object;
+ struct vg_paint *paint = handle_to_paint(object);
paint_linear_gradienti(paint, values);
}
break;
case VG_PAINT_RADIAL_GRADIENT: {
- struct vg_paint *paint = (struct vg_paint *)object;
+ struct vg_paint *paint = handle_to_paint(object);
paint_radial_gradienti(paint, values);
}
break;
case VG_PATH_SCALE:
case VG_PATH_BIAS:
- values[0] = vgGetParameterf(object, paramType);
+ values[0] = vegaGetParameterf(object, paramType);
break;
case VG_PATH_FORMAT:
case VG_PATH_DATATYPE:
case VG_PATH_NUM_SEGMENTS:
case VG_PATH_NUM_COORDS:
- values[0] = vgGetParameteri(object, paramType);
+ values[0] = vegaGetParameteri(object, paramType);
break;
case VG_IMAGE_FORMAT:
case VG_IMAGE_WIDTH:
case VG_IMAGE_HEIGHT:
- values[0] = vgGetParameteri(object, paramType);
+ values[0] = vegaGetParameteri(object, paramType);
break;
default:
diff --git a/src/gallium/state_trackers/vega/api_path.c b/src/gallium/state_trackers/vega/api_path.c
index fe57b7671d..ab6ce95894 100644
--- a/src/gallium/state_trackers/vega/api_path.c
+++ b/src/gallium/state_trackers/vega/api_path.c
@@ -27,6 +27,7 @@
#include "VG/openvg.h"
#include "vg_context.h"
+#include "handle.h"
#include "path.h"
#include "api.h"
@@ -55,9 +56,9 @@ VGPath vegaCreatePath(VGint pathFormat,
return VG_INVALID_HANDLE;
}
- return (VGPath)path_create(datatype, scale, bias,
- segmentCapacityHint, coordCapacityHint,
- capabilities);
+ return path_to_handle(path_create(datatype, scale, bias,
+ segmentCapacityHint, coordCapacityHint,
+ capabilities));
}
void vegaClearPath(VGPath path, VGbitfield capabilities)
@@ -70,7 +71,7 @@ void vegaClearPath(VGPath path, VGbitfield capabilities)
return;
}
- p = (struct path *)path;
+ p = handle_to_path(path);
path_clear(p, capabilities);
}
@@ -84,7 +85,7 @@ void vegaDestroyPath(VGPath p)
return;
}
- path = (struct path *)p;
+ path = handle_to_path(p);
path_destroy(path);
}
@@ -100,7 +101,7 @@ void vegaRemovePathCapabilities(VGPath path,
return;
}
- p = (struct path*)path;
+ p = handle_to_path(path);
current = path_capabilities(p);
path_set_capabilities(p, (current &
(~(capabilities & VG_PATH_CAPABILITY_ALL))));
@@ -115,7 +116,7 @@ VGbitfield vegaGetPathCapabilities(VGPath path)
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return 0;
}
- p = (struct path*)path;
+ p = handle_to_path(path);
return path_capabilities(p);
}
@@ -128,8 +129,8 @@ void vegaAppendPath(VGPath dstPath, VGPath srcPath)
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
- src = (struct path *)srcPath;
- dst = (struct path *)dstPath;
+ src = handle_to_path(srcPath);
+ dst = handle_to_path(dstPath);
if (!(path_capabilities(src) & VG_PATH_CAPABILITY_APPEND_FROM) ||
!(path_capabilities(dst) & VG_PATH_CAPABILITY_APPEND_TO)) {
@@ -167,9 +168,9 @@ void vegaAppendPathData(VGPath dstPath,
}
}
- p = (struct path*)dstPath;
+ p = handle_to_path(dstPath);
- if (!pathData || !is_aligned_to(pathData, path_datatype_size(p))) {
+ if (!p || !is_aligned_to(p, path_datatype_size(p))) {
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
return;
}
@@ -199,7 +200,7 @@ void vegaModifyPathCoords(VGPath dstPath,
return;
}
- p = (struct path *)dstPath;
+ p = handle_to_path(dstPath);
if (!pathData || !is_aligned_to(pathData, path_datatype_size(p))) {
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
@@ -226,8 +227,8 @@ void vegaTransformPath(VGPath dstPath, VGPath srcPath)
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
- src = (struct path *)srcPath;
- dst = (struct path *)dstPath;
+ src = handle_to_path(srcPath);
+ dst = handle_to_path(dstPath);
if (!(path_capabilities(src) & VG_PATH_CAPABILITY_TRANSFORM_FROM) ||
!(path_capabilities(dst) & VG_PATH_CAPABILITY_TRANSFORM_TO)) {
@@ -251,9 +252,9 @@ VGboolean vegaInterpolatePath(VGPath dstPath,
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return VG_FALSE;
}
- dst = (struct path *)dstPath;
- start = (struct path *)startPath;
- end = (struct path *)endPath;
+ dst = handle_to_path(dstPath);
+ start = handle_to_path(startPath);
+ end = handle_to_path(endPath);
if (!(path_capabilities(dst) & VG_PATH_CAPABILITY_INTERPOLATE_TO) ||
!(path_capabilities(start) & VG_PATH_CAPABILITY_INTERPOLATE_FROM) ||
@@ -285,7 +286,7 @@ VGfloat vegaPathLength(VGPath path,
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
return -1;
}
- p = (struct path*)path;
+ p = handle_to_path(path);
if (!(path_capabilities(p) & VG_PATH_CAPABILITY_PATH_LENGTH)) {
vg_set_error(ctx, VG_PATH_CAPABILITY_ERROR);
@@ -330,7 +331,7 @@ void vegaPointAlongPath(VGPath path,
return;
}
- p = (struct path*)path;
+ p = handle_to_path(path);
caps = path_capabilities(p);
if (!(caps & VG_PATH_CAPABILITY_POINT_ALONG_PATH) ||
@@ -385,7 +386,7 @@ void vegaPathBounds(VGPath path,
return;
}
- p = (struct path*)path;
+ p = handle_to_path(path);
caps = path_capabilities(p);
if (!(caps & VG_PATH_CAPABILITY_PATH_BOUNDS)) {
@@ -422,7 +423,7 @@ void vegaPathTransformedBounds(VGPath path,
return;
}
- p = (struct path*)path;
+ p = handle_to_path(path);
caps = path_capabilities(p);
if (!(caps & VG_PATH_CAPABILITY_PATH_TRANSFORMED_BOUNDS)) {
@@ -466,6 +467,7 @@ void vegaPathTransformedBounds(VGPath path,
void vegaDrawPath(VGPath path, VGbitfield paintModes)
{
struct vg_context *ctx = vg_current_context();
+ struct path *p = handle_to_path(path);
if (path == VG_INVALID_HANDLE) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
@@ -477,9 +479,9 @@ void vegaDrawPath(VGPath path, VGbitfield paintModes)
return;
}
- if (path_is_empty((struct path*)path))
+ if (path_is_empty(p))
return;
- path_render((struct path*)path, paintModes,
+ path_render(p, paintModes,
&ctx->state.vg.path_user_to_surface_matrix);
}
diff --git a/src/gallium/state_trackers/vega/api_text.c b/src/gallium/state_trackers/vega/api_text.c
index 7c6b479409..824c763040 100644
--- a/src/gallium/state_trackers/vega/api_text.c
+++ b/src/gallium/state_trackers/vega/api_text.c
@@ -29,6 +29,7 @@
#include "vg_context.h"
#include "text.h"
#include "api.h"
+#include "handle.h"
#include "util/u_memory.h"
@@ -43,19 +44,19 @@ VGFont vegaCreateFont(VGint glyphCapacityHint)
return VG_INVALID_HANDLE;
}
- return (VGFont) font_create(glyphCapacityHint);
+ return font_to_handle(font_create(glyphCapacityHint));
}
void vegaDestroyFont(VGFont f)
{
- struct vg_font *font = (struct vg_font *)f;
+ struct vg_font *font = handle_to_font(f);
struct vg_context *ctx = vg_current_context();
if (f == VG_INVALID_HANDLE) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
- if (!vg_object_is_valid((void *) font, VG_OBJECT_FONT)) {
+ if (!vg_object_is_valid(f, VG_OBJECT_FONT)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
@@ -75,7 +76,7 @@ void vegaSetGlyphToPath(VGFont font,
struct vg_font *f;
if (font == VG_INVALID_HANDLE ||
- !vg_context_is_object_valid(ctx, VG_OBJECT_FONT, (void *)font)) {
+ !vg_context_is_object_valid(ctx, VG_OBJECT_FONT, font)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
@@ -85,13 +86,13 @@ void vegaSetGlyphToPath(VGFont font,
return;
}
if (path != VG_INVALID_HANDLE &&
- !vg_context_is_object_valid(ctx, VG_OBJECT_PATH, (void *)path)) {
+ !vg_context_is_object_valid(ctx, VG_OBJECT_PATH, path)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
- pathObj = (struct path*) path;
- f = (struct vg_font*) font;
+ pathObj = handle_to_path(path);
+ f = handle_to_font(font);
font_set_glyph_to_path(f, glyphIndex, pathObj,
isHinted, glyphOrigin, escapement);
@@ -108,7 +109,7 @@ void vegaSetGlyphToImage(VGFont font,
struct vg_font *f;
if (font == VG_INVALID_HANDLE ||
- !vg_context_is_object_valid(ctx, VG_OBJECT_FONT, (void *)font)) {
+ !vg_context_is_object_valid(ctx, VG_OBJECT_FONT, font)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
@@ -118,13 +119,13 @@ void vegaSetGlyphToImage(VGFont font,
return;
}
if (image != VG_INVALID_HANDLE &&
- !vg_context_is_object_valid(ctx, VG_OBJECT_IMAGE, (void *)image)) {
+ !vg_context_is_object_valid(ctx, VG_OBJECT_IMAGE, image)) {
vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
return;
}
- img_obj = (struct vg_image*)image;
- f = (struct vg_font*)font;
+ img_obj = handle_to_image(image);
+ f = handle_to_font(font);
font_set_glyph_to_image(f, glyphIndex, img_obj, glyphOrigin, escapement);
}
@@ -140,7 +141,7 @@ void vegaClearGlyph(VGFont font,
return;
}
- f = (struct vg_font*) font;
+ f = handle_to_font(font);
font_clear_glyph(f, glyphIndex);
}
@@ -161,7 +162,7 @@ void vegaDrawGlyph(VGFont font,
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
return;
}
- f = (struct vg_font*)font;
+ f = handle_to_font(font);
font_draw_glyph(f, glyphIndex, paintModes, allowAutoHinting);
}
@@ -199,7 +200,7 @@ void vegaDrawGlyphs(VGFont font,
return;
}
- f = (struct vg_font*)font;
+ f = handle_to_font(font);
font_draw_glyphs(f, glyphCount, glyphIndices,
adjustments_x, adjustments_y, paintModes, allowAutoHinting);
diff --git a/src/gallium/state_trackers/vega/handle.c b/src/gallium/state_trackers/vega/handle.c
new file mode 100644
index 0000000000..11eedd923e
--- /dev/null
+++ b/src/gallium/state_trackers/vega/handle.c
@@ -0,0 +1,93 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "handle.h"
+#include "util/u_hash.h"
+#include "util/u_hash_table.h"
+
+
+/**
+ * Hash keys are 32-bit VGHandles
+ */
+
+struct util_hash_table *handle_hash = NULL;
+
+
+static unsigned next_handle = 1;
+
+
+static unsigned
+hash_func(void *key)
+{
+ /* XXX this kind of ugly */
+ intptr_t ip = pointer_to_intptr(key);
+ return (unsigned) (ip & 0xffffffff);
+}
+
+
+static int
+compare(void *key1, void *key2)
+{
+ if (key1 < key2)
+ return -1;
+ else if (key1 > key2)
+ return +1;
+ else
+ return 0;
+}
+
+
+void
+init_handles(void)
+{
+ if (!handle_hash)
+ handle_hash = util_hash_table_create(hash_func, compare);
+}
+
+
+void
+free_handles(void)
+{
+ /* XXX destroy */
+}
+
+
+VGHandle
+create_handle(void *object)
+{
+ VGHandle h = next_handle++;
+ util_hash_table_set(handle_hash, intptr_to_pointer(h), object);
+ return h;
+}
+
+
+void
+destroy_handle(VGHandle h)
+{
+ util_hash_table_remove(handle_hash, intptr_to_pointer(h));
+}
+
diff --git a/src/gallium/state_trackers/vega/handle.h b/src/gallium/state_trackers/vega/handle.h
new file mode 100644
index 0000000000..9ed326d509
--- /dev/null
+++ b/src/gallium/state_trackers/vega/handle.h
@@ -0,0 +1,171 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * Convert opaque VG object handles into pointers and vice versa.
+ * XXX This is not yet 64-bit safe! All VG handles are 32 bits in size.
+ */
+
+
+#ifndef HANDLE_H
+#define HANDLE_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_hash_table.h"
+#include "util/u_pointer.h"
+
+#include "VG/openvg.h"
+#include "vg_context.h"
+
+
+extern struct util_hash_table *handle_hash;
+
+
+struct vg_mask_layer;
+struct vg_font;
+struct vg_image;
+struct vg_paint;
+struct path;
+
+
+extern void
+init_handles(void);
+
+
+extern void
+free_handles(void);
+
+
+extern VGHandle
+create_handle(void *object);
+
+
+extern void
+destroy_handle(VGHandle h);
+
+
+static INLINE VGHandle
+object_to_handle(struct vg_object *obj)
+{
+ return obj ? obj->handle : VG_INVALID_HANDLE;
+}
+
+
+static INLINE VGHandle
+image_to_handle(struct vg_image *img)
+{
+ /* vg_image is derived from vg_object */
+ return object_to_handle((struct vg_object *) img);
+}
+
+
+static INLINE VGHandle
+masklayer_to_handle(struct vg_mask_layer *mask)
+{
+ /* vg_object is derived from vg_object */
+ return object_to_handle((struct vg_object *) mask);
+}
+
+
+static INLINE VGHandle
+font_to_handle(struct vg_font *font)
+{
+ return object_to_handle((struct vg_object *) font);
+}
+
+
+static INLINE VGHandle
+paint_to_handle(struct vg_paint *paint)
+{
+ return object_to_handle((struct vg_object *) paint);
+}
+
+
+static INLINE VGHandle
+path_to_handle(struct path *path)
+{
+ return object_to_handle((struct vg_object *) path);
+}
+
+
+static INLINE void *
+handle_to_pointer(VGHandle h)
+{
+ void *v = util_hash_table_get(handle_hash, intptr_to_pointer(h));
+#ifdef DEBUG
+ if (v) {
+ struct vg_object *obj = (struct vg_object *) v;
+ assert(obj->handle == h);
+ }
+#endif
+ return v;
+}
+
+
+static INLINE struct vg_font *
+handle_to_font(VGHandle h)
+{
+ return (struct vg_font *) handle_to_pointer(h);
+}
+
+
+static INLINE struct vg_image *
+handle_to_image(VGHandle h)
+{
+ return (struct vg_image *) handle_to_pointer(h);
+}
+
+
+static INLINE struct vg_mask_layer *
+handle_to_masklayer(VGHandle h)
+{
+ return (struct vg_mask_layer *) handle_to_pointer(h);
+}
+
+
+static INLINE struct vg_object *
+handle_to_object(VGHandle h)
+{
+ return (struct vg_object *) handle_to_pointer(h);
+}
+
+
+static INLINE struct vg_paint *
+handle_to_paint(VGHandle h)
+{
+ return (struct vg_paint *) handle_to_pointer(h);
+}
+
+
+static INLINE struct path *
+handle_to_path(VGHandle h)
+{
+ return (struct path *) handle_to_pointer(h);
+}
+
+
+#endif /* HANDLE_H */
diff --git a/src/gallium/state_trackers/vega/image.c b/src/gallium/state_trackers/vega/image.c
index 318ea94bdf..44480876b6 100644
--- a/src/gallium/state_trackers/vega/image.c
+++ b/src/gallium/state_trackers/vega/image.c
@@ -257,7 +257,7 @@ struct vg_image * image_create(VGImageFormat format,
image->sampler.normalized_coords = 1;
assert(screen->is_format_supported(screen, pformat, PIPE_TEXTURE_2D,
- 0, PIPE_BIND_SAMPLER_VIEW, 0));
+ 0, PIPE_BIND_SAMPLER_VIEW));
memset(&pt, 0, sizeof(pt));
pt.target = PIPE_TEXTURE_2D;
@@ -349,6 +349,8 @@ void image_destroy(struct vg_image *img)
array_destroy(img->children_array);
}
+ vg_free_object(&img->base);
+
pipe_sampler_view_reference(&img->sampler_view, NULL);
FREE(img);
}
@@ -516,8 +518,6 @@ void image_copy(struct vg_image *dst, VGint dx, VGint dy,
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
return;
}
- /* make sure rendering has completed */
- ctx->pipe->flush(ctx->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
vg_copy_texture(ctx, dst->sampler_view->texture, dst->x + dx, dst->y + dy,
src->sampler_view, src->x + sx, src->y + sy, width, height);
}
@@ -567,9 +567,6 @@ void image_set_pixels(VGint dx, VGint dy,
struct pipe_surface *surf, surf_tmpl;
struct st_renderbuffer *strb = ctx->draw_buffer->strb;
- /* make sure rendering has completed */
- pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
-
memset(&surf_tmpl, 0, sizeof(surf_tmpl));
u_surface_default_template(&surf_tmpl, image_texture(src),
0 /* no bind flag - not a surface*/);
@@ -593,9 +590,6 @@ void image_get_pixels(struct vg_image *dst, VGint dx, VGint dy,
/* flip the y coordinates */
/*dy = dst->height - dy - height;*/
- /* make sure rendering has completed */
- pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
-
memset(&surf_tmpl, 0, sizeof(surf_tmpl));
u_surface_default_template(&surf_tmpl, image_texture(dst),
PIPE_BIND_RENDER_TARGET);
diff --git a/src/gallium/state_trackers/vega/mask.c b/src/gallium/state_trackers/vega/mask.c
index dfd0600e44..10590e29cd 100644
--- a/src/gallium/state_trackers/vega/mask.c
+++ b/src/gallium/state_trackers/vega/mask.c
@@ -111,8 +111,6 @@ static void read_alpha_mask(void * data, VGint dataStride,
VGubyte *dst = (VGubyte *)data;
VGint xoffset = 0, yoffset = 0;
- /* make sure rendering has completed */
- pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
if (sx < 0) {
xoffset = -sx;
xoffset *= _vega_size_for_format(dataFormat);
diff --git a/src/gallium/state_trackers/vega/paint.c b/src/gallium/state_trackers/vega/paint.c
index 2db8cbcf7c..6e5348a1ff 100644
--- a/src/gallium/state_trackers/vega/paint.c
+++ b/src/gallium/state_trackers/vega/paint.c
@@ -748,3 +748,10 @@ void paint_fill_constant_buffer(struct vg_paint *paint,
abort();
}
}
+
+VGboolean paint_is_opaque(struct vg_paint *paint)
+{
+ /* TODO add other paint types and make sure PAINT_DIRTY gets set */
+ return (paint->type == VG_PAINT_TYPE_COLOR &&
+ floatsEqual(paint->solid.color[3], 1.0f));
+}
diff --git a/src/gallium/state_trackers/vega/paint.h b/src/gallium/state_trackers/vega/paint.h
index 3de3bbe12e..e5357763b8 100644
--- a/src/gallium/state_trackers/vega/paint.h
+++ b/src/gallium/state_trackers/vega/paint.h
@@ -118,5 +118,6 @@ void paint_fill_constant_buffer(struct vg_paint *paint,
const struct matrix *mat,
void *buffer);
+VGboolean paint_is_opaque(struct vg_paint *paint);
#endif
diff --git a/src/gallium/state_trackers/vega/polygon.c b/src/gallium/state_trackers/vega/polygon.c
index a491de27fa..bcc5cb272c 100644
--- a/src/gallium/state_trackers/vega/polygon.c
+++ b/src/gallium/state_trackers/vega/polygon.c
@@ -303,7 +303,6 @@ void polygon_fill(struct polygon *poly, struct vg_context *ctx)
vbuffer.buffer = poly->vbuf;
vbuffer.stride = COMPONENTS * sizeof(float); /* vertex size */
vbuffer.buffer_offset = 0;
- vbuffer.max_index = poly->num_verts - 1;
renderer_polygon_stencil_begin(ctx->renderer,
&velement, ctx->state.vg.fill_rule, VG_FALSE);
@@ -354,7 +353,6 @@ void polygon_array_fill(struct polygon_array *polyarray, struct vg_context *ctx)
polygon_prepare_buffer(ctx, poly);
vbuffer.buffer = poly->vbuf;
- vbuffer.max_index = poly->num_verts - 1;
renderer_polygon_stencil(ctx->renderer, &vbuffer,
PIPE_PRIM_TRIANGLE_FAN, 0, (VGuint) poly->num_verts);
diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c
index 7871c516c4..5715073e2d 100644
--- a/src/gallium/state_trackers/vega/renderer.c
+++ b/src/gallium/state_trackers/vega/renderer.c
@@ -28,7 +28,7 @@
#include "renderer.h"
#include "vg_context.h"
-#include "image.h"
+#include "paint.h" /* for paint_is_opaque */
#include "pipe/p_context.h"
#include "pipe/p_state.h"
@@ -140,7 +140,7 @@ static VGboolean renderer_can_support(struct renderer *renderer,
struct pipe_screen *screen = renderer->pipe->screen;
return screen->is_format_supported(screen,
- res->format, res->target, 0, bindings, 0);
+ res->format, res->target, 0, bindings);
}
/**
@@ -174,6 +174,7 @@ static void renderer_set_mvp(struct renderer *renderer,
pipe_resource_reference(&cbuf, NULL);
cbuf = pipe_buffer_create(renderer->pipe->screen,
PIPE_BIND_CONSTANT_BUFFER,
+ PIPE_USAGE_STATIC,
sizeof(consts));
if (cbuf) {
pipe_buffer_write(renderer->pipe, cbuf,
@@ -474,7 +475,8 @@ static void renderer_set_custom_fs(struct renderer *renderer,
pipe_resource_reference(&cbuf, NULL);
cbuf = pipe_buffer_create(renderer->pipe->screen,
- PIPE_BIND_CONSTANT_BUFFER, const_buffer_len);
+ PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STATIC,
+ const_buffer_len);
pipe_buffer_write(renderer->pipe, cbuf, 0,
const_buffer_len, const_buffer);
renderer->pipe->set_constant_buffer(renderer->pipe,
@@ -572,7 +574,7 @@ static void renderer_quad_draw(struct renderer *r)
sizeof(r->vertices),
PIPE_BIND_VERTEX_BUFFER);
if (buf) {
- util_draw_vertex_buffer(r->pipe, buf, 0,
+ util_draw_vertex_buffer(r->pipe, r->cso, buf, 0,
PIPE_PRIM_TRIANGLE_FAN,
Elements(r->vertices), /* verts */
Elements(r->vertices[0])); /* attribs/vert */
@@ -1050,7 +1052,7 @@ void renderer_polygon_stencil(struct renderer *renderer,
{
assert(renderer->state == RENDERER_STATE_POLYGON_STENCIL);
- renderer->pipe->set_vertex_buffers(renderer->pipe, 1, vbuf);
+ cso_set_vertex_buffers(renderer->cso, 1, vbuf);
if (!renderer->u.polygon_stencil.manual_two_sides) {
util_draw_arrays(renderer->pipe, mode, start, count);
@@ -1290,7 +1292,11 @@ static void renderer_validate_blend(struct renderer *renderer,
blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
break;
case VG_BLEND_SRC_OVER:
- if (!util_format_has_alpha(fb_format)) {
+ if (paint_is_opaque(state->fill_paint) &&
+ paint_is_opaque(state->stroke_paint)) {
+ /* no blending */
+ }
+ else if (!util_format_has_alpha(fb_format)) {
blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA;
@@ -1457,11 +1463,11 @@ void renderer_copy_surface(struct renderer *ctx,
}
assert(screen->is_format_supported(screen, src->format, PIPE_TEXTURE_2D,
- 0, PIPE_BIND_SAMPLER_VIEW, 0));
+ 0, PIPE_BIND_SAMPLER_VIEW));
assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D,
- 0, PIPE_BIND_SAMPLER_VIEW, 0));
+ 0, PIPE_BIND_SAMPLER_VIEW));
assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D,
- 0, PIPE_BIND_RENDER_TARGET, 0));
+ 0, PIPE_BIND_RENDER_TARGET));
/*
* XXX for now we're always creating a temporary texture.
diff --git a/src/gallium/state_trackers/vega/shader.c b/src/gallium/state_trackers/vega/shader.c
index 0ed721376c..bee6d84001 100644
--- a/src/gallium/state_trackers/vega/shader.c
+++ b/src/gallium/state_trackers/vega/shader.c
@@ -34,9 +34,7 @@
#include "renderer.h"
#include "pipe/p_context.h"
-#include "pipe/p_screen.h"
#include "pipe/p_state.h"
-#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
@@ -136,6 +134,8 @@ static VGboolean blend_use_shader(struct vg_context *ctx)
switch (ctx->state.vg.blend_mode) {
case VG_BLEND_SRC_OVER:
advanced_blending =
+ (!paint_is_opaque(ctx->state.vg.fill_paint) ||
+ !paint_is_opaque(ctx->state.vg.stroke_paint)) &&
util_format_has_alpha(ctx->draw_buffer->strb->format);
break;
case VG_BLEND_DST_OVER:
diff --git a/src/gallium/state_trackers/vega/text.c b/src/gallium/state_trackers/vega/text.c
index 01ff602f58..6714ee9ad3 100644
--- a/src/gallium/state_trackers/vega/text.c
+++ b/src/gallium/state_trackers/vega/text.c
@@ -30,7 +30,6 @@
#include "text.h"
#include "image.h"
#include "path.h"
-#include "api.h"
#ifdef OPENVG_VERSION_1_1
diff --git a/src/gallium/state_trackers/vega/vg_context.c b/src/gallium/state_trackers/vega/vg_context.c
index 0844012cc3..c2ab56455b 100644
--- a/src/gallium/state_trackers/vega/vg_context.c
+++ b/src/gallium/state_trackers/vega/vg_context.c
@@ -30,17 +30,16 @@
#include "renderer.h"
#include "shaders_cache.h"
#include "shader.h"
-#include "asm_util.h"
#include "vg_manager.h"
#include "api.h"
#include "mask.h"
+#include "handle.h"
#include "pipe/p_context.h"
#include "util/u_inlines.h"
#include "cso_cache/cso_context.h"
-#include "util/u_simple_shaders.h"
#include "util/u_memory.h"
#include "util/u_blit.h"
#include "util/u_sampler.h"
@@ -70,7 +69,7 @@ choose_depth_stencil_format(struct vg_context *ctx)
for (fmt = formats; *fmt != PIPE_FORMAT_NONE; fmt++) {
if (screen->is_format_supported(screen, *fmt,
- PIPE_TEXTURE_2D, 0, PIPE_BIND_DEPTH_STENCIL, 0))
+ PIPE_TEXTURE_2D, 0, PIPE_BIND_DEPTH_STENCIL))
break;
}
@@ -184,17 +183,26 @@ void vg_init_object(struct vg_object *obj, struct vg_context *ctx, enum vg_objec
{
obj->type = type;
obj->ctx = ctx;
+ obj->handle = create_handle(obj);
+}
+
+/** free object resources, but not the object itself */
+void vg_free_object(struct vg_object *obj)
+{
+ obj->type = 0;
+ obj->ctx = NULL;
+ destroy_handle(obj->handle);
}
VGboolean vg_context_is_object_valid(struct vg_context *ctx,
enum vg_object_type type,
- void *ptr)
+ VGHandle object)
{
if (ctx) {
struct cso_hash *hash = ctx->owned_objects[type];
if (!hash)
return VG_FALSE;
- return cso_hash_contains(hash, (unsigned)(long)ptr);
+ return cso_hash_contains(hash, (unsigned)(long)object);
}
return VG_FALSE;
}
@@ -400,8 +408,9 @@ void vg_validate_state(struct vg_context *ctx)
if (vg_context_update_depth_stencil_rb(ctx, stfb->width, stfb->height))
ctx->state.dirty |= DEPTH_STENCIL_DIRTY;
- /* blend state depends on fb format */
- if (ctx->state.dirty & FRAMEBUFFER_DIRTY)
+ /* blend state depends on fb format and paint color */
+ if ((ctx->state.dirty & FRAMEBUFFER_DIRTY) ||
+ (ctx->state.dirty & PAINT_DIRTY))
ctx->state.dirty |= BLEND_DIRTY;
renderer_validate(ctx->renderer, ctx->state.dirty,
@@ -414,10 +423,10 @@ void vg_validate_state(struct vg_context *ctx)
shader_set_color_transform(ctx->shader, ctx->state.vg.color_transform);
}
-VGboolean vg_object_is_valid(void *ptr, enum vg_object_type type)
+VGboolean vg_object_is_valid(VGHandle object, enum vg_object_type type)
{
- struct vg_object *obj = ptr;
- if (ptr && is_aligned(obj) && obj->type == type)
+ struct vg_object *obj = handle_to_object(object);
+ if (obj && is_aligned(obj) && obj->type == type)
return VG_TRUE;
else
return VG_FALSE;
diff --git a/src/gallium/state_trackers/vega/vg_context.h b/src/gallium/state_trackers/vega/vg_context.h
index d616a20a3d..71491a5aa2 100644
--- a/src/gallium/state_trackers/vega/vg_context.h
+++ b/src/gallium/state_trackers/vega/vg_context.h
@@ -81,10 +81,12 @@ enum dirty_state {
BLEND_DIRTY = 1 << 0,
FRAMEBUFFER_DIRTY = 1 << 1,
DEPTH_STENCIL_DIRTY = 1 << 2,
+ PAINT_DIRTY = 1 << 3,
ALL_DIRTY = BLEND_DIRTY |
FRAMEBUFFER_DIRTY |
- DEPTH_STENCIL_DIRTY
+ DEPTH_STENCIL_DIRTY |
+ PAINT_DIRTY
};
struct vg_context
@@ -129,12 +131,21 @@ struct vg_context
struct blit_state *blit;
};
+
+/**
+ * Base class for VG objects like paths, images, fonts.
+ */
struct vg_object {
enum vg_object_type type;
+ VGHandle handle;
struct vg_context *ctx;
};
+
+
void vg_init_object(struct vg_object *obj, struct vg_context *ctx, enum vg_object_type type);
-VGboolean vg_object_is_valid(void *ptr, enum vg_object_type type);
+void vg_free_object(struct vg_object *obj);
+
+VGboolean vg_object_is_valid(VGHandle object, enum vg_object_type type);
struct vg_context *vg_create_context(struct pipe_context *pipe,
const void *visual,
@@ -145,7 +156,7 @@ void vg_set_current_context(struct vg_context *ctx);
VGboolean vg_context_is_object_valid(struct vg_context *ctx,
enum vg_object_type type,
- void *ptr);
+ VGHandle object);
void vg_context_add_object(struct vg_context *ctx,
enum vg_object_type type,
void *ptr);
diff --git a/src/gallium/state_trackers/vega/vg_manager.c b/src/gallium/state_trackers/vega/vg_manager.c
index de935768b2..eeea68677d 100644
--- a/src/gallium/state_trackers/vega/vg_manager.c
+++ b/src/gallium/state_trackers/vega/vg_manager.c
@@ -33,15 +33,14 @@
#include "pipe/p_screen.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
-#include "util/u_sampler.h"
#include "util/u_box.h"
#include "util/u_surface.h"
#include "vg_api.h"
#include "vg_manager.h"
#include "vg_context.h"
-#include "image.h"
#include "api.h"
+#include "handle.h"
static boolean
vg_context_update_color_rb(struct vg_context *ctx, struct pipe_resource *pt)
@@ -143,8 +142,8 @@ vg_context_flush(struct st_context_iface *stctxi, unsigned flags,
struct pipe_fence_handle **fence)
{
struct vg_context *ctx = (struct vg_context *) stctxi;
- ctx->pipe->flush(ctx->pipe, flags, fence);
- if (flags & PIPE_FLUSH_RENDER_CACHE)
+ ctx->pipe->flush(ctx->pipe, fence);
+ if (flags & ST_FLUSH_FRONT)
vg_manager_flush_frontbuffer(ctx);
}
@@ -174,6 +173,9 @@ vg_api_create_context(struct st_api *stapi, struct st_manager *smapi,
if (attribs->major > 1 || (attribs->major == 1 && attribs->minor > 0))
return NULL;
+ /* for VGHandle / pointer lookups */
+ init_handles();
+
pipe = smapi->screen->context_create(smapi->screen, NULL);
if (!pipe)
return NULL;
diff --git a/src/gallium/state_trackers/vega/vgu.c b/src/gallium/state_trackers/vega/vgu.c
index 7dc51c5599..4206a91e00 100644
--- a/src/gallium/state_trackers/vega/vgu.c
+++ b/src/gallium/state_trackers/vega/vgu.c
@@ -29,6 +29,7 @@
#include "matrix.h"
#include "path.h"
+#include "handle.h"
#include "util/u_debug.h"
#include "util/u_pointer.h"
@@ -36,16 +37,6 @@
#include <math.h>
#include <assert.h>
-static VGboolean is_aligned_to(const void *ptr, VGbyte alignment)
-{
- void *aligned = align_pointer(ptr, alignment);
- return (ptr == aligned) ? VG_TRUE : VG_FALSE;
-}
-
-static VGboolean is_aligned(const void *ptr)
-{
- return is_aligned_to(ptr, 4);
-}
static void vgu_append_float_coords(VGPath path,
const VGubyte *cmds,
@@ -54,7 +45,7 @@ static void vgu_append_float_coords(VGPath path,
VGint num_coords)
{
VGubyte common_data[40 * sizeof(VGfloat)];
- struct path *p = (struct path *)path;
+ struct path *p = handle_to_path(path);
vg_float_to_datatype(path_datatype(p), common_data, coords, num_coords);
vgAppendPathData(path, num_cmds, cmds, common_data);
diff --git a/src/gallium/state_trackers/wgl/SConscript b/src/gallium/state_trackers/wgl/SConscript
index 1b7597de44..7cb953ba74 100644
--- a/src/gallium/state_trackers/wgl/SConscript
+++ b/src/gallium/state_trackers/wgl/SConscript
@@ -15,6 +15,9 @@ env.AppendUnique(CPPDEFINES = [
'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers
'WIN32_LEAN_AND_MEAN', # http://msdn2.microsoft.com/en-us/library/6dwk3a1z.aspx
])
+if not env['gles']:
+ # prevent _glapi_* from being declared __declspec(dllimport)
+ env.Append(CPPDEFINES = ['_GLAPI_NO_EXPORTS'])
sources = [
'stw_context.c',
diff --git a/src/gallium/state_trackers/wgl/stw_context.c b/src/gallium/state_trackers/wgl/stw_context.c
index cd4f3c8b3e..5608d4f4ce 100644
--- a/src/gallium/state_trackers/wgl/stw_context.c
+++ b/src/gallium/state_trackers/wgl/stw_context.c
@@ -277,7 +277,7 @@ stw_make_current(
return TRUE;
}
} else {
- curctx->st->flush(curctx->st, PIPE_FLUSH_RENDER_CACHE, NULL);
+ curctx->st->flush(curctx->st, ST_FLUSH_FRONT, NULL);
}
}
@@ -351,11 +351,7 @@ stw_flush_current_locked( struct stw_framebuffer *fb )
struct stw_context *ctx = stw_current_context();
if (ctx && ctx->current_framebuffer == fb) {
- ctx->st->flush(ctx->st,
- PIPE_FLUSH_RENDER_CACHE |
- PIPE_FLUSH_SWAPBUFFERS |
- PIPE_FLUSH_FRAME,
- NULL);
+ ctx->st->flush(ctx->st, ST_FLUSH_FRONT, NULL);
}
}
diff --git a/src/gallium/state_trackers/wgl/stw_device.c b/src/gallium/state_trackers/wgl/stw_device.c
index c4822d4d8a..e65e71dc15 100644
--- a/src/gallium/state_trackers/wgl/stw_device.c
+++ b/src/gallium/state_trackers/wgl/stw_device.c
@@ -41,8 +41,6 @@
#include "stw_framebuffer.h"
#include "stw_st.h"
-extern _glthread_Mutex OneTimeLock;
-
struct stw_device *stw_dev = NULL;
@@ -50,7 +48,19 @@ static int
stw_get_param(struct st_manager *smapi,
enum st_manager_param param)
{
- return 0;
+ switch (param) {
+ case ST_MANAGER_BROKEN_INVALIDATE:
+ /*
+ * Force framebuffer validation on glViewport.
+ *
+ * Certain applications, like Rhinoceros 4, uses glReadPixels
+ * exclusively (never uses SwapBuffers), so framebuffers never get
+ * resized unless we check on glViewport.
+ */
+ return 1;
+ default:
+ return 0;
+ }
}
boolean
@@ -74,8 +84,6 @@ stw_init(const struct stw_winsys *stw_winsys)
stw_dev->stw_winsys = stw_winsys;
- _glthread_INIT_MUTEX(OneTimeLock);
-
stw_dev->stapi = stw_st_create_api();
stw_dev->smapi = CALLOC_STRUCT(st_manager);
if (!stw_dev->stapi || !stw_dev->smapi)
@@ -168,9 +176,10 @@ stw_cleanup(void)
stw_dev->screen->destroy(stw_dev->screen);
- _glthread_DESTROY_MUTEX(OneTimeLock);
-
+ /* glapi is statically linked: we can call the local destroy function. */
+#ifdef _GLAPI_NO_EXPORTS
_glapi_destroy_multithread();
+#endif
#ifdef DEBUG
debug_memory_end(stw_dev->memdbg_no);
diff --git a/src/gallium/state_trackers/wgl/stw_pixelformat.c b/src/gallium/state_trackers/wgl/stw_pixelformat.c
index 5ede1d4322..333f50c25f 100644
--- a/src/gallium/state_trackers/wgl/stw_pixelformat.c
+++ b/src/gallium/state_trackers/wgl/stw_pixelformat.c
@@ -225,7 +225,7 @@ stw_pixelformat_init( void )
if(!screen->is_format_supported(screen, color->format, PIPE_TEXTURE_2D,
0, PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DISPLAY_TARGET, 0))
+ PIPE_BIND_DISPLAY_TARGET))
continue;
for(k = 0; k < Elements(stw_pf_doublebuffer); ++k) {
@@ -235,7 +235,7 @@ stw_pixelformat_init( void )
const struct stw_pf_depth_info *depth = &stw_pf_depth_stencil[l];
if(!screen->is_format_supported(screen, depth->format, PIPE_TEXTURE_2D,
- 0, PIPE_BIND_DEPTH_STENCIL, 0))
+ 0, PIPE_BIND_DEPTH_STENCIL))
continue;
stw_pixelformat_add( stw_dev, color, depth, 0, doublebuffer, samples );
diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c
index 28e30e09ff..d751ac1870 100644
--- a/src/gallium/state_trackers/xorg/xorg_crtc.c
+++ b/src/gallium/state_trackers/xorg/xorg_crtc.c
@@ -133,6 +133,14 @@ crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode,
if (ret)
return FALSE;
+ /* Only set gamma when needed, to avoid unneeded delays. */
+#if defined(XF86_CRTC_VERSION) && XF86_CRTC_VERSION >= 3
+ if (!crtc->active && crtc->version >= 3)
+ crtc->funcs->gamma_set(crtc, crtc->gamma_red, crtc->gamma_green,
+ crtc->gamma_blue, crtc->gamma_size);
+ crtc->active = TRUE;
+#endif
+
crtc->x = x;
crtc->y = y;
crtc->mode = *mode;
@@ -145,7 +153,10 @@ static void
crtc_gamma_set(xf86CrtcPtr crtc, CARD16 * red, CARD16 * green, CARD16 * blue,
int size)
{
- /* XXX: hockup */
+ modesettingPtr ms = modesettingPTR(crtc->scrn);
+ struct crtc_private *crtcp = crtc->driver_private;
+
+ drmModeCrtcSetGamma(ms->fd, crtcp->drm_crtc->crtc_id, size, red, green, blue);
}
#if 0 /* Implement and enable to enable rotation and reflection. */
diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c
index 17c34b7eac..6f2c52eabb 100644
--- a/src/gallium/state_trackers/xorg/xorg_dri2.c
+++ b/src/gallium/state_trackers/xorg/xorg_dri2.c
@@ -336,7 +336,7 @@ dri2_copy_region(DrawablePtr pDraw, RegionPtr pRegion,
/* pixmap glXWaitX */
if (pSrcBuffer->attachment == DRI2BufferFrontLeft &&
pDestBuffer->attachment == DRI2BufferFakeFrontLeft) {
- ms->ctx->flush(ms->ctx, PIPE_FLUSH_SWAPBUFFERS, NULL);
+ ms->ctx->flush(ms->ctx, NULL);
return;
}
/* pixmap glXWaitGL */
@@ -362,7 +362,8 @@ dri2_copy_region(DrawablePtr pDraw, RegionPtr pRegion,
if (extents->x1 == 0 && extents->y1 == 0 &&
extents->x2 == pDraw->width && extents->y2 == pDraw->height) {
- ms->screen->fence_finish(ms->screen, dst_priv->fence, 0);
+ ms->screen->fence_finish(ms->screen, dst_priv->fence,
+ PIPE_TIMEOUT_INFINITE);
ms->screen->fence_reference(ms->screen, &dst_priv->fence, NULL);
}
}
@@ -388,7 +389,7 @@ dri2_copy_region(DrawablePtr pDraw, RegionPtr pRegion,
FreeScratchGC(gc);
- ms->ctx->flush(ms->ctx, PIPE_FLUSH_SWAPBUFFERS,
+ ms->ctx->flush(ms->ctx,
(pDestBuffer->attachment == DRI2BufferFrontLeft
&& ms->swapThrottling) ?
&dst_priv->fence : NULL);
@@ -451,12 +452,12 @@ xorg_dri2_init(ScreenPtr pScreen)
ms->screen->is_format_supported(ms->screen, PIPE_FORMAT_Z24X8_UNORM,
PIPE_TEXTURE_2D,
0,
- PIPE_BIND_DEPTH_STENCIL, 0);
+ PIPE_BIND_DEPTH_STENCIL);
ms->ds_depth_bits_last =
ms->screen->is_format_supported(ms->screen, PIPE_FORMAT_Z24_UNORM_S8_USCALED,
PIPE_TEXTURE_2D,
0,
- PIPE_BIND_DEPTH_STENCIL, 0);
+ PIPE_BIND_DEPTH_STENCIL);
return DRI2ScreenInit(pScreen, &dri2info);
}
diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c
index 66685ecec6..19e9bf8465 100644
--- a/src/gallium/state_trackers/xorg/xorg_driver.c
+++ b/src/gallium/state_trackers/xorg/xorg_driver.c
@@ -44,6 +44,7 @@
#include "xf86Crtc.h"
#include "miscstruct.h"
#include "dixstruct.h"
+#include "xf86cmap.h"
#include "xf86xv.h"
#include "xorgVersion.h"
#ifndef XSERVER_LIBPCIACCESS
@@ -333,7 +334,8 @@ drv_cleanup_fences(ScrnInfoPtr pScrn)
for (i = 0; i < XORG_NR_FENCES; i++) {
if (ms->fence[i]) {
- ms->screen->fence_finish(ms->screen, ms->fence[i], 0);
+ ms->screen->fence_finish(ms->screen, ms->fence[i],
+ PIPE_TIMEOUT_INFINITE);
ms->screen->fence_reference(ms->screen, &ms->fence[i], NULL);
}
}
@@ -414,6 +416,7 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags)
return FALSE;
switch (pScrn->depth) {
+ case 8:
case 15:
case 16:
case 24:
@@ -545,7 +548,7 @@ void xorg_flush(ScreenPtr pScreen)
if (ms->ctx) {
int j;
- ms->ctx->flush(ms->ctx, PIPE_FLUSH_RENDER_CACHE,
+ ms->ctx->flush(ms->ctx,
ms->dirtyThrottling ?
&ms->fence[XORG_NR_FENCES-1] :
NULL);
@@ -553,7 +556,8 @@ void xorg_flush(ScreenPtr pScreen)
if (ms->dirtyThrottling) {
if (ms->fence[0])
ms->ctx->screen->fence_finish(ms->ctx->screen,
- ms->fence[0], 0);
+ ms->fence[0],
+ PIPE_TIMEOUT_INFINITE);
/* The amount of rendering generated by a block handler can be
* quite small. Let us get a fair way ahead of hardware before
@@ -677,6 +681,65 @@ drv_set_master(ScrnInfoPtr pScrn)
}
+static void drv_load_palette(ScrnInfoPtr pScrn, int numColors,
+ int *indices, LOCO *colors, VisualPtr pVisual)
+{
+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn);
+ modesettingPtr ms = modesettingPTR(pScrn);
+ int index, j, i;
+ int c;
+
+ switch(pScrn->depth) {
+ case 15:
+ for (i = 0; i < numColors; i++) {
+ index = indices[i];
+ for (j = 0; j < 8; j++) {
+ ms->lut_r[index * 8 + j] = colors[index].red << 8;
+ ms->lut_g[index * 8 + j] = colors[index].green << 8;
+ ms->lut_b[index * 8 + j] = colors[index].blue << 8;
+ }
+ }
+ break;
+ case 16:
+ for (i = 0; i < numColors; i++) {
+ index = indices[i];
+
+ if (index < 32) {
+ for (j = 0; j < 8; j++) {
+ ms->lut_r[index * 8 + j] = colors[index].red << 8;
+ ms->lut_b[index * 8 + j] = colors[index].blue << 8;
+ }
+ }
+
+ for (j = 0; j < 4; j++) {
+ ms->lut_g[index * 4 + j] = colors[index].green << 8;
+ }
+ }
+ break;
+ default:
+ for (i = 0; i < numColors; i++) {
+ index = indices[i];
+ ms->lut_r[index] = colors[index].red << 8;
+ ms->lut_g[index] = colors[index].green << 8;
+ ms->lut_b[index] = colors[index].blue << 8;
+ }
+ break;
+ }
+
+ for (c = 0; c < xf86_config->num_crtc; c++) {
+ xf86CrtcPtr crtc = xf86_config->crtc[c];
+
+ /* Make the change through RandR */
+#ifdef RANDR_12_INTERFACE
+ if (crtc->randr_crtc)
+ RRCrtcGammaSet(crtc->randr_crtc, ms->lut_r, ms->lut_g, ms->lut_b);
+ else
+#endif
+ crtc->funcs->gamma_set(crtc, ms->lut_r, ms->lut_g, ms->lut_b, 256);
+ }
+}
+
+
static Bool
drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv)
{
@@ -816,6 +879,10 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv)
if (!miCreateDefColormap(pScreen))
return FALSE;
+ if (!xf86HandleColormaps(pScreen, 256, 8, drv_load_palette, NULL,
+ CMAP_PALETTED_TRUECOLOR |
+ CMAP_RELOAD_ON_MODE_SWITCH))
+ return FALSE;
xf86DPMSInit(pScreen, xf86DPMSSet, 0);
diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c
index 718a345393..e7d6a93e5c 100644
--- a/src/gallium/state_trackers/xorg/xorg_exa.c
+++ b/src/gallium/state_trackers/xorg/xorg_exa.c
@@ -349,7 +349,7 @@ ExaPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planeMask, Pixel fg)
if (!exa->scrn->is_format_supported(exa->scrn, priv->tex->format,
priv->tex->target, 0,
- PIPE_BIND_RENDER_TARGET, 0)) {
+ PIPE_BIND_RENDER_TARGET)) {
XORG_FALLBACK("format %s", util_format_name(priv->tex->format));
}
@@ -430,12 +430,12 @@ ExaPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir,
if (!exa->scrn->is_format_supported(exa->scrn, priv->tex->format,
priv->tex->target, 0,
- PIPE_BIND_RENDER_TARGET, 0))
+ PIPE_BIND_RENDER_TARGET))
XORG_FALLBACK("pDst format %s", util_format_name(priv->tex->format));
if (!exa->scrn->is_format_supported(exa->scrn, src_priv->tex->format,
src_priv->tex->target, 0,
- PIPE_BIND_SAMPLER_VIEW, 0))
+ PIPE_BIND_SAMPLER_VIEW))
XORG_FALLBACK("pSrc format %s", util_format_name(src_priv->tex->format));
exa->copy.src = src_priv;
@@ -630,7 +630,7 @@ ExaPrepareComposite(int op, PicturePtr pSrcPicture,
if (!exa->scrn->is_format_supported(exa->scrn, priv->tex->format,
priv->tex->target, 0,
- PIPE_BIND_RENDER_TARGET, 0))
+ PIPE_BIND_RENDER_TARGET))
XORG_FALLBACK("pDst format: %s", util_format_name(priv->tex->format));
if (priv->picture_format != pDstPicture->format)
@@ -645,7 +645,7 @@ ExaPrepareComposite(int op, PicturePtr pSrcPicture,
if (!exa->scrn->is_format_supported(exa->scrn, priv->tex->format,
priv->tex->target, 0,
- PIPE_BIND_SAMPLER_VIEW, 0))
+ PIPE_BIND_SAMPLER_VIEW))
XORG_FALLBACK("pSrc format: %s", util_format_name(priv->tex->format));
if (!picture_check_formats(priv, pSrcPicture))
@@ -662,7 +662,7 @@ ExaPrepareComposite(int op, PicturePtr pSrcPicture,
if (!exa->scrn->is_format_supported(exa->scrn, priv->tex->format,
priv->tex->target, 0,
- PIPE_BIND_SAMPLER_VIEW, 0))
+ PIPE_BIND_SAMPLER_VIEW))
XORG_FALLBACK("pMask format: %s", util_format_name(priv->tex->format));
if (!picture_check_formats(priv, pMaskPicture))
@@ -1072,19 +1072,20 @@ xorg_gpu_surface(struct pipe_context *pipe, struct exa_pixmap_priv *priv)
}
-void xorg_exa_flush(struct exa_context *exa, uint pipeFlushFlags,
+void xorg_exa_flush(struct exa_context *exa,
struct pipe_fence_handle **fence)
{
- exa->pipe->flush(exa->pipe, pipeFlushFlags, fence);
+ exa->pipe->flush(exa->pipe, fence);
}
void xorg_exa_finish(struct exa_context *exa)
{
struct pipe_fence_handle *fence = NULL;
- xorg_exa_flush(exa, PIPE_FLUSH_RENDER_CACHE, &fence);
+ xorg_exa_flush(exa, &fence);
- exa->pipe->screen->fence_finish(exa->pipe->screen, fence, 0);
+ exa->pipe->screen->fence_finish(exa->pipe->screen, fence,
+ PIPE_TIMEOUT_INFINITE);
exa->pipe->screen->fence_reference(exa->pipe->screen, &fence, NULL);
}
diff --git a/src/gallium/state_trackers/xorg/xorg_exa.h b/src/gallium/state_trackers/xorg/xorg_exa.h
index 1f78f60be7..30b6f0ce46 100644
--- a/src/gallium/state_trackers/xorg/xorg_exa.h
+++ b/src/gallium/state_trackers/xorg/xorg_exa.h
@@ -74,7 +74,7 @@ do { \
struct pipe_surface *
xorg_gpu_surface(struct pipe_context *pipe, struct exa_pixmap_priv *priv);
-void xorg_exa_flush(struct exa_context *exa, uint pipeFlushFlags,
+void xorg_exa_flush(struct exa_context *exa,
struct pipe_fence_handle **fence);
void xorg_exa_finish(struct exa_context *exa);
diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c
index a3d7c5a70e..6b799af90c 100644
--- a/src/gallium/state_trackers/xorg/xorg_renderer.c
+++ b/src/gallium/state_trackers/xorg/xorg_renderer.c
@@ -73,7 +73,7 @@ renderer_draw(struct xorg_renderer *r)
if (buf) {
cso_set_vertex_elements(r->cso, r->attrs_per_vertex, r->velems);
- util_draw_vertex_buffer(pipe, buf, 0,
+ util_draw_vertex_buffer(pipe, r->cso, buf, 0,
PIPE_PRIM_QUADS,
num_verts, /* verts */
r->attrs_per_vertex); /* attribs/vert */
@@ -429,6 +429,7 @@ void renderer_set_constants(struct xorg_renderer *r,
pipe_resource_reference(cbuf, NULL);
*cbuf = pipe_buffer_create(r->pipe->screen,
PIPE_BIND_CONSTANT_BUFFER,
+ PIPE_USAGE_STATIC,
param_bytes);
if (*cbuf) {
@@ -449,8 +450,7 @@ void renderer_copy_prepare(struct xorg_renderer *r,
assert(screen->is_format_supported(screen, dst_surface->format,
PIPE_TEXTURE_2D, 0,
- PIPE_BIND_RENDER_TARGET,
- 0));
+ PIPE_BIND_RENDER_TARGET));
(void) screen;
@@ -518,14 +518,10 @@ renderer_clone_texture(struct xorg_renderer *r,
struct pipe_resource *pt;
struct pipe_resource templ;
- if (pipe->is_resource_referenced(pipe, src, 0, 0) &
- PIPE_REFERENCED_FOR_WRITE)
- pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
-
/* the coming in texture should already have that invariance */
debug_assert(screen->is_format_supported(screen, src->format,
PIPE_TEXTURE_2D, 0,
- PIPE_BIND_SAMPLER_VIEW, 0));
+ PIPE_BIND_SAMPLER_VIEW));
format = src->format;
@@ -616,7 +612,7 @@ void renderer_draw_yuv(struct xorg_renderer *r,
cso_set_vertex_elements(r->cso, num_attribs, r->velems);
- util_draw_vertex_buffer(pipe, buf, 0,
+ util_draw_vertex_buffer(pipe, r->cso, buf, 0,
PIPE_PRIM_QUADS,
4, /* verts */
num_attribs); /* attribs/vert */
diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h
index 56397b8fea..664e8c7573 100644
--- a/src/gallium/state_trackers/xorg/xorg_tracker.h
+++ b/src/gallium/state_trackers/xorg/xorg_tracker.h
@@ -129,6 +129,7 @@ typedef struct _modesettingRec
/* kms */
struct kms_driver *kms;
struct kms_bo *root_bo;
+ uint16_t lut_r[256], lut_g[256], lut_b[256];
/* gallium */
struct pipe_screen *screen;
diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c
index c72ba9ef8d..234574b968 100644
--- a/src/gallium/state_trackers/xorg/xorg_xv.c
+++ b/src/gallium/state_trackers/xorg/xorg_xv.c
@@ -461,20 +461,6 @@ bind_shaders(struct xorg_xv_port_priv *port)
cso_set_fragment_shader_handle(port->r->cso, shader.fs);
}
-static INLINE void
-conditional_flush(struct pipe_context *pipe, struct pipe_resource **tex,
- int num)
-{
- int i;
- for (i = 0; i < num; ++i) {
- if (tex[i] && pipe->is_resource_referenced(pipe, tex[i], 0, 0) &
- PIPE_REFERENCED_FOR_WRITE) {
- pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
- return;
- }
- }
-}
-
static void
bind_samplers(struct xorg_xv_port_priv *port)
{
@@ -485,8 +471,6 @@ bind_samplers(struct xorg_xv_port_priv *port)
memset(&sampler, 0, sizeof(struct pipe_sampler_state));
- conditional_flush(port->r->pipe, dst, 3);
-
sampler.wrap_s = PIPE_TEX_WRAP_CLAMP;
sampler.wrap_t = PIPE_TEX_WRAP_CLAMP;
sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
diff --git a/src/gallium/targets/Android.mk b/src/gallium/targets/Android.mk
index 110284c3c9..2101cd3495 100644
--- a/src/gallium/targets/Android.mk
+++ b/src/gallium/targets/Android.mk
@@ -1,16 +1,24 @@
+# Android.mk for Gallium EGL and gralloc
+
LOCAL_PATH := $(call my-dir)
-gles_SRC_FILES := \
- egl-gdi/egl-static.c
+# EGL
+include $(CLEAR_VARS)
-gles_CFLAGS := \
+LOCAL_SRC_FILES := \
+ egl-static/egl.c \
+ egl-static/egl_pipe.c \
+ egl-static/egl_st.c
+
+LOCAL_CFLAGS := \
-DFEATURE_ES1=1 \
-DFEATURE_ES2=1 \
-DGALLIUM_SOFTPIPE \
+ -D_EGL_MAIN=_eglBuiltInDriverGALLIUM \
-fvisibility=hidden \
-Wno-sign-compare
-gles_C_INCLUDES := \
+LOCAL_C_INCLUDES := \
external/mesa/include \
external/mesa/src/gallium/include \
external/mesa/src/gallium/auxiliary \
@@ -21,136 +29,92 @@ gles_C_INCLUDES := \
external/mesa/src/egl/main \
external/mesa/src/mesa
-gles_STATIC_LIBRARIES := \
+LOCAL_STATIC_LIBRARIES := \
libmesa_st_egl \
libmesa_winsys_sw \
libmesa_pipe_softpipe \
libmesa_st_mesa \
libmesa_glsl \
libmesa_st_mesa \
- libmesa_talloc \
libmesa_gallium
-gles_WHOLE_STATIC_LIBRARIES := \
- libmesa_egl \
- libmesa_glapi
+LOCAL_WHOLE_STATIC_LIBRARIES := \
+ libmesa_egl
-gles_SHARED_LIBRARIES := \
+LOCAL_SHARED_LIBRARIES := \
+ libglapi \
libdl \
libhardware \
liblog \
libcutils
-gralloc_SRC_FILES :=
-
-gralloc_CFLAGS := \
- -fvisibility=hidden \
- -Wno-sign-compare
-
-gralloc_C_INCLUDES := \
- external/mesa/src/gallium/include \
- external/mesa/src/gallium/auxiliary \
- external/mesa/src/gallium/drivers \
- external/mesa/src/gallium/winsys
-
-gralloc_STATIC_LIBRARIES := \
- libmesa_gallium
-
-gralloc_WHOLE_STATIC_LIBRARIES := \
- libmesa_st_gralloc
-
-gralloc_SHARED_LIBRARIES := \
- libdl \
- liblog \
- libcutils \
- libdrm \
- libEGL
-
-ifeq ($(strip $(MESA_BUILD_SWRAST)),true)
-include $(CLEAR_VARS)
-
-LOCAL_SRC_FILES := \
- $(gles_SRC_FILES)
-
-LOCAL_CFLAGS := \
- $(gles_CFLAGS)
-
-LOCAL_C_INCLUDES := \
- $(gles_C_INCLUDES)
-
-LOCAL_STATIC_LIBRARIES := \
- $(gles_STATIC_LIBRARIES)
-
-LOCAL_WHOLE_STATIC_LIBRARIES := \
- $(gles_WHOLE_STATIC_LIBRARIES)
-
-LOCAL_SHARED_LIBRARIES := \
- $(gles_SHARED_LIBRARIES)
-
-LOCAL_MODULE := libGLES_swrast
-LOCAL_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/egl
-
-include $(BUILD_SHARED_LIBRARY)
-endif # MESA_BUILD_SWRAST
+ifeq ($(strip $(MESA_BUILD_CLASSIC)),true)
+LOCAL_STATIC_LIBRARIES := \
+ libmesa_classic_egl \
+ $(LOCAL_STATIC_LIBRARIES)
+LOCAL_SHARED_LIBRARIES += libdrm
+endif # MESA_BUILD_CLASSIC
ifeq ($(strip $(MESA_BUILD_I915G)),true)
-include $(CLEAR_VARS)
-
-LOCAL_SRC_FILES := \
- $(gles_SRC_FILES) \
- egl/pipe_i915.c
-
-LOCAL_CFLAGS := \
- $(gles_CFLAGS) \
- -DFEATURE_DRM=1
-
-LOCAL_C_INCLUDES := \
- $(gles_C_INCLUDES)
-
+LOCAL_CFLAGS += -D_EGL_PIPE_I915
LOCAL_STATIC_LIBRARIES := \
libmesa_winsys_i915 \
libmesa_pipe_i915 \
- $(gles_STATIC_LIBRARIES)
-
-LOCAL_WHOLE_STATIC_LIBRARIES := \
- $(gles_WHOLE_STATIC_LIBRARIES)
-
-LOCAL_SHARED_LIBRARIES := \
- $(gles_SHARED_LIBRARIES) \
+ $(LOCAL_STATIC_LIBRARIES)
+LOCAL_SHARED_LIBRARIES += \
libdrm \
libdrm_intel
+endif # MESA_BUILD_I915G
-LOCAL_MODULE := libGLES_i915g
+ifeq ($(strip $(MESA_BUILD_R600G)),true)
+LOCAL_CFLAGS += -D_EGL_PIPE_R600
+LOCAL_STATIC_LIBRARIES := \
+ libmesa_pipe_r600 \
+ libmesa_winsys_r600 \
+ $(LOCAL_STATIC_LIBRARIES)
+LOCAL_SHARED_LIBRARIES += \
+ libdrm \
+ libdrm_radeon
+endif # MESA_BUILD_R600G
+
+LOCAL_MODULE := libGLES_mesa
LOCAL_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/egl
include $(BUILD_SHARED_LIBRARY)
-endif # MESA_BUILD_I915G
+# gralloc (broken!)
ifeq ($(strip $(MESA_BUILD_VMWGFX)),true)
include $(CLEAR_VARS)
-LOCAL_SRC_FILES := \
- egl/pipe_vmwgfx.c \
- $(gralloc_SRC_FILES)
+LOCAL_SRC_FILES := egl/egl_pipe.c
LOCAL_CFLAGS := \
- $(gralloc_CFLAGS)
+ -D_EGL_PIPE_VMWGFX=1 \
+ -fvisibility=hidden \
+ -Wno-sign-compare
LOCAL_C_INCLUDES := \
- $(gralloc_C_INCLUDES)
+ external/mesa/src/gallium/include \
+ external/mesa/src/gallium/auxiliary \
+ external/mesa/src/gallium/drivers \
+ external/mesa/src/gallium/winsys
LOCAL_STATIC_LIBRARIES := \
libmesa_pipe_svga \
libmesa_winsys_svga \
- $(gralloc_STATIC_LIBRARIES)
+ libmesa_gallium
LOCAL_WHOLE_STATIC_LIBRARIES := \
- $(gralloc_WHOLE_STATIC_LIBRARIES)
+ libmesa_st_gralloc
LOCAL_SHARED_LIBRARIES := \
- $(gralloc_SHARED_LIBRARIES)
+ libdl \
+ liblog \
+ libcutils \
+ libdrm \
+ libEGL
-LOCAL_MODULE := gralloc.vmwgfx
+LOCAL_MODULE := gralloc.mesa
LOCAL_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/hw
include $(BUILD_SHARED_LIBRARY)
diff --git a/src/gallium/targets/SConscript.dri b/src/gallium/targets/SConscript.dri
index bc3671a256..101863a684 100644
--- a/src/gallium/targets/SConscript.dri
+++ b/src/gallium/targets/SConscript.dri
@@ -72,7 +72,6 @@ COMMON_DRI_DRM_OBJECTS = [
drienv.AppendUnique(LIBS = [
'expat',
- 'talloc',
])
Export([
diff --git a/src/gallium/targets/dri-noop/Makefile b/src/gallium/targets/dri-noop/Makefile
deleted file mode 100644
index 21c5f4f9f2..0000000000
--- a/src/gallium/targets/dri-noop/Makefile
+++ /dev/null
@@ -1,34 +0,0 @@
-TOP = ../../../..
-include $(TOP)/configs/current
-
-LIBNAME = noop_dri.so
-
-DRIVER_DEFINES = \
- -D__NOT_HAVE_DRM_H
-
-PIPE_DRIVERS = \
- $(TOP)/src/gallium/state_trackers/dri/sw/libdrisw.a \
- $(TOP)/src/gallium/winsys/sw/dri/libswdri.a \
- $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
- $(TOP)/src/gallium/drivers/trace/libtrace.a \
- $(TOP)/src/gallium/drivers/rbug/librbug.a \
- $(TOP)/src/gallium/drivers/noop/libnoop.a
-
-SWRAST_COMMON_GALLIUM_SOURCES = \
- $(TOP)/src/mesa/drivers/dri/common/utils.c \
- $(TOP)/src/mesa/drivers/dri/common/drisw_util.c \
- $(TOP)/src/mesa/drivers/dri/common/xmlconfig.c
-
-C_SOURCES = \
- swrast_drm_api.c \
- $(SWRAST_COMMON_GALLIUM_SOURCES) \
- $(DRIVER_SOURCES)
-
-ASM_SOURCES =
-
-include ../Makefile.dri
-
-INCLUDES += \
- -I$(TOP)/src/gallium/winsys/sw/dri
-
-symlinks:
diff --git a/src/gallium/targets/dri-noop/SConscript b/src/gallium/targets/dri-noop/SConscript
deleted file mode 100644
index 9c04ee6631..0000000000
--- a/src/gallium/targets/dri-noop/SConscript
+++ /dev/null
@@ -1,31 +0,0 @@
-Import('*')
-
-env = drienv.Clone()
-
-env.Append(CPPPATH = [
- '#/src/gallium/winsys/sw/dri',
-])
-
-env.Prepend(LIBS = [
- st_drisw,
- ws_dri,
- noop,
- mesa,
- glsl,
- gallium,
- COMMON_DRI_SW_OBJECTS
-])
-
-env.Prepend(LIBS = [noop])
-
-swrastg_sources = [
- 'swrast_drm_api.c'
-]
-
-module = env.LoadableModule(
- target ='noop_dri.so',
- source = swrastg_sources,
- SHLIBPREFIX = '',
-)
-
-env.Alias('dri-noop', module)
diff --git a/src/gallium/targets/dri-nouveau/Makefile b/src/gallium/targets/dri-nouveau/Makefile
index 2f64f312b8..eb1ee859a0 100644
--- a/src/gallium/targets/dri-nouveau/Makefile
+++ b/src/gallium/targets/dri-nouveau/Makefile
@@ -10,6 +10,7 @@ PIPE_DRIVERS = \
$(TOP)/src/gallium/drivers/rbug/librbug.a \
$(TOP)/src/gallium/drivers/nvfx/libnvfx.a \
$(TOP)/src/gallium/drivers/nv50/libnv50.a \
+ $(TOP)/src/gallium/drivers/nvc0/libnvc0.a \
$(TOP)/src/gallium/drivers/nouveau/libnouveau.a
C_SOURCES = \
diff --git a/src/gallium/targets/dri-r300/Makefile b/src/gallium/targets/dri-r300/Makefile
index 9afbb13276..cc77a4bc20 100644
--- a/src/gallium/targets/dri-r300/Makefile
+++ b/src/gallium/targets/dri-r300/Makefile
@@ -22,6 +22,4 @@ DRIVER_DEFINES = \
include ../Makefile.dri
-DRI_LIB_DEPS += -ldrm_radeon
-
symlinks:
diff --git a/src/gallium/targets/dri-r300/SConscript b/src/gallium/targets/dri-r300/SConscript
index 005b4bbf7f..683b6c6972 100644
--- a/src/gallium/targets/dri-r300/SConscript
+++ b/src/gallium/targets/dri-r300/SConscript
@@ -2,8 +2,6 @@ Import('*')
env = drienv.Clone()
-env.ParseConfig('pkg-config --cflags --libs libdrm_radeon')
-
env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE', 'GALLIUM_GALAHAD'])
env.Prepend(LIBS = [
diff --git a/src/gallium/targets/dri-r600/Makefile b/src/gallium/targets/dri-r600/Makefile
index 661283de6a..c8fae2d858 100644
--- a/src/gallium/targets/dri-r600/Makefile
+++ b/src/gallium/targets/dri-r600/Makefile
@@ -9,7 +9,8 @@ PIPE_DRIVERS = \
$(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
$(TOP)/src/gallium/drivers/trace/libtrace.a \
- $(TOP)/src/gallium/drivers/rbug/librbug.a
+ $(TOP)/src/gallium/drivers/rbug/librbug.a \
+ $(TOP)/src/gallium/drivers/noop/libnoop.a
C_SOURCES = \
target.c \
@@ -17,7 +18,7 @@ C_SOURCES = \
$(DRIVER_SOURCES)
DRIVER_DEFINES = \
- -DGALLIUM_RBUG -DGALLIUM_TRACE
+ -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_NOOP
include ../Makefile.dri
diff --git a/src/gallium/targets/dri-r600/SConscript b/src/gallium/targets/dri-r600/SConscript
index aa771db2d1..1df11a8747 100644
--- a/src/gallium/targets/dri-r600/SConscript
+++ b/src/gallium/targets/dri-r600/SConscript
@@ -2,8 +2,6 @@ Import('*')
env = drienv.Clone()
-env.ParseConfig('pkg-config --cflags --libs libdrm_radeon')
-
env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE'])
env.Prepend(LIBS = [
diff --git a/src/gallium/targets/dri-r600/target.c b/src/gallium/targets/dri-r600/target.c
index 8753e2bab1..2fe345402d 100644
--- a/src/gallium/targets/dri-r600/target.c
+++ b/src/gallium/targets/dri-r600/target.c
@@ -1,5 +1,5 @@
#include "state_tracker/drm_driver.h"
-#include "target-helpers/inline_debug_helper.h"
+#include "target-helpers/inline_noop_helper.h"
#include "r600/drm/r600_drm_public.h"
#include "r600/r600_public.h"
diff --git a/src/gallium/targets/dri-vmwgfx/Makefile b/src/gallium/targets/dri-vmwgfx/Makefile
index 38f78932e1..8c716f42fe 100644
--- a/src/gallium/targets/dri-vmwgfx/Makefile
+++ b/src/gallium/targets/dri-vmwgfx/Makefile
@@ -6,6 +6,7 @@ LIBNAME = vmwgfx_dri.so
PIPE_DRIVERS = \
$(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \
$(TOP)/src/gallium/winsys/svga/drm/libsvgadrm.a \
+ $(TOP)/src/gallium/winsys/sw/wrapper/libwsw.a \
$(TOP)/src/gallium/drivers/trace/libtrace.a \
$(TOP)/src/gallium/drivers/rbug/librbug.a \
$(TOP)/src/gallium/drivers/svga/libsvga.a
diff --git a/src/gallium/targets/dri-vmwgfx/SConscript b/src/gallium/targets/dri-vmwgfx/SConscript
index 7888e4f2c8..17dd021051 100644
--- a/src/gallium/targets/dri-vmwgfx/SConscript
+++ b/src/gallium/targets/dri-vmwgfx/SConscript
@@ -2,14 +2,14 @@ Import('*')
env = drienv.Clone()
-env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE'])
+if True:
+ env.Append(CPPDEFINES = ['GALLIUM_TRACE', 'GALLIUM_RBUG', 'GALLIUM_GALAHAD', 'GALLIUM_SOFTPIPE'])
+ env.Prepend(LIBS = [trace, rbug, galahad, softpipe, ws_wrapper])
env.Prepend(LIBS = [
st_dri,
svgadrm,
svga,
- trace,
- rbug,
mesa,
glsl,
gallium,
@@ -22,4 +22,4 @@ module = env.LoadableModule(
SHLIBPREFIX = '',
)
-env.Alias('dri-vmwgfx', module) \ No newline at end of file
+env.Alias('dri-vmwgfx', module)
diff --git a/src/gallium/targets/dri-vmwgfx/target.c b/src/gallium/targets/dri-vmwgfx/target.c
index 15089d6db2..1362851d6b 100644
--- a/src/gallium/targets/dri-vmwgfx/target.c
+++ b/src/gallium/targets/dri-vmwgfx/target.c
@@ -1,4 +1,5 @@
+#include "target-helpers/inline_wrapper_sw_helper.h"
#include "target-helpers/inline_debug_helper.h"
#include "state_tracker/drm_driver.h"
#include "svga/drm/svga_drm_public.h"
@@ -18,6 +19,8 @@ create_screen(int fd)
if (!screen)
return NULL;
+ screen = sw_screen_wrap(screen);
+
screen = debug_screen_wrap(screen);
return screen;
diff --git a/src/gallium/targets/egl-gdi/SConscript b/src/gallium/targets/egl-gdi/SConscript
deleted file mode 100644
index d52eeb70fd..0000000000
--- a/src/gallium/targets/egl-gdi/SConscript
+++ /dev/null
@@ -1,55 +0,0 @@
-#######################################################################
-# SConscript for egl-gdi target
-
-Import('*')
-
-env = env.Clone()
-
-env.Append(CPPPATH = [
- '#/src/gallium/state_trackers/egl',
- '#/src/gallium/state_trackers/vega',
- '#/src/egl/main',
- '#/src/mesa',
-])
-
-env.Append(CPPDEFINES = [
- 'FEATURE_VG=1',
- 'GALLIUM_SOFTPIPE',
- 'GALLIUM_RBUG',
- 'GALLIUM_TRACE',
-])
-
-env.Append(LIBS = [
- 'gdi32',
- 'user32',
- 'kernel32',
- 'ws2_32',
-])
-
-env.Prepend(LIBS = [
- st_egl_gdi,
- ws_gdi,
- identity,
- trace,
- rbug,
- softpipe,
- vgapi,
- st_vega,
- gallium,
- egl,
-])
-
-if env['llvm']:
- env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE')
- env.Prepend(LIBS = [llvmpipe])
-
-egl_gallium = env.SharedLibrary(
- target ='egl_gallium',
- source = 'egl-static.c',
-)
-
-env['no_import_lib'] = 1
-
-egl_gdi = env.InstallSharedLibrary(egl_gallium)
-
-env.Alias('egl-gdi', egl_gdi)
diff --git a/src/gallium/targets/egl-static/SConscript b/src/gallium/targets/egl-static/SConscript
new file mode 100644
index 0000000000..cbd98cc416
--- /dev/null
+++ b/src/gallium/targets/egl-static/SConscript
@@ -0,0 +1,134 @@
+#######################################################################
+# SConscript for egl-static target
+
+Import('*')
+
+env = env.Clone()
+
+env.Append(CPPPATH = [
+ '#/include',
+ '#/src/egl/main',
+ '#/src/gallium/auxiliary',
+ '#/src/gallium/drivers',
+ '#/src/gallium/include',
+ '#/src/gallium/winsys',
+ '#/src/gallium/state_trackers/egl',
+ '#/src/gallium/state_trackers/vega',
+ '#/src/mesa',
+])
+
+env.Append(CPPDEFINES = [
+ 'GALLIUM_SOFTPIPE',
+ 'GALLIUM_RBUG',
+ 'GALLIUM_TRACE',
+ 'GALLIUM_GALAHAD',
+ '_EGL_MAIN=_eglBuiltInDriverGALLIUM',
+])
+
+env.Prepend(LIBS = [
+ softpipe,
+ rbug,
+ trace,
+ galahad,
+ gallium,
+ egl,
+ st_egl,
+])
+
+if env['llvm']:
+ env.Append(CPPDEFINES = ['GALLIUM_LLVMPIPE'])
+ env.Prepend(LIBS = [llvmpipe])
+
+sources = [
+ 'egl.c',
+ 'egl_pipe.c',
+ 'egl_st.c',
+]
+
+if env['platform'] == 'windows':
+ sources.append('#src/egl/main/egl.def')
+
+ env.Append(LIBS = [
+ 'gdi32',
+ 'user32',
+ 'kernel32',
+ 'ws2_32',
+ ])
+
+ env.Prepend(LIBS = [
+ ws_gdi,
+ ])
+
+# OpenGL ES and OpenGL
+if env['gles']:
+ env.Append(CPPDEFINES = [
+ 'FEATURE_GL=1',
+ 'FEATURE_ES1=1',
+ 'FEATURE_ES2=1'
+ ])
+ env.Prepend(LIBPATH = [shared_glapi.dir])
+ # manually add LIBPREFIX on windows
+ glapi_name = 'glapi' if env['platform'] != 'windows' else 'libglapi'
+ env.Prepend(LIBS = [glapi_name, glsl, mesa])
+
+# OpenVG
+if True:
+ env.Append(CPPDEFINES = ['FEATURE_VG=1'])
+ env.Prepend(LIBPATH = [openvg.dir])
+ # manually add LIBPREFIX on windows
+ openvg_name = 'OpenVG' if env['platform'] != 'windows' else 'libOpenVG'
+ env.Prepend(LIBS = [openvg_name, st_vega])
+
+if env['x11']:
+ env.Prepend(LIBS = [
+ ws_xlib,
+ env['X11_LIBS'],
+ ])
+
+if env['dri']:
+ env.ParseConfig('pkg-config --cflags --libs xfixes')
+
+# pipe drivers
+if env['drm']:
+ env.ParseConfig('pkg-config --cflags --libs libdrm')
+
+ if env['drm_intel']:
+ env.ParseConfig('pkg-config --cflags --libs libdrm_intel')
+ env.Append(CPPDEFINES = ['_EGL_PIPE_I915', '_EGL_PIPE_I965'])
+ env.Prepend(LIBS = [
+ i915drm,
+ i915,
+ i965drm,
+ i965,
+ ws_wrapper,
+ ])
+
+ if env['drm_radeon']:
+ env.Append(CPPDEFINES = ['_EGL_PIPE_R300', '_EGL_PIPE_R600'])
+ env.Prepend(LIBS = [
+ radeonwinsys,
+ r300,
+ r600winsys,
+ r600,
+ ])
+
+ env.Append(CPPDEFINES = ['_EGL_PIPE_VMWGFX'])
+ env.Prepend(LIBS = [
+ svgadrm,
+ svga,
+ ])
+
+# libEGL.dll
+env['LIBPREFIX'] = 'lib'
+env['SHLIBPREFIX'] = 'lib'
+
+egl_gallium = env.SharedLibrary(
+ target ='EGL',
+ source = sources,
+)
+
+env.Depends(egl_gallium, [openvg])
+
+egl_gallium = env.InstallSharedLibrary(egl_gallium, version=(1, 4, 0))
+
+env.Alias('egl-gallium', egl_gallium)
diff --git a/src/gallium/targets/egl-gdi/egl-static.c b/src/gallium/targets/egl-static/egl.c
index ff9735c29b..e617ff5020 100644
--- a/src/gallium/targets/egl-gdi/egl-static.c
+++ b/src/gallium/targets/egl-static/egl.c
@@ -1,8 +1,8 @@
/*
* Mesa 3-D graphics library
- * Version: 7.9
+ * Version: 7.10
*
- * Copyright (C) 2010 LunarG Inc.
+ * Copyright (C) 2010-2011 LunarG Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -27,42 +27,29 @@
*/
#include "common/egl_g3d_loader.h"
-#include "state_tracker/st_gl_api.h"
-#include "state_tracker/drm_driver.h"
-#include "vg_api.h"
-#include "target-helpers/inline_sw_helper.h"
-#include "target-helpers/inline_debug_helper.h"
#include "egldriver.h"
-static struct st_api *stapis[ST_API_COUNT];
+#include "egl_pipe.h"
+#include "egl_st.h"
+
+static struct egl_g3d_loader egl_g3d_loader;
+
+static struct st_module {
+ boolean initialized;
+ struct st_api *stapi;
+} st_modules[ST_API_COUNT];
static struct st_api *
get_st_api(enum st_api_type api)
{
- struct st_api *stapi;
+ struct st_module *stmod = &st_modules[api];
- stapi = stapis[api];
- if (stapi)
- return stapi;
-
- switch (api) {
-#if FEATURE_GL || FEATURE_ES1 || FEATURE_ES2
- case ST_API_OPENGL:
- stapi = st_gl_api_create();
- break;
-#endif
-#if FEATURE_VG
- case ST_API_OPENVG:
- stapi = (struct st_api *) vg_api_get();
- break;
-#endif
- default:
- break;
+ if (!stmod->initialized) {
+ stmod->stapi = egl_st_create_api(api);
+ stmod->initialized = TRUE;
}
- stapis[api] = stapi;
-
- return stapi;
+ return stmod->stapi;
}
static struct st_api *
@@ -74,75 +61,69 @@ guess_gl_api(enum st_profile_type profile)
static struct pipe_screen *
create_drm_screen(const char *name, int fd)
{
-#if FEATURE_DRM
- return driver_descriptor.create_screen(fd);
-#else
- return NULL;
-#endif
+ return egl_pipe_create_drm_screen(name, fd);
}
static struct pipe_screen *
create_sw_screen(struct sw_winsys *ws)
{
- struct pipe_screen *screen;
-
- screen = sw_screen_create(ws);
- if (screen)
- screen = debug_screen_wrap(screen);
-
- return screen;
+ return egl_pipe_create_swrast_screen(ws);
}
-static void
-init_loader(struct egl_g3d_loader *loader)
+static const struct egl_g3d_loader *
+loader_init(void)
{
-#if FEATURE_GL
- loader->profile_masks[ST_API_OPENGL] |= ST_PROFILE_DEFAULT_MASK;
-#endif
-#if FEATURE_ES1
- loader->profile_masks[ST_API_OPENGL] |= ST_PROFILE_OPENGL_ES1_MASK;
-#endif
-#if FEATURE_ES2
- loader->profile_masks[ST_API_OPENGL] |= ST_PROFILE_OPENGL_ES2_MASK;
-#endif
-#if FEATURE_VG
- loader->profile_masks[ST_API_OPENVG] |= ST_PROFILE_DEFAULT_MASK;
-#endif
-
- loader->get_st_api = get_st_api;
- loader->guess_gl_api = guess_gl_api;
- loader->create_drm_screen = create_drm_screen;
- loader->create_sw_screen = create_sw_screen;
+ int i;
+
+ for (i = 0; i < ST_API_COUNT; i++)
+ egl_g3d_loader.profile_masks[i] = egl_st_get_profile_mask(i);
+
+ egl_g3d_loader.get_st_api = get_st_api;
+ egl_g3d_loader.guess_gl_api = guess_gl_api;
+ egl_g3d_loader.create_drm_screen = create_drm_screen;
+ egl_g3d_loader.create_sw_screen = create_sw_screen;
+
+ return &egl_g3d_loader;
}
static void
-egl_g3d_unload(_EGLDriver *drv)
+loader_fini(void)
{
int i;
- egl_g3d_destroy_driver(drv);
-
for (i = 0; i < ST_API_COUNT; i++) {
- if (stapis[i]) {
- stapis[i]->destroy(stapis[i]);
- stapis[i] = NULL;
+ struct st_module *stmod = &st_modules[i];
+
+ if (stmod->stapi) {
+ stmod->stapi->destroy(stmod->stapi);
+ stmod->stapi = NULL;
}
+ stmod->initialized = FALSE;
}
}
-static struct egl_g3d_loader loader;
+static void
+egl_g3d_unload(_EGLDriver *drv)
+{
+ egl_g3d_destroy_driver(drv);
+ loader_fini();
+}
_EGLDriver *
-_eglMain(const char *args)
+_EGL_MAIN(const char *args)
{
+ const struct egl_g3d_loader *loader;
_EGLDriver *drv;
- init_loader(&loader);
- drv = egl_g3d_create_driver(&loader);
- if (drv) {
- drv->Name = "Gallium";
- drv->Unload = egl_g3d_unload;
+ loader = loader_init();
+ drv = egl_g3d_create_driver(loader);
+ if (!drv) {
+ loader_fini();
+ return NULL;
}
+ drv->Name = "Gallium";
+ drv->Unload = egl_g3d_unload;
+
return drv;
}
diff --git a/src/gallium/targets/egl-static/egl_pipe.c b/src/gallium/targets/egl-static/egl_pipe.c
new file mode 100644
index 0000000000..a33d419e0a
--- /dev/null
+++ b/src/gallium/targets/egl-static/egl_pipe.c
@@ -0,0 +1,215 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.10
+ *
+ * Copyright (C) 2011 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <olv@lunarg.com>
+ */
+#include "target-helpers/inline_debug_helper.h"
+#include "target-helpers/inline_sw_helper.h"
+#include "egl_pipe.h"
+
+/* for i915 */
+#include "i915/drm/i915_drm_public.h"
+#include "i915/i915_public.h"
+/* for i965 */
+#include "target-helpers/inline_wrapper_sw_helper.h"
+#include "i965/drm/i965_drm_public.h"
+#include "i965/brw_public.h"
+/* for nouveau */
+#include "nouveau/drm/nouveau_drm_public.h"
+/* for r300 */
+#include "radeon/drm/radeon_drm_public.h"
+#include "r300/r300_public.h"
+/* for r600 */
+#include "r600/drm/r600_drm_public.h"
+#include "r600/r600_public.h"
+/* for vmwgfx */
+#include "svga/drm/svga_drm_public.h"
+#include "svga/svga_public.h"
+
+static struct pipe_screen *
+pipe_i915_create_screen(int fd)
+{
+#if _EGL_PIPE_I915
+ struct i915_winsys *iws;
+ struct pipe_screen *screen;
+
+ iws = i915_drm_winsys_create(fd);
+ if (!iws)
+ return NULL;
+
+ screen = i915_screen_create(iws);
+ if (!screen)
+ return NULL;
+
+ screen = debug_screen_wrap(screen);
+
+ return screen;
+#else
+ return NULL;
+#endif
+}
+
+static struct pipe_screen *
+pipe_i965_create_screen(int fd)
+{
+#if _EGL_PIPE_I965
+ struct brw_winsys_screen *bws;
+ struct pipe_screen *screen;
+
+ bws = i965_drm_winsys_screen_create(fd);
+ if (!bws)
+ return NULL;
+
+ screen = brw_screen_create(bws);
+ if (!screen)
+ return NULL;
+
+ screen = sw_screen_wrap(screen);
+
+ screen = debug_screen_wrap(screen);
+
+ return screen;
+#else
+ return NULL;
+#endif
+}
+
+static struct pipe_screen *
+pipe_nouveau_create_screen(int fd)
+{
+#if _EGL_PIPE_NOUVEAU
+ struct pipe_screen *screen;
+
+ screen = nouveau_drm_screen_create(fd);
+ if (!screen)
+ return NULL;
+
+ screen = debug_screen_wrap(screen);
+
+ return screen;
+#else
+ return NULL;
+#endif
+}
+
+static struct pipe_screen *
+pipe_r300_create_screen(int fd)
+{
+#if _EGL_PIPE_R300
+ struct r300_winsys_screen *sws;
+ struct pipe_screen *screen;
+
+ sws = r300_drm_winsys_screen_create(fd);
+ if (!sws)
+ return NULL;
+
+ screen = r300_screen_create(sws);
+ if (!screen)
+ return NULL;
+
+ screen = debug_screen_wrap(screen);
+
+ return screen;
+#else
+ return NULL;
+#endif
+}
+
+static struct pipe_screen *
+pipe_r600_create_screen(int fd)
+{
+#if _EGL_PIPE_R600
+ struct radeon *rw;
+ struct pipe_screen *screen;
+
+ rw = r600_drm_winsys_create(fd);
+ if (!rw)
+ return NULL;
+
+ screen = r600_screen_create(rw);
+ if (!screen)
+ return NULL;
+
+ screen = debug_screen_wrap(screen);
+
+ return screen;
+#else
+ return NULL;
+#endif
+}
+
+static struct pipe_screen *
+pipe_vmwgfx_create_screen(int fd)
+{
+#if _EGL_PIPE_VMWGFX
+ struct svga_winsys_screen *sws;
+ struct pipe_screen *screen;
+
+ sws = svga_drm_winsys_screen_create(fd);
+ if (!sws)
+ return NULL;
+
+ screen = svga_screen_create(sws);
+ if (!screen)
+ return NULL;
+
+ screen = debug_screen_wrap(screen);
+
+ return screen;
+#else
+ return NULL;
+#endif
+}
+
+struct pipe_screen *
+egl_pipe_create_drm_screen(const char *name, int fd)
+{
+ if (strcmp(name, "i915") == 0)
+ return pipe_i915_create_screen(fd);
+ else if (strcmp(name, "i965") == 0)
+ return pipe_i965_create_screen(fd);
+ else if (strcmp(name, "nouveau") == 0)
+ return pipe_nouveau_create_screen(fd);
+ else if (strcmp(name, "r300") == 0)
+ return pipe_r300_create_screen(fd);
+ else if (strcmp(name, "r600") == 0)
+ return pipe_r600_create_screen(fd);
+ else if (strcmp(name, "vmwgfx") == 0)
+ return pipe_vmwgfx_create_screen(fd);
+ else
+ return NULL;
+}
+
+struct pipe_screen *
+egl_pipe_create_swrast_screen(struct sw_winsys *ws)
+{
+ struct pipe_screen *screen;
+
+ screen = sw_screen_create(ws);
+ if (screen)
+ screen = debug_screen_wrap(screen);
+
+ return screen;
+}
diff --git a/src/gallium/targets/egl-static/egl_pipe.h b/src/gallium/targets/egl-static/egl_pipe.h
new file mode 100644
index 0000000000..569b2d067c
--- /dev/null
+++ b/src/gallium/targets/egl-static/egl_pipe.h
@@ -0,0 +1,40 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.10
+ *
+ * Copyright (C) 2011 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <olv@lunarg.com>
+ */
+#ifndef _EGL_PIPE_H_
+#define _EGL_PIPE_H_
+
+struct pipe_screen;
+struct sw_winsys;
+
+struct pipe_screen *
+egl_pipe_create_drm_screen(const char *name, int fd);
+
+struct pipe_screen *
+egl_pipe_create_swrast_screen(struct sw_winsys *ws);
+
+#endif /* _EGL_PIPE_H_ */
diff --git a/src/gallium/targets/egl-static/egl_st.c b/src/gallium/targets/egl-static/egl_st.c
new file mode 100644
index 0000000000..3db52621de
--- /dev/null
+++ b/src/gallium/targets/egl-static/egl_st.c
@@ -0,0 +1,105 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.10
+ *
+ * Copyright (C) 2011 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <olv@lunarg.com>
+ */
+#include "util/u_debug.h"
+#include "state_tracker/st_api.h"
+#include "egl_st.h"
+
+/* for st/mesa */
+#include "state_tracker/st_gl_api.h"
+/* for st/vega */
+#include "vg_api.h"
+
+static struct st_api *
+st_GL_create_api(void)
+{
+#if FEATURE_GL || FEATURE_ES1 || FEATURE_ES2
+ return st_gl_api_create();
+#else
+ return NULL;
+#endif
+}
+
+static struct st_api *
+st_OpenVG_create_api(void)
+{
+#if FEATURE_VG
+ return (struct st_api *) vg_api_get();
+#else
+ return NULL;
+#endif
+}
+
+struct st_api *
+egl_st_create_api(enum st_api_type api)
+{
+ struct st_api *stapi;
+
+ switch (api) {
+ case ST_API_OPENGL:
+ stapi = st_GL_create_api();
+ break;
+ case ST_API_OPENVG:
+ stapi = st_OpenVG_create_api();
+ break;
+ default:
+ assert(!"Unknown API Type\n");
+ stapi = NULL;
+ break;
+ }
+
+ return stapi;
+}
+
+uint
+egl_st_get_profile_mask(enum st_api_type api)
+{
+ uint mask = 0x0;
+
+ switch (api) {
+ case ST_API_OPENGL:
+#if FEATURE_GL
+ mask |= ST_PROFILE_DEFAULT_MASK;
+#endif
+#if FEATURE_ES1
+ mask |= ST_PROFILE_OPENGL_ES1_MASK;
+#endif
+#if FEATURE_ES2
+ mask |= ST_PROFILE_OPENGL_ES2_MASK;
+#endif
+ break;
+ case ST_API_OPENVG:
+#if FEATURE_VG
+ mask |= ST_PROFILE_DEFAULT_MASK;
+#endif
+ break;
+ default:
+ break;
+ }
+
+ return mask;
+}
diff --git a/src/gallium/targets/egl-static/egl_st.h b/src/gallium/targets/egl-static/egl_st.h
new file mode 100644
index 0000000000..ba82faf0b0
--- /dev/null
+++ b/src/gallium/targets/egl-static/egl_st.h
@@ -0,0 +1,40 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.10
+ *
+ * Copyright (C) 2011 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <olv@lunarg.com>
+ */
+#ifndef _EGL_ST_H_
+#define _EGL_ST_H_
+
+#include "pipe/p_compiler.h"
+#include "state_tracker/st_api.h"
+
+struct st_api *
+egl_st_create_api(enum st_api_type api);
+
+uint
+egl_st_get_profile_mask(enum st_api_type api);
+
+#endif /* _EGL_ST_H_ */
diff --git a/src/gallium/targets/egl/Makefile b/src/gallium/targets/egl/Makefile
index 63e9352144..de01939e5f 100644
--- a/src/gallium/targets/egl/Makefile
+++ b/src/gallium/targets/egl/Makefile
@@ -46,6 +46,9 @@ ifneq ($(findstring x11, $(EGL_PLATFORMS)),)
egl_SYS += -lX11 -lXext -lXfixes $(LIBDRM_LIB)
egl_LIBS += $(TOP)/src/gallium/winsys/sw/xlib/libws_xlib.a
endif
+ifneq ($(findstring wayland, $(EGL_PLATFORMS)),)
+egl_SYS += $(WAYLAND_LIBS) $(LIBDRM_LIB)
+endif
ifneq ($(findstring drm, $(EGL_PLATFORMS)),)
egl_SYS += $(LIBDRM_LIB)
endif
@@ -57,12 +60,6 @@ endif
ifneq ($(filter $(GL_LIB), $(EGL_CLIENT_APIS)),)
egl_CPPFLAGS += $(API_DEFINES)
endif
-ifneq ($(filter $(GLESv1_CM_LIB), $(EGL_CLIENT_APIS)),)
-egl_CPPFLAGS += -DFEATURE_ES1=1
-endif
-ifneq ($(filter $(GLESv2_LIB), $(EGL_CLIENT_APIS)),)
-egl_CPPFLAGS += -DFEATURE_ES2=1
-endif
ifneq ($(filter $(VG_LIB), $(EGL_CLIENT_APIS)),)
egl_CPPFLAGS += -DFEATURE_VG=1
endif
@@ -80,7 +77,8 @@ i965_CPPFLAGS :=
i965_SYS := -ldrm_intel
i965_LIBS := \
$(TOP)/src/gallium/winsys/i965/drm/libi965drm.a \
- $(TOP)/src/gallium/drivers/i965/libi965.a
+ $(TOP)/src/gallium/drivers/i965/libi965.a \
+ $(TOP)/src/gallium/winsys/sw/wrapper/libwsw.a
# nouveau pipe driver
nouveau_CPPFLAGS :=
@@ -89,11 +87,12 @@ nouveau_LIBS := \
$(TOP)/src/gallium/winsys/nouveau/drm/libnouveaudrm.a \
$(TOP)/src/gallium/drivers/nvfx/libnvfx.a \
$(TOP)/src/gallium/drivers/nv50/libnv50.a \
+ $(TOP)/src/gallium/drivers/nvc0/libnvc0.a \
$(TOP)/src/gallium/drivers/nouveau/libnouveau.a
# r300 pipe driver
r300_CPPFLAGS :=
-r300_SYS := -ldrm -ldrm_radeon
+r300_SYS := -ldrm
r300_LIBS := \
$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
$(TOP)/src/gallium/drivers/r300/libr300.a
@@ -127,20 +126,14 @@ endif
# OpenGL state tracker
GL_CPPFLAGS := -I$(TOP)/src/mesa $(API_DEFINES)
-# do not link to $(GL_LIB) as the it supports GLES too
+ifeq ($(SHARED_GLAPI),1)
+GL_SYS := $(DRI_LIB_DEPS) -l$(GLAPI_LIB)
+else
+# cannot link to $(GL_LIB) as the app might want GL or GLES
GL_SYS := $(DRI_LIB_DEPS)
+endif
GL_LIBS := $(TOP)/src/mesa/libmesagallium.a
-# OpenGL ES 1.x state tracker
-GLESv1_CM_CPPFLAGS := -I$(TOP)/src/mesa
-GLESv1_CM_SYS := $(DRI_LIB_DEPS) -l$(GLESv1_CM_LIB)
-GLESv1_CM_LIBS := $(TOP)/src/mesa/libes1gallium.a
-
-# OpenGL ES 2.x state tracker
-GLESv2_CPPFLAGS := -I$(TOP)/src/mesa
-GLESv2_SYS := $(DRI_LIB_DEPS) -l$(GLESv2_LIB)
-GLESv2_LIBS := $(TOP)/src/mesa/libes2gallium.a
-
# OpenVG state tracker
OpenVG_CPPFLAGS := -I$(TOP)/src/gallium/state_trackers/vega
OpenVG_SYS := -lm -l$(VG_LIB)
@@ -225,12 +218,6 @@ $(OUTPUT_PATH)/$(PIPE_PREFIX)swrast.so: pipe_swrast.o $(swrast_LIBS)
$(OUTPUT_PATH)/$(ST_PREFIX)$(GL_LIB).so: st_GL.o $(GL_LIBS)
$(call mklib-cxx,GL)
-$(OUTPUT_PATH)/$(ST_PREFIX)$(GLESv1_CM_LIB).so: st_GLESv1_CM.o $(GLESv1_CM_LIBS)
- $(call mklib-cxx,GLESv1_CM)
-
-$(OUTPUT_PATH)/$(ST_PREFIX)$(GLESv2_LIB).so: st_GLESv2.o $(GLESv2_LIBS)
- $(call mklib-cxx,GLESv2)
-
$(OUTPUT_PATH)/$(ST_PREFIX)$(VG_LIB).so: st_OpenVG.o $(OpenVG_LIBS)
$(call mklib,OpenVG)
diff --git a/src/gallium/targets/egl/egl.c b/src/gallium/targets/egl/egl.c
index 786d5d1105..61fe5069e9 100644
--- a/src/gallium/targets/egl/egl.c
+++ b/src/gallium/targets/egl/egl.c
@@ -100,9 +100,14 @@ load_st_module(struct st_module *stmod,
{
struct st_api *(*create_api)(void);
- _eglLog(_EGL_DEBUG, "searching for st module %s", name);
+ if (name) {
+ _eglLog(_EGL_DEBUG, "searching for st module %s", name);
+ stmod->name = loader_strdup(name);
+ }
+ else {
+ stmod->name = NULL;
+ }
- stmod->name = loader_strdup(name);
if (stmod->name)
_eglSearchPathForEach(dlopen_st_module_cb, (void *) stmod);
else
@@ -200,19 +205,7 @@ get_st_api_full(enum st_api_type api, enum st_profile_type profile)
switch (api) {
case ST_API_OPENGL:
symbol = ST_CREATE_OPENGL_SYMBOL;
- switch (profile) {
- case ST_PROFILE_OPENGL_ES1:
- names[count++] = "GLESv1_CM";
- names[count++] = "GL";
- break;
- case ST_PROFILE_OPENGL_ES2:
- names[count++] = "GLESv2";
- names[count++] = "GL";
- break;
- default:
- names[count++] = "GL";
- break;
- }
+ names[count++] = "GL";
break;
case ST_API_OPENVG:
symbol = ST_CREATE_OPENVG_SYMBOL;
@@ -232,6 +225,21 @@ get_st_api_full(enum st_api_type api, enum st_profile_type profile)
break;
}
+ /* try again with libGL.so loaded */
+ if (!stmod->stapi && api == ST_API_OPENGL) {
+ struct util_dl_library *glapi = util_dl_open("libGL" UTIL_DL_EXT);
+
+ if (glapi) {
+ _eglLog(_EGL_DEBUG, "retry with libGL" UTIL_DL_EXT " loaded");
+ /* skip the last name (which is NULL) */
+ for (i = 0; i < count - 1; i++) {
+ if (load_st_module(stmod, names[i], symbol))
+ break;
+ }
+ util_dl_close(glapi);
+ }
+ }
+
if (!stmod->stapi) {
EGLint level = (egl_g3d_loader.profile_masks[api]) ?
_EGL_WARNING : _EGL_DEBUG;
diff --git a/src/gallium/targets/egl/st_GLESv1_CM.c b/src/gallium/targets/egl/st_GLESv1_CM.c
deleted file mode 100644
index c1df844aa4..0000000000
--- a/src/gallium/targets/egl/st_GLESv1_CM.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#include "state_tracker/st_gl_api.h"
-#include "egl.h"
-
-PUBLIC struct st_api *
-st_api_create_OpenGL(void)
-{
- return st_gl_api_create();
-}
diff --git a/src/gallium/targets/egl/st_GLESv2.c b/src/gallium/targets/egl/st_GLESv2.c
deleted file mode 100644
index c1df844aa4..0000000000
--- a/src/gallium/targets/egl/st_GLESv2.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#include "state_tracker/st_gl_api.h"
-#include "egl.h"
-
-PUBLIC struct st_api *
-st_api_create_OpenGL(void)
-{
- return st_gl_api_create();
-}
diff --git a/src/gallium/targets/graw-gdi/graw_gdi.c b/src/gallium/targets/graw-gdi/graw_gdi.c
index 17ca2a761c..99d8641d6b 100644
--- a/src/gallium/targets/graw-gdi/graw_gdi.c
+++ b/src/gallium/targets/graw-gdi/graw_gdi.c
@@ -66,7 +66,7 @@ graw_create_window_and_screen(int x,
{
struct sw_winsys *winsys = NULL;
struct pipe_screen *screen = NULL;
- WNDCLASSEX wc = {sizeof(wc)};
+ WNDCLASSEX wc;
UINT style = WS_VISIBLE | WS_TILEDWINDOW;
RECT rect;
HWND hWnd = NULL;
@@ -83,6 +83,8 @@ graw_create_window_and_screen(int x,
if (screen == NULL)
goto fail;
+ memset(&wc, 0, sizeof wc);
+ wc.cbSize = sizeof wc;
wc.style = CS_OWNDC | CS_HREDRAW | CS_VREDRAW;
wc.lpfnWndProc = window_proc;
wc.lpszClassName = "graw-gdi";
diff --git a/src/gallium/targets/graw-null/graw_util.c b/src/gallium/targets/graw-null/graw_util.c
index e5cf526d33..09cba895d2 100644
--- a/src/gallium/targets/graw-null/graw_util.c
+++ b/src/gallium/targets/graw-null/graw_util.c
@@ -1,6 +1,7 @@
#include "pipe/p_compiler.h"
#include "pipe/p_context.h"
+#include "pipe/p_shader_tokens.h"
#include "pipe/p_state.h"
#include "tgsi/tgsi_text.h"
#include "util/u_debug.h"
diff --git a/src/gallium/targets/graw-xlib/graw_xlib.c b/src/gallium/targets/graw-xlib/graw_xlib.c
index 578086f8f9..b6d798e577 100644
--- a/src/gallium/targets/graw-xlib/graw_xlib.c
+++ b/src/gallium/targets/graw-xlib/graw_xlib.c
@@ -66,9 +66,6 @@ graw_create_window_and_screen( int x,
root = RootWindow( graw.display, scrnum );
- if (format != PIPE_FORMAT_R8G8B8A8_UNORM)
- goto fail;
-
if (graw.display == NULL)
goto fail;
@@ -88,6 +85,23 @@ graw_create_window_and_screen( int x,
exit(1);
}
+ /* See if the requirested pixel format matches the visual */
+ if (visinfo->red_mask == 0xff0000 &&
+ visinfo->green_mask == 0xff00 &&
+ visinfo->blue_mask == 0xff) {
+ if (format != PIPE_FORMAT_B8G8R8A8_UNORM)
+ goto fail;
+ }
+ else if (visinfo->red_mask == 0xff &&
+ visinfo->green_mask == 0xff00 &&
+ visinfo->blue_mask == 0xff0000) {
+ if (format != PIPE_FORMAT_R8G8B8A8_UNORM)
+ goto fail;
+ }
+ else {
+ goto fail;
+ }
+
/* window attributes */
attr.background_pixel = 0;
attr.border_pixel = 0;
diff --git a/src/gallium/targets/libgl-gdi/SConscript b/src/gallium/targets/libgl-gdi/SConscript
index 6fa0851a1a..49462a8e30 100644
--- a/src/gallium/targets/libgl-gdi/SConscript
+++ b/src/gallium/targets/libgl-gdi/SConscript
@@ -15,7 +15,6 @@ env.Append(LIBS = [
'user32',
'kernel32',
'ws2_32',
- talloc,
])
sources = ['libgl_gdi.c']
@@ -37,6 +36,12 @@ drivers += [trace, rbug]
env['no_import_lib'] = 1
+# when GLES is enabled, gl* and _glapi_* belong to bridge_glapi and
+# shared_glapi respectively
+if env['gles']:
+ env.Prepend(LIBPATH = [shared_glapi.dir])
+ glapi = [bridge_glapi, 'libglapi']
+
opengl32 = env.SharedLibrary(
target ='opengl32',
source = sources,
diff --git a/src/gallium/targets/libgl-xlib/Makefile b/src/gallium/targets/libgl-xlib/Makefile
index fb537c3155..53a6c33ed8 100644
--- a/src/gallium/targets/libgl-xlib/Makefile
+++ b/src/gallium/targets/libgl-xlib/Makefile
@@ -60,6 +60,10 @@ GL_LIB_DEPS += $(LLVM_LIBS)
LDFLAGS += $(LLVM_LDFLAGS)
endif
+ifeq ($(SHARED_GLAPI),1)
+GL_LIB_DEPS := -L$(TOP)/$(LIB_DIR) -l$(GLAPI_LIB) $(GL_LIB_DEPS)
+endif
+
.SUFFIXES : .cpp
diff --git a/src/gallium/targets/libgl-xlib/SConscript b/src/gallium/targets/libgl-xlib/SConscript
index d932736be7..ca15372f1d 100644
--- a/src/gallium/targets/libgl-xlib/SConscript
+++ b/src/gallium/targets/libgl-xlib/SConscript
@@ -16,6 +16,12 @@ env.Append(CPPDEFINES = ['USE_XSHM'])
env.Prepend(LIBS = env['X11_LIBS'])
+# when GLES is enabled, gl* and _glapi_* belong to bridge_glapi and
+# shared_glapi respectively
+if env['gles']:
+ env.Prepend(LIBPATH = [shared_glapi.dir])
+ glapi = [bridge_glapi, 'glapi']
+
env.Prepend(LIBS = [
st_xlib,
ws_xlib,
@@ -23,7 +29,6 @@ env.Prepend(LIBS = [
mesa,
glsl,
gallium,
- 'talloc'
])
sources = [
diff --git a/src/gallium/targets/libgl-xlib/xlib.c b/src/gallium/targets/libgl-xlib/xlib.c
index 9a3e0e07b0..1a5892b94a 100644
--- a/src/gallium/targets/libgl-xlib/xlib.c
+++ b/src/gallium/targets/libgl-xlib/xlib.c
@@ -35,7 +35,6 @@
#include "state_tracker/xlib_sw_winsys.h"
#include "xm_public.h"
-#include "state_tracker/st_api.h"
#include "state_tracker/st_gl_api.h"
#include "target-helpers/inline_sw_helper.h"
#include "target-helpers/inline_debug_helper.h"
diff --git a/src/gallium/targets/xorg-radeon/Makefile b/src/gallium/targets/xorg-radeon/Makefile
index d3bc356992..6d5f2c3d16 100644
--- a/src/gallium/targets/xorg-radeon/Makefile
+++ b/src/gallium/targets/xorg-radeon/Makefile
@@ -19,6 +19,6 @@ DRIVER_PIPES = \
$(TOP)/src/gallium/drivers/rbug/librbug.a
DRIVER_LINKS = \
- $(shell pkg-config --libs libdrm libdrm_radeon)
+ $(shell pkg-config --libs libdrm)
include ../Makefile.xorg
diff --git a/src/gallium/tests/graw/SConscript b/src/gallium/tests/graw/SConscript
index 3341b88498..ad44d54c85 100644
--- a/src/gallium/tests/graw/SConscript
+++ b/src/gallium/tests/graw/SConscript
@@ -8,7 +8,7 @@ env.Prepend(LIBPATH = [graw.dir])
env.Prepend(LIBS = ['graw'])
-if env['platform'] in ('freebsd8', 'sunos5'):
+if env['platform'] in ('freebsd8', 'sunos'):
env.Append(LIBS = ['m'])
if env['platform'] == 'freebsd8':
@@ -24,6 +24,7 @@ progs = [
'gs-test',
'shader-leak',
'tri-gs',
+ 'quad-sample',
]
for name in progs:
diff --git a/src/gallium/tests/graw/clear.c b/src/gallium/tests/graw/clear.c
index 1ff80cadee..392d1503f1 100644
--- a/src/gallium/tests/graw/clear.c
+++ b/src/gallium/tests/graw/clear.c
@@ -2,6 +2,7 @@
* any utility code, just the graw interface and gallium.
*/
+#include <stdio.h>
#include "state_tracker/graw.h"
#include "pipe/p_screen.h"
#include "pipe/p_context.h"
@@ -28,7 +29,7 @@ static void draw( void )
float clear_color[4] = {1,0,1,1};
ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0);
- ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL);
+ ctx->flush(ctx, NULL);
graw_save_surface_to_file(ctx, surf, NULL);
@@ -48,16 +49,17 @@ static void init( void )
* Also, no easy way of querying supported formats if the screen
* cannot be created first.
*/
- for (i = 0;
- window == NULL && formats[i] != PIPE_FORMAT_NONE;
- i++) {
-
- screen = graw_create_window_and_screen(0,0,300,300,
+ for (i = 0; formats[i] != PIPE_FORMAT_NONE; i++) {
+ screen = graw_create_window_and_screen(0, 0, 300, 300,
formats[i],
&window);
+ if (window && screen)
+ break;
+ }
+ if (!screen || !window) {
+ fprintf(stderr, "Unable to create window\n");
+ exit(1);
}
- if (window == NULL)
- exit(2);
ctx = screen->context_create(screen, NULL);
if (ctx == NULL)
diff --git a/src/gallium/tests/graw/fs-test.c b/src/gallium/tests/graw/fs-test.c
index 37be2d0830..fda23bd7c9 100644
--- a/src/gallium/tests/graw/fs-test.c
+++ b/src/gallium/tests/graw/fs-test.c
@@ -215,7 +215,6 @@ static void set_vertices( void )
vbuf.stride = sizeof( struct vertex );
- vbuf.max_index = sizeof(vertices) / vbuf.stride;
vbuf.buffer_offset = 0;
vbuf.buffer = screen->user_buffer_create(screen,
vertices,
@@ -277,7 +276,7 @@ static void draw( void )
ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0);
util_draw_arrays(ctx, PIPE_PRIM_TRIANGLES, 0, 3);
- ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL);
+ ctx->flush(ctx, NULL);
graw_save_surface_to_file(ctx, surf, NULL);
@@ -433,15 +432,18 @@ static void init( void )
* Also, no easy way of querying supported formats if the screen
* cannot be created first.
*/
- for (i = 0;
- window == NULL && formats[i] != PIPE_FORMAT_NONE;
- i++) {
-
- screen = graw_create_window_and_screen(0,0,WIDTH,HEIGHT,
+ for (i = 0; formats[i] != PIPE_FORMAT_NONE; i++) {
+ screen = graw_create_window_and_screen(0, 0, 300, 300,
formats[i],
&window);
+ if (window && screen)
+ break;
}
-
+ if (!screen || !window) {
+ fprintf(stderr, "Unable to create window\n");
+ exit(1);
+ }
+
ctx = screen->context_create(screen, NULL);
if (ctx == NULL)
exit(3);
diff --git a/src/gallium/tests/graw/gs-test.c b/src/gallium/tests/graw/gs-test.c
index 812666a8c8..ebb26d2f3f 100644
--- a/src/gallium/tests/graw/gs-test.c
+++ b/src/gallium/tests/graw/gs-test.c
@@ -251,13 +251,11 @@ static void set_vertices( void )
vbuf.stride = sizeof( struct vertex );
vbuf.buffer_offset = 0;
if (draw_strip) {
- vbuf.max_index = sizeof(vertices_strip) / vbuf.stride;
vbuf.buffer = screen->user_buffer_create(screen,
vertices_strip,
sizeof(vertices_strip),
PIPE_BIND_VERTEX_BUFFER);
} else {
- vbuf.max_index = sizeof(vertices) / vbuf.stride;
vbuf.buffer = screen->user_buffer_create(screen,
vertices,
sizeof(vertices),
@@ -341,7 +339,7 @@ static void draw( void )
else
util_draw_arrays(ctx, PIPE_PRIM_TRIANGLES, 0, 3);
- ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL);
+ ctx->flush(ctx, NULL);
graw_save_surface_to_file(ctx, surf, NULL);
@@ -497,15 +495,18 @@ static void init( void )
* Also, no easy way of querying supported formats if the screen
* cannot be created first.
*/
- for (i = 0;
- window == NULL && formats[i] != PIPE_FORMAT_NONE;
- i++) {
-
- screen = graw_create_window_and_screen(0,0,WIDTH,HEIGHT,
+ for (i = 0; formats[i] != PIPE_FORMAT_NONE; i++) {
+ screen = graw_create_window_and_screen(0, 0, 300, 300,
formats[i],
&window);
+ if (window && screen)
+ break;
}
-
+ if (!screen || !window) {
+ fprintf(stderr, "Unable to create window\n");
+ exit(1);
+ }
+
ctx = screen->context_create(screen, NULL);
if (ctx == NULL)
exit(3);
diff --git a/src/gallium/tests/graw/quad-sample.c b/src/gallium/tests/graw/quad-sample.c
new file mode 100644
index 0000000000..6903046cf0
--- /dev/null
+++ b/src/gallium/tests/graw/quad-sample.c
@@ -0,0 +1,414 @@
+/* Display a cleared blue window. This demo has no dependencies on
+ * any utility code, just the graw interface and gallium.
+ */
+
+#include "state_tracker/graw.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_context.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+
+#include "util/u_debug.h" /* debug_dump_surface_bmp() */
+#include "util/u_inlines.h"
+#include "util/u_memory.h" /* Offset() */
+#include "util/u_draw_quad.h"
+#include "util/u_box.h"
+
+#include <stdio.h>
+
+enum pipe_format formats[] = {
+ PIPE_FORMAT_R8G8B8A8_UNORM,
+ PIPE_FORMAT_B8G8R8A8_UNORM,
+ PIPE_FORMAT_NONE
+};
+
+static const int WIDTH = 300;
+static const int HEIGHT = 300;
+
+static struct pipe_screen *screen = NULL;
+static struct pipe_context *ctx = NULL;
+static struct pipe_resource *rttex = NULL;
+static struct pipe_resource *samptex = NULL;
+static struct pipe_surface *surf = NULL;
+static struct pipe_sampler_view *sv = NULL;
+static void *sampler = NULL;
+static void *window = NULL;
+
+struct vertex {
+ float position[4];
+ float color[4];
+};
+
+static struct vertex vertices[] =
+{
+ { { 0.9, -0.9, 0.0, 1.0 },
+ { 1, 0, 0, 1 } },
+
+ { { 0.9, 0.9, 0.0, 1.0 },
+ { 1, 1, 0, 1 } },
+
+ { {-0.9, 0.9, 0.0, 1.0 },
+ { 0, 1, 0, 1 } },
+
+ { {-0.9, -0.9, 0.0, 1.0 },
+ { 0, 0, 0, 1 } },
+};
+
+
+
+
+static void set_viewport( float x, float y,
+ float width, float height,
+ float near, float far)
+{
+ float z = far;
+ float half_width = (float)width / 2.0f;
+ float half_height = (float)height / 2.0f;
+ float half_depth = ((float)far - (float)near) / 2.0f;
+ struct pipe_viewport_state vp;
+
+ vp.scale[0] = half_width;
+ vp.scale[1] = half_height;
+ vp.scale[2] = half_depth;
+ vp.scale[3] = 1.0f;
+
+ vp.translate[0] = half_width + x;
+ vp.translate[1] = half_height + y;
+ vp.translate[2] = half_depth + z;
+ vp.translate[3] = 0.0f;
+
+ ctx->set_viewport_state( ctx, &vp );
+}
+
+static void set_vertices( void )
+{
+ struct pipe_vertex_element ve[2];
+ struct pipe_vertex_buffer vbuf;
+ void *handle;
+
+ memset(ve, 0, sizeof ve);
+
+ ve[0].src_offset = Offset(struct vertex, position);
+ ve[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ ve[1].src_offset = Offset(struct vertex, color);
+ ve[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+
+ handle = ctx->create_vertex_elements_state(ctx, 2, ve);
+ ctx->bind_vertex_elements_state(ctx, handle);
+
+
+ vbuf.stride = sizeof( struct vertex );
+ vbuf.buffer_offset = 0;
+ vbuf.buffer = screen->user_buffer_create(screen,
+ vertices,
+ sizeof(vertices),
+ PIPE_BIND_VERTEX_BUFFER);
+
+ ctx->set_vertex_buffers(ctx, 1, &vbuf);
+}
+
+static void set_vertex_shader( void )
+{
+ void *handle;
+ const char *text =
+ "VERT\n"
+ "DCL IN[0]\n"
+ "DCL IN[1]\n"
+ "DCL OUT[0], POSITION\n"
+ "DCL OUT[1], GENERIC[0]\n"
+ " 0: MOV OUT[1], IN[1]\n"
+ " 1: MOV OUT[0], IN[0]\n"
+ " 2: END\n";
+
+ handle = graw_parse_vertex_shader(ctx, text);
+ ctx->bind_vs_state(ctx, handle);
+}
+
+static void set_fragment_shader( void )
+{
+ void *handle;
+ const char *text =
+ "FRAG\n"
+ "DCL IN[0], GENERIC[0], PERSPECTIVE\n"
+ "DCL OUT[0], COLOR\n"
+ "DCL TEMP[0]\n"
+ "DCL SAMP[0]\n"
+ "DCL RES[0], 2D, FLOAT\n"
+ " 0: SAMPLE TEMP[0], IN[0], RES[0], SAMP[0]\n"
+ " 1: MOV OUT[0], TEMP[0]\n"
+ " 2: END\n";
+
+ handle = graw_parse_fragment_shader(ctx, text);
+ ctx->bind_fs_state(ctx, handle);
+}
+
+
+static void draw( void )
+{
+ float clear_color[4] = {.5,.5,.5,1};
+
+ ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0);
+ util_draw_arrays(ctx, PIPE_PRIM_QUADS, 0, 4);
+ ctx->flush(ctx, NULL);
+
+ graw_save_surface_to_file(ctx, surf, NULL);
+
+ screen->flush_frontbuffer(screen, rttex, 0, 0, window);
+}
+
+#define SIZE 16
+
+static void init_tex( void )
+{
+ struct pipe_sampler_view sv_template;
+ struct pipe_sampler_state sampler_desc;
+ struct pipe_resource templat;
+ struct pipe_box box;
+ ubyte tex2d[SIZE][SIZE][4];
+ int s, t;
+
+#if (SIZE != 2)
+ for (s = 0; s < SIZE; s++) {
+ for (t = 0; t < SIZE; t++) {
+ if (0) {
+ int x = (s ^ t) & 1;
+ tex2d[t][s][0] = (x) ? 0 : 63;
+ tex2d[t][s][1] = (x) ? 0 : 128;
+ tex2d[t][s][2] = 0;
+ tex2d[t][s][3] = 0xff;
+ }
+ else {
+ int x = ((s ^ t) >> 2) & 1;
+ tex2d[t][s][0] = s*255/(SIZE-1);
+ tex2d[t][s][1] = t*255/(SIZE-1);
+ tex2d[t][s][2] = (x) ? 0 : 128;
+ tex2d[t][s][3] = 0xff;
+ }
+ }
+ }
+#else
+ tex2d[0][0][0] = 0;
+ tex2d[0][0][1] = 255;
+ tex2d[0][0][2] = 255;
+ tex2d[0][0][3] = 0;
+
+ tex2d[0][1][0] = 0;
+ tex2d[0][1][1] = 0;
+ tex2d[0][1][2] = 255;
+ tex2d[0][1][3] = 255;
+
+ tex2d[1][0][0] = 255;
+ tex2d[1][0][1] = 255;
+ tex2d[1][0][2] = 0;
+ tex2d[1][0][3] = 255;
+
+ tex2d[1][1][0] = 255;
+ tex2d[1][1][1] = 0;
+ tex2d[1][1][2] = 0;
+ tex2d[1][1][3] = 255;
+#endif
+
+ templat.target = PIPE_TEXTURE_2D;
+ templat.format = PIPE_FORMAT_B8G8R8A8_UNORM;
+ templat.width0 = SIZE;
+ templat.height0 = SIZE;
+ templat.depth0 = 1;
+ templat.last_level = 0;
+ templat.nr_samples = 1;
+ templat.bind = PIPE_BIND_SAMPLER_VIEW;
+
+
+ samptex = screen->resource_create(screen,
+ &templat);
+ if (samptex == NULL)
+ exit(4);
+
+ u_box_2d(0,0,SIZE,SIZE, &box);
+
+ ctx->transfer_inline_write(ctx,
+ samptex,
+ 0,
+ PIPE_TRANSFER_WRITE,
+ &box,
+ tex2d,
+ sizeof tex2d[0],
+ sizeof tex2d);
+
+ /* Possibly read back & compare against original data:
+ */
+ if (0)
+ {
+ struct pipe_transfer *t;
+ uint32_t *ptr;
+ t = pipe_get_transfer(ctx, samptex,
+ 0, 0, /* level, layer */
+ PIPE_TRANSFER_READ,
+ 0, 0, SIZE, SIZE); /* x, y, width, height */
+
+ ptr = ctx->transfer_map(ctx, t);
+
+ if (memcmp(ptr, tex2d, sizeof tex2d) != 0) {
+ assert(0);
+ exit(9);
+ }
+
+ ctx->transfer_unmap(ctx, t);
+
+ ctx->transfer_destroy(ctx, t);
+ }
+
+ memset(&sv_template, 0, sizeof sv_template);
+ sv_template.format = samptex->format;
+ sv_template.texture = samptex;
+ sv_template.swizzle_r = 0;
+ sv_template.swizzle_g = 1;
+ sv_template.swizzle_b = 2;
+ sv_template.swizzle_a = 3;
+ sv = ctx->create_sampler_view(ctx, samptex, &sv_template);
+ if (sv == NULL)
+ exit(5);
+
+ ctx->set_fragment_sampler_views(ctx, 1, &sv);
+
+
+ memset(&sampler_desc, 0, sizeof sampler_desc);
+ sampler_desc.wrap_s = PIPE_TEX_WRAP_REPEAT;
+ sampler_desc.wrap_t = PIPE_TEX_WRAP_REPEAT;
+ sampler_desc.wrap_r = PIPE_TEX_WRAP_REPEAT;
+ sampler_desc.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+ sampler_desc.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+ sampler_desc.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+ sampler_desc.compare_mode = PIPE_TEX_COMPARE_NONE;
+ sampler_desc.compare_func = 0;
+ sampler_desc.normalized_coords = 1;
+ sampler_desc.max_anisotropy = 0;
+
+ sampler = ctx->create_sampler_state(ctx, &sampler_desc);
+ if (sampler == NULL)
+ exit(6);
+
+ ctx->bind_fragment_sampler_states(ctx, 1, &sampler);
+
+}
+
+static void init( void )
+{
+ struct pipe_framebuffer_state fb;
+ struct pipe_resource templat;
+ struct pipe_surface surf_tmpl;
+ int i;
+
+ /* It's hard to say whether window or screen should be created
+ * first. Different environments would prefer one or the other.
+ *
+ * Also, no easy way of querying supported formats if the screen
+ * cannot be created first.
+ */
+ for (i = 0; formats[i] != PIPE_FORMAT_NONE; i++) {
+ screen = graw_create_window_and_screen(0, 0, 300, 300,
+ formats[i],
+ &window);
+ if (window && screen)
+ break;
+ }
+ if (!screen || !window) {
+ fprintf(stderr, "Unable to create window\n");
+ exit(1);
+ }
+
+ ctx = screen->context_create(screen, NULL);
+ if (ctx == NULL)
+ exit(3);
+
+ templat.target = PIPE_TEXTURE_2D;
+ templat.format = formats[i];
+ templat.width0 = WIDTH;
+ templat.height0 = HEIGHT;
+ templat.depth0 = 1;
+ templat.array_size = 1;
+ templat.last_level = 0;
+ templat.nr_samples = 1;
+ templat.bind = (PIPE_BIND_RENDER_TARGET |
+ PIPE_BIND_DISPLAY_TARGET);
+
+ rttex = screen->resource_create(screen,
+ &templat);
+ if (rttex == NULL)
+ exit(4);
+
+ surf_tmpl.format = templat.format;
+ surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
+ surf_tmpl.u.tex.level = 0;
+ surf_tmpl.u.tex.first_layer = 0;
+ surf_tmpl.u.tex.last_layer = 0;
+ surf = ctx->create_surface(ctx, rttex, &surf_tmpl);
+ if (surf == NULL)
+ exit(5);
+
+ memset(&fb, 0, sizeof fb);
+ fb.nr_cbufs = 1;
+ fb.width = WIDTH;
+ fb.height = HEIGHT;
+ fb.cbufs[0] = surf;
+
+ ctx->set_framebuffer_state(ctx, &fb);
+
+ {
+ struct pipe_blend_state blend;
+ void *handle;
+ memset(&blend, 0, sizeof blend);
+ blend.rt[0].colormask = PIPE_MASK_RGBA;
+ handle = ctx->create_blend_state(ctx, &blend);
+ ctx->bind_blend_state(ctx, handle);
+ }
+
+ {
+ struct pipe_depth_stencil_alpha_state depthstencil;
+ void *handle;
+ memset(&depthstencil, 0, sizeof depthstencil);
+ handle = ctx->create_depth_stencil_alpha_state(ctx, &depthstencil);
+ ctx->bind_depth_stencil_alpha_state(ctx, handle);
+ }
+
+ {
+ struct pipe_rasterizer_state rasterizer;
+ void *handle;
+ memset(&rasterizer, 0, sizeof rasterizer);
+ rasterizer.cull_face = PIPE_FACE_NONE;
+ rasterizer.gl_rasterization_rules = 1;
+ handle = ctx->create_rasterizer_state(ctx, &rasterizer);
+ ctx->bind_rasterizer_state(ctx, handle);
+ }
+
+ set_viewport(0, 0, WIDTH, HEIGHT, 30, 1000);
+
+ init_tex();
+
+ set_vertices();
+ set_vertex_shader();
+ set_fragment_shader();
+}
+
+static void args(int argc, char *argv[])
+{
+ int i;
+
+ for (i = 1; i < argc;) {
+ if (graw_parse_args(&i, argc, argv)) {
+ continue;
+ }
+ exit(1);
+ }
+}
+
+
+int main( int argc, char *argv[] )
+{
+ args(argc, argv);
+ init();
+
+ graw_set_display_func( draw );
+ graw_main_loop();
+ return 0;
+}
diff --git a/src/gallium/tests/graw/quad-tex.c b/src/gallium/tests/graw/quad-tex.c
index 952131d765..fd01cb3f84 100644
--- a/src/gallium/tests/graw/quad-tex.c
+++ b/src/gallium/tests/graw/quad-tex.c
@@ -2,6 +2,7 @@
* any utility code, just the graw interface and gallium.
*/
+#include <stdio.h>
#include "state_tracker/graw.h"
#include "pipe/p_screen.h"
#include "pipe/p_context.h"
@@ -96,7 +97,6 @@ static void set_vertices( void )
vbuf.stride = sizeof( struct vertex );
- vbuf.max_index = sizeof(vertices) / vbuf.stride;
vbuf.buffer_offset = 0;
vbuf.buffer = screen->user_buffer_create(screen,
vertices,
@@ -147,7 +147,7 @@ static void draw( void )
ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0);
util_draw_arrays(ctx, PIPE_PRIM_QUADS, 0, 4);
- ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL);
+ ctx->flush(ctx, NULL);
graw_save_surface_to_file(ctx, surf, NULL);
@@ -303,15 +303,18 @@ static void init( void )
* Also, no easy way of querying supported formats if the screen
* cannot be created first.
*/
- for (i = 0;
- window == NULL && formats[i] != PIPE_FORMAT_NONE;
- i++) {
-
- screen = graw_create_window_and_screen(0,0,300,300,
+ for (i = 0; formats[i] != PIPE_FORMAT_NONE; i++) {
+ screen = graw_create_window_and_screen(0, 0, 300, 300,
formats[i],
&window);
+ if (window && screen)
+ break;
}
-
+ if (!screen || !window) {
+ fprintf(stderr, "Unable to create window\n");
+ exit(1);
+ }
+
ctx = screen->context_create(screen, NULL);
if (ctx == NULL)
exit(3);
diff --git a/src/gallium/tests/graw/shader-leak.c b/src/gallium/tests/graw/shader-leak.c
index b53f0a046c..004b1691b6 100644
--- a/src/gallium/tests/graw/shader-leak.c
+++ b/src/gallium/tests/graw/shader-leak.c
@@ -88,7 +88,6 @@ static void set_vertices( void )
vbuf.stride = sizeof(struct vertex);
- vbuf.max_index = sizeof(vertices) / vbuf.stride;
vbuf.buffer_offset = 0;
vbuf.buffer = screen->user_buffer_create(screen,
vertices,
@@ -150,7 +149,7 @@ static void draw( void )
ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0);
util_draw_arrays(ctx, PIPE_PRIM_POINTS, 0, 1);
- ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL);
+ ctx->flush(ctx, NULL);
ctx->bind_fs_state(ctx, NULL);
ctx->delete_fs_state(ctx, fs);
@@ -176,15 +175,18 @@ static void init( void )
* Also, no easy way of querying supported formats if the screen
* cannot be created first.
*/
- for (i = 0;
- window == NULL && formats[i] != PIPE_FORMAT_NONE;
- i++) {
-
- screen = graw_create_window_and_screen(0,0,300,300,
+ for (i = 0; formats[i] != PIPE_FORMAT_NONE; i++) {
+ screen = graw_create_window_and_screen(0, 0, 300, 300,
formats[i],
&window);
+ if (window && screen)
+ break;
}
-
+ if (!screen || !window) {
+ fprintf(stderr, "Unable to create window\n");
+ exit(1);
+ }
+
ctx = screen->context_create(screen, NULL);
if (ctx == NULL)
exit(3);
diff --git a/src/gallium/tests/graw/tri-gs.c b/src/gallium/tests/graw/tri-gs.c
index 84ff3e6773..ab0116bed7 100644
--- a/src/gallium/tests/graw/tri-gs.c
+++ b/src/gallium/tests/graw/tri-gs.c
@@ -2,6 +2,7 @@
* any utility code, just the graw interface and gallium.
*/
+#include <stdio.h>
#include "state_tracker/graw.h"
#include "pipe/p_screen.h"
#include "pipe/p_context.h"
@@ -88,7 +89,6 @@ static void set_vertices( void )
vbuf.stride = sizeof( struct vertex );
- vbuf.max_index = sizeof(vertices) / vbuf.stride;
vbuf.buffer_offset = 0;
vbuf.buffer = screen->user_buffer_create(screen,
vertices,
@@ -163,7 +163,7 @@ static void draw( void )
ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0);
util_draw_arrays(ctx, PIPE_PRIM_TRIANGLES, 0, 3);
- ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL);
+ ctx->flush(ctx, NULL);
screen->flush_frontbuffer(screen, tex, 0, 0, window);
}
@@ -182,13 +182,16 @@ static void init( void )
* Also, no easy way of querying supported formats if the screen
* cannot be created first.
*/
- for (i = 0;
- window == NULL && formats[i] != PIPE_FORMAT_NONE;
- i++) {
-
- screen = graw_create_window_and_screen(0,0,300,300,
+ for (i = 0; formats[i] != PIPE_FORMAT_NONE; i++) {
+ screen = graw_create_window_and_screen(0, 0, 300, 300,
formats[i],
&window);
+ if (window && screen)
+ break;
+ }
+ if (!screen || !window) {
+ fprintf(stderr, "Unable to create window\n");
+ exit(1);
}
ctx = screen->context_create(screen, NULL);
diff --git a/src/gallium/tests/graw/tri-instanced.c b/src/gallium/tests/graw/tri-instanced.c
index f33c061b22..bed3437466 100644
--- a/src/gallium/tests/graw/tri-instanced.c
+++ b/src/gallium/tests/graw/tri-instanced.c
@@ -132,7 +132,6 @@ static void set_vertices( void )
/* vertex data */
vbuf[0].stride = sizeof( struct vertex );
- vbuf[0].max_index = sizeof(vertices) / vbuf[0].stride;
vbuf[0].buffer_offset = 0;
vbuf[0].buffer = screen->user_buffer_create(screen,
vertices,
@@ -141,7 +140,6 @@ static void set_vertices( void )
/* instance data */
vbuf[1].stride = sizeof( inst_data[0] );
- vbuf[1].max_index = sizeof(inst_data) / vbuf[1].stride;
vbuf[1].buffer_offset = 0;
vbuf[1].buffer = screen->user_buffer_create(screen,
inst_data,
@@ -213,7 +211,7 @@ static void draw( void )
ctx->draw_vbo(ctx, &info);
- ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL);
+ ctx->flush(ctx, NULL);
graw_save_surface_to_file(ctx, surf, NULL);
@@ -234,13 +232,16 @@ static void init( void )
* Also, no easy way of querying supported formats if the screen
* cannot be created first.
*/
- for (i = 0;
- window == NULL && formats[i] != PIPE_FORMAT_NONE;
- i++) {
-
- screen = graw_create_window_and_screen(0,0,300,300,
+ for (i = 0; formats[i] != PIPE_FORMAT_NONE; i++) {
+ screen = graw_create_window_and_screen(0, 0, 300, 300,
formats[i],
&window);
+ if (window && screen)
+ break;
+ }
+ if (!screen || !window) {
+ fprintf(stderr, "Unable to create window\n");
+ exit(1);
}
ctx = screen->context_create(screen, NULL);
diff --git a/src/gallium/tests/graw/tri.c b/src/gallium/tests/graw/tri.c
index 2742c7c99e..30ead999a4 100644
--- a/src/gallium/tests/graw/tri.c
+++ b/src/gallium/tests/graw/tri.c
@@ -93,7 +93,6 @@ static void set_vertices( void )
vbuf.stride = sizeof( struct vertex );
- vbuf.max_index = sizeof(vertices) / vbuf.stride;
vbuf.buffer_offset = 0;
vbuf.buffer = screen->user_buffer_create(screen,
vertices,
@@ -141,7 +140,7 @@ static void draw( void )
ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0);
util_draw_arrays(ctx, PIPE_PRIM_TRIANGLES, 0, 3);
- ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL);
+ ctx->flush(ctx, NULL);
graw_save_surface_to_file(ctx, surf, NULL);
@@ -162,13 +161,16 @@ static void init( void )
* Also, no easy way of querying supported formats if the screen
* cannot be created first.
*/
- for (i = 0;
- window == NULL && formats[i] != PIPE_FORMAT_NONE;
- i++) {
-
- screen = graw_create_window_and_screen(0,0,300,300,
+ for (i = 0; formats[i] != PIPE_FORMAT_NONE; i++) {
+ screen = graw_create_window_and_screen(0, 0, 300, 300,
formats[i],
&window);
+ if (window && screen)
+ break;
+ }
+ if (!screen || !window) {
+ fprintf(stderr, "Unable to create window\n");
+ exit(1);
}
ctx = screen->context_create(screen, NULL);
diff --git a/src/gallium/tests/graw/vs-test.c b/src/gallium/tests/graw/vs-test.c
index 58908f38a2..18e056dcb3 100644
--- a/src/gallium/tests/graw/vs-test.c
+++ b/src/gallium/tests/graw/vs-test.c
@@ -171,7 +171,6 @@ static void set_vertices( void )
}
vbuf.stride = sizeof( struct vertex );
- vbuf.max_index = sizeof(vertices) / vbuf.stride;
vbuf.buffer_offset = 0;
vbuf.buffer = screen->user_buffer_create(screen,
vertices,
@@ -228,7 +227,7 @@ static void draw( void )
ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0);
util_draw_arrays(ctx, PIPE_PRIM_POINTS, 0, Elements(vertices));
- ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL);
+ ctx->flush(ctx, NULL);
graw_save_surface_to_file(ctx, surf, NULL);
@@ -384,13 +383,16 @@ static void init( void )
* Also, no easy way of querying supported formats if the screen
* cannot be created first.
*/
- for (i = 0;
- window == NULL && formats[i] != PIPE_FORMAT_NONE;
- i++) {
-
- screen = graw_create_window_and_screen(0,0,WIDTH,HEIGHT,
+ for (i = 0; formats[i] != PIPE_FORMAT_NONE; i++) {
+ screen = graw_create_window_and_screen(0, 0, 300, 300,
formats[i],
&window);
+ if (window && screen)
+ break;
+ }
+ if (!screen || !window) {
+ fprintf(stderr, "Unable to create window\n");
+ exit(1);
}
ctx = screen->context_create(screen, NULL);
diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c
index 92c5b4dbb1..3a64b1c8d9 100644
--- a/src/gallium/tests/trivial/quad-tex.c
+++ b/src/gallium/tests/trivial/quad-tex.c
@@ -129,7 +129,8 @@ static void init_prog(struct program *p)
}
};
- p->vbuf = pipe_buffer_create(p->screen, PIPE_BIND_VERTEX_BUFFER, sizeof(vertices));
+ p->vbuf = pipe_buffer_create(p->screen, PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STATIC, sizeof(vertices));
pipe_buffer_write(p->pipe, p->vbuf, 0, sizeof(vertices), vertices);
}
@@ -334,7 +335,7 @@ static void draw(struct program *p)
4, /* verts */
2); /* attribs/vert */
- p->pipe->flush(p->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+ p->pipe->flush(p->pipe, NULL);
debug_dump_surface_bmp(p->pipe, "result.bmp", p->framebuffer.cbufs[0]);
}
diff --git a/src/gallium/tests/trivial/tri.c b/src/gallium/tests/trivial/tri.c
index 37c1573051..bfd2f3ca9a 100644
--- a/src/gallium/tests/trivial/tri.c
+++ b/src/gallium/tests/trivial/tri.c
@@ -120,7 +120,8 @@ static void init_prog(struct program *p)
}
};
- p->vbuf = pipe_buffer_create(p->screen, PIPE_BIND_VERTEX_BUFFER, sizeof(vertices));
+ p->vbuf = pipe_buffer_create(p->screen, PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STATIC, sizeof(vertices));
pipe_buffer_write(p->pipe, p->vbuf, 0, sizeof(vertices), vertices);
}
@@ -263,7 +264,7 @@ static void draw(struct program *p)
3, /* verts */
2); /* attribs/vert */
- p->pipe->flush(p->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+ p->pipe->flush(p->pipe, NULL);
debug_dump_surface_bmp(p->pipe, "result.bmp", p->framebuffer.cbufs[0]);
}
diff --git a/src/gallium/tests/unit/SConscript b/src/gallium/tests/unit/SConscript
index 655e8a9b41..49f28c0369 100644
--- a/src/gallium/tests/unit/SConscript
+++ b/src/gallium/tests/unit/SConscript
@@ -4,7 +4,7 @@ env = env.Clone()
env.Prepend(LIBS = [gallium])
-if env['platform'] in ('freebsd8', 'sunos5'):
+if env['platform'] in ('freebsd8', 'sunos'):
env.Append(LIBS = ['m'])
if env['platform'] == 'freebsd8':
diff --git a/src/gallium/winsys/SConscript b/src/gallium/winsys/SConscript
index d74f8a2e98..9f36d225e1 100644
--- a/src/gallium/winsys/SConscript
+++ b/src/gallium/winsys/SConscript
@@ -13,6 +13,10 @@ SConscript([
'sw/gdi/SConscript',
])
+SConscript([
+ 'i915/sw/SConscript',
+])
+
if env['dri']:
SConscript([
'sw/dri/SConscript',
diff --git a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
index ebe86dcf19..03aa1b1537 100644
--- a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
+++ b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
@@ -4,6 +4,8 @@
#include "i915_drm.h"
#include "i915/i915_debug.h"
+#include <xf86drm.h>
+#include <stdio.h>
#define BATCH_RESERVED 16
@@ -34,7 +36,6 @@ static void
i915_drm_batchbuffer_reset(struct i915_drm_batchbuffer *batch)
{
struct i915_drm_winsys *idws = i915_drm_winsys(batch->base.iws);
- int ret;
if (batch->bo)
drm_intel_bo_unreference(batch->bo);
@@ -43,18 +44,6 @@ i915_drm_batchbuffer_reset(struct i915_drm_batchbuffer *batch)
batch->actual_size,
4096);
-#ifdef INTEL_MAP_BATCHBUFFER
-#ifdef INTEL_MAP_GTT
- ret = drm_intel_gem_bo_map_gtt(batch->bo);
-#else
- ret = drm_intel_bo_map(batch->bo, TRUE);
-#endif
- assert(ret == 0);
- batch->base.map = batch->bo->virtual;
-#else
- (void)ret;
-#endif
-
memset(batch->base.map, 0, batch->actual_size);
batch->base.ptr = batch->base.map;
batch->base.size = batch->actual_size - BATCH_RESERVED;
@@ -74,7 +63,6 @@ i915_drm_batchbuffer_create(struct i915_winsys *iws)
batch->base.size = 0;
batch->base.relocs = 0;
- batch->base.max_relocs = 300;/*INTEL_DEFAULT_RELOCS;*/
batch->base.iws = iws;
@@ -83,11 +71,31 @@ i915_drm_batchbuffer_create(struct i915_winsys *iws)
return &batch->base;
}
+static boolean
+i915_drm_batchbuffer_validate_buffers(struct i915_winsys_batchbuffer *batch,
+ struct i915_winsys_buffer **buffer,
+ int num_of_buffers)
+{
+ struct i915_drm_batchbuffer *drm_batch = i915_drm_batchbuffer(batch);
+ drm_intel_bo *bos[num_of_buffers + 1];
+ int i, ret;
+
+ bos[0] = drm_batch->bo;
+ for (i = 0; i < num_of_buffers; i++)
+ bos[i+1] = intel_bo(buffer[i]);
+
+ ret = drm_intel_bufmgr_check_aperture_space(bos, num_of_buffers);
+ if (ret != 0)
+ return FALSE;
+
+ return TRUE;
+}
+
static int
i915_drm_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch,
struct i915_winsys_buffer *buffer,
enum i915_winsys_buffer_usage usage,
- unsigned pre_add, bool fenced)
+ unsigned pre_add, boolean fenced)
{
struct i915_drm_batchbuffer *batch = i915_drm_batchbuffer(ibatch);
unsigned write_domain = 0;
@@ -95,8 +103,6 @@ i915_drm_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch,
unsigned offset;
int ret = 0;
- assert(batch->base.relocs < batch->base.max_relocs);
-
switch (usage) {
case I915_USAGE_SAMPLER:
write_domain = 0;
@@ -145,6 +151,12 @@ i915_drm_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch,
return ret;
}
+static void
+i915_drm_throttle(struct i915_drm_winsys *idws)
+{
+ drmIoctl(idws->fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL);
+}
+
static void
i915_drm_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch,
struct pipe_fence_handle **fence)
@@ -168,11 +180,21 @@ i915_drm_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch,
if (ret == 0 && i915_drm_winsys(ibatch->iws)->send_cmd)
ret = drm_intel_bo_exec(batch->bo, used, NULL, 0, 0);
+ i915_drm_throttle(i915_drm_winsys(ibatch->iws));
+
if (ret != 0 || i915_drm_winsys(ibatch->iws)->dump_cmd) {
i915_dump_batchbuffer(ibatch);
assert(ret == 0);
}
+ if (i915_drm_winsys(ibatch->iws)->dump_raw_file) {
+ FILE *file = fopen(i915_drm_winsys(ibatch->iws)->dump_raw_file, "a");
+ if (file) {
+ fwrite(batch->base.map, used, 1, file);
+ fclose(file);
+ }
+ }
+
#ifdef INTEL_RUN_SYNC
drm_intel_bo_wait_rendering(batch->bo);
#endif
@@ -206,6 +228,7 @@ i915_drm_batchbuffer_destroy(struct i915_winsys_batchbuffer *ibatch)
void i915_drm_winsys_init_batchbuffer_functions(struct i915_drm_winsys *idws)
{
idws->base.batchbuffer_create = i915_drm_batchbuffer_create;
+ idws->base.validate_buffers = i915_drm_batchbuffer_validate_buffers;
idws->base.batchbuffer_reloc = i915_drm_batchbuffer_reloc;
idws->base.batchbuffer_flush = i915_drm_batchbuffer_flush;
idws->base.batchbuffer_destroy = i915_drm_batchbuffer_destroy;
diff --git a/src/gallium/winsys/i915/drm/i915_drm_winsys.c b/src/gallium/winsys/i915/drm/i915_drm_winsys.c
index 2288b48b2b..2c3b508d05 100644
--- a/src/gallium/winsys/i915/drm/i915_drm_winsys.c
+++ b/src/gallium/winsys/i915/drm/i915_drm_winsys.c
@@ -72,6 +72,7 @@ i915_drm_winsys_create(int drmFD)
drm_intel_bufmgr_gem_enable_fenced_relocs(idws->gem_manager);
idws->dump_cmd = debug_get_bool_option("I915_DUMP_CMD", FALSE);
+ idws->dump_raw_file = debug_get_option("I915_DUMP_RAW_FILE", NULL);
idws->send_cmd = !debug_get_bool_option("I915_NO_HW", FALSE);
return &idws->base;
diff --git a/src/gallium/winsys/i915/drm/i915_drm_winsys.h b/src/gallium/winsys/i915/drm/i915_drm_winsys.h
index 0d74d0270c..dae53c3e80 100644
--- a/src/gallium/winsys/i915/drm/i915_drm_winsys.h
+++ b/src/gallium/winsys/i915/drm/i915_drm_winsys.h
@@ -18,6 +18,7 @@ struct i915_drm_winsys
struct i915_winsys base;
boolean dump_cmd;
+ char *dump_raw_file;
boolean send_cmd;
int fd; /**< Drm file discriptor */
diff --git a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
index 44773ae30e..3bf54011d9 100644
--- a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
+++ b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
@@ -1,6 +1,7 @@
#include "i915_sw_winsys.h"
#include "i915/i915_batchbuffer.h"
+#include "i915/i915_debug.h"
#include "util/u_memory.h"
#define BATCH_RESERVED 16
@@ -48,7 +49,6 @@ i915_sw_batchbuffer_create(struct i915_winsys *iws)
batch->base.size = 0;
batch->base.relocs = 0;
- batch->base.max_relocs = 300;/*INTEL_DEFAULT_RELOCS;*/
batch->base.iws = iws;
@@ -57,17 +57,23 @@ i915_sw_batchbuffer_create(struct i915_winsys *iws)
return &batch->base;
}
+static boolean
+i915_sw_batchbuffer_validate_buffers(struct i915_winsys_batchbuffer *batch,
+ struct i915_winsys_buffer **buffer,
+ int num_of_buffers)
+{
+ return TRUE;
+}
+
static int
i915_sw_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch,
struct i915_winsys_buffer *buffer,
enum i915_winsys_buffer_usage usage,
- unsigned pre_add, bool fenced)
+ unsigned pre_add, boolean fenced)
{
struct i915_sw_batchbuffer *batch = i915_sw_batchbuffer(ibatch);
int ret = 0;
- assert(batch->base.relocs < batch->base.max_relocs);
-
if (usage == I915_USAGE_SAMPLER) {
} else if (usage == I915_USAGE_RENDER) {
@@ -98,7 +104,6 @@ i915_sw_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch,
{
struct i915_sw_batchbuffer *batch = i915_sw_batchbuffer(ibatch);
unsigned used = 0;
- int i;
assert(i915_winsys_batchbuffer_space(ibatch) >= 0);
@@ -107,27 +112,22 @@ i915_sw_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch,
#ifdef INTEL_ALWAYS_FLUSH
/* MI_FLUSH | FLUSH_MAP_CACHE */
- i915_winsys_batchbuffer_dword(ibatch, (0x4<<23)|(1<<0));
+ i915_winsys_batchbuffer_dword_unchecked(ibatch, (0x4<<23)|(1<<0));
used += 4;
#endif
if ((used & 4) == 0) {
/* MI_NOOP */
- i915_winsys_batchbuffer_dword(ibatch, 0);
+ i915_winsys_batchbuffer_dword_unchecked(ibatch, 0);
}
/* MI_BATCH_BUFFER_END */
- i915_winsys_batchbuffer_dword(ibatch, (0xA<<23));
+ i915_winsys_batchbuffer_dword_unchecked(ibatch, (0xA<<23));
used = batch->base.ptr - batch->base.map;
assert((used & 4) == 0);
if (i915_sw_winsys(ibatch->iws)->dump_cmd) {
- unsigned *ptr = (unsigned *)batch->base.map;
-
- debug_printf("%s:\n", __func__);
- for (i = 0; i < used / 4; i++, ptr++) {
- debug_printf("\t%08x: %08x\n", i*4, *ptr);
- }
+ i915_dump_batchbuffer(ibatch);
}
if (fence) {
@@ -151,6 +151,7 @@ i915_sw_batchbuffer_destroy(struct i915_winsys_batchbuffer *ibatch)
void i915_sw_winsys_init_batchbuffer_functions(struct i915_sw_winsys *isws)
{
isws->base.batchbuffer_create = i915_sw_batchbuffer_create;
+ isws->base.validate_buffers = i915_sw_batchbuffer_validate_buffers;
isws->base.batchbuffer_reloc = i915_sw_batchbuffer_reloc;
isws->base.batchbuffer_flush = i915_sw_batchbuffer_flush;
isws->base.batchbuffer_destroy = i915_sw_batchbuffer_destroy;
diff --git a/src/gallium/winsys/i915/sw/i915_sw_buffer.c b/src/gallium/winsys/i915/sw/i915_sw_buffer.c
index 834805e621..9a7e90e217 100644
--- a/src/gallium/winsys/i915/sw/i915_sw_buffer.c
+++ b/src/gallium/winsys/i915/sw/i915_sw_buffer.c
@@ -84,7 +84,7 @@ i915_sw_buffer_write(struct i915_winsys *iws,
{
struct i915_sw_buffer *buf = i915_sw_buffer(buffer);
- memcpy(buf->ptr + offset, data, size);
+ memcpy((char*)buf->ptr + offset, data, size);
return 0;
}
diff --git a/src/gallium/winsys/i915/sw/i915_sw_winsys.c b/src/gallium/winsys/i915/sw/i915_sw_winsys.c
index 058ddc44aa..fc48da6fb9 100644
--- a/src/gallium/winsys/i915/sw/i915_sw_winsys.c
+++ b/src/gallium/winsys/i915/sw/i915_sw_winsys.c
@@ -50,7 +50,7 @@ i915_sw_winsys_create()
isws->base.pci_id = deviceID;
isws->max_batch_size = 16 * 4096;
- isws->dump_cmd = debug_get_bool_option("INTEL_DUMP_CMD", FALSE);
+ isws->dump_cmd = debug_get_bool_option("I915_DUMP_CMD", FALSE);
return &isws->base;
}
diff --git a/src/gallium/winsys/i965/drm/i965_drm_buffer.c b/src/gallium/winsys/i965/drm/i965_drm_buffer.c
index ed62db60bb..a904179eeb 100644
--- a/src/gallium/winsys/i965/drm/i965_drm_buffer.c
+++ b/src/gallium/winsys/i965/drm/i965_drm_buffer.c
@@ -325,7 +325,7 @@ i965_libdrm_bo_subdata(struct brw_winsys_buffer *buffer,
brw_dump_data( idws->base.pci_id,
data_type,
buf->bo->offset + offset,
- data, size );
+ data, size, buffer->sws->gen );
/* XXX: use bo_map_gtt/memcpy/unmap_gtt under some circumstances???
*/
@@ -464,7 +464,7 @@ i965_libdrm_bo_flush_range(struct brw_winsys_buffer *buffer,
buf->data_type,
buf->bo->offset + offset,
(char*)buf->bo->virtual + offset,
- length );
+ length, buffer->sws->gen );
}
static void
diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
index d4bf124ce6..648d6c8a8e 100644
--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
+++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
@@ -50,6 +50,9 @@ nouveau_drm_screen_create(int fd)
case 0xa0:
init = nv50_screen_create;
break;
+ case 0xc0:
+ init = nvc0_screen_create;
+ break;
default:
debug_printf("%s: unknown chipset nv%02x\n", __func__,
dev->chipset);
diff --git a/src/gallium/winsys/r600/Android.mk b/src/gallium/winsys/r600/Android.mk
new file mode 100644
index 0000000000..643a59dc42
--- /dev/null
+++ b/src/gallium/winsys/r600/Android.mk
@@ -0,0 +1,38 @@
+ifeq ($(strip $(MESA_BUILD_R600G)),true)
+
+LOCAL_PATH := $(call my-dir)
+
+# from drm/Makefile
+C_SOURCES = \
+ bof.c \
+ evergreen_hw_context.c \
+ radeon_bo.c \
+ radeon_pciid.c \
+ r600_bo.c \
+ r600_drm.c \
+ r600_hw_context.c \
+ r600_bomgr.c
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+ $(addprefix drm/, $(C_SOURCES))
+
+LOCAL_CFLAGS := \
+ -std=c99 \
+ -fvisibility=hidden \
+ -Wno-sign-compare
+
+LOCAL_C_INCLUDES := \
+ external/mesa/include \
+ external/mesa/src/gallium/include \
+ external/mesa/src/gallium/auxiliary \
+ external/mesa/src/gallium/drivers/r600 \
+ external/drm \
+ external/drm/include/drm
+
+LOCAL_MODULE := libmesa_winsys_r600
+
+include $(BUILD_STATIC_LIBRARY)
+
+endif # MESA_BUILD_R600G
diff --git a/src/gallium/winsys/r600/drm/Makefile b/src/gallium/winsys/r600/drm/Makefile
index a396205f89..7310734f05 100644
--- a/src/gallium/winsys/r600/drm/Makefile
+++ b/src/gallium/winsys/r600/drm/Makefile
@@ -8,12 +8,11 @@ C_SOURCES = \
bof.c \
evergreen_hw_context.c \
radeon_bo.c \
- radeon_bo_pb.c \
radeon_pciid.c \
- r600.c \
r600_bo.c \
r600_drm.c \
- r600_hw_context.c
+ r600_hw_context.c \
+ r600_bomgr.c
LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/r600 \
$(shell pkg-config libdrm --cflags-only-I)
diff --git a/src/gallium/winsys/r600/drm/SConscript b/src/gallium/winsys/r600/drm/SConscript
index cc053c06dd..cc9a06a239 100644
--- a/src/gallium/winsys/r600/drm/SConscript
+++ b/src/gallium/winsys/r600/drm/SConscript
@@ -6,15 +6,19 @@ r600_sources = [
'bof.c',
'evergreen_hw_context.c',
'radeon_bo.c',
- 'radeon_bo_pb.c',
'radeon_pciid.c',
- 'r600.c',
'r600_bo.c',
'r600_drm.c',
'r600_hw_context.c',
+ 'r600_bomgr.c',
]
-env.ParseConfig('pkg-config --cflags libdrm_radeon')
+try:
+ env.ParseConfig('pkg-config --cflags libdrm_radeon')
+except OSError:
+ print 'warning: not building r600g'
+ Return()
+
env.Append(CPPPATH = '#/src/gallium/drivers/r600')
r600winsys = env.ConvenienceLibrary(
diff --git a/src/gallium/winsys/r600/drm/bof.c b/src/gallium/winsys/r600/drm/bof.c
index 0598cc6bc0..5c923ad38d 100644
--- a/src/gallium/winsys/r600/drm/bof.c
+++ b/src/gallium/winsys/r600/drm/bof.c
@@ -46,7 +46,7 @@ static int bof_entry_grow(bof_t *bof)
}
/*
- * object
+ * object
*/
bof_t *bof_object(void)
{
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index e1f163eab6..66398afa69 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -36,7 +36,6 @@
#include "pipe/p_compiler.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
-#include <pipebuffer/pb_bufmgr.h>
#include "r600_priv.h"
#define GROUP_FORCE_NEW_BLOCK 0
@@ -622,10 +621,9 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)
/* save 16dwords space for fence mecanism */
ctx->pm4_ndwords -= 16;
- r = r600_context_init_fence(ctx);
- if (r) {
- goto out_err;
- }
+ ctx->max_db = 8;
+
+ LIST_INITHEAD(&ctx->fenced_bo);
/* init dirty list */
LIST_INITHEAD(&ctx->dirty);
@@ -836,25 +834,25 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
}
/* draw packet */
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances;
if (draw->indices) {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset + r600_bo_offset(draw->indices);
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = 0;
r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices);
} else {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
}
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
/* flush color buffer */
@@ -881,59 +879,3 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
ctx->pm4_dirty_cdwords = 0;
}
-static inline void evergreen_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
-{
- struct r600_range *range;
- struct r600_block *block;
-
- range = &ctx->range[CTX_RANGE_ID(ctx, offset)];
- block = range->blocks[CTX_BLOCK_ID(ctx, offset)];
- block->reg[0] = state->regs[0].value;
- block->reg[1] = state->regs[1].value;
- block->reg[2] = state->regs[2].value;
- block->reg[3] = state->regs[3].value;
- block->reg[4] = state->regs[4].value;
- block->reg[5] = state->regs[5].value;
- block->reg[6] = state->regs[6].value;
- block->reg[7] = state->regs[7].value;
- r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
- r600_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL);
- if (state->regs[0].bo) {
- /* VERTEX RESOURCE, we preted there is 2 bo to relocate so
- * we have single case btw VERTEX & TEXTURE resource
- */
- r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo);
- r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo);
- } else {
- /* TEXTURE RESOURCE */
- r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo);
- r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo);
- }
- if (!(block->status & R600_BLOCK_STATUS_DIRTY)) {
- block->status |= R600_BLOCK_STATUS_ENABLED;
- block->status |= R600_BLOCK_STATUS_DIRTY;
- ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
- LIST_ADDTAIL(&block->list,&ctx->dirty);
- }
-}
-
-void evergreen_ps_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
-{
- unsigned offset = R_030000_RESOURCE0_WORD0 + 0x20 * rid;
-
- evergreen_resource_set(ctx, state, offset);
-}
-
-void evergreen_vs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
-{
- unsigned offset = R_030000_RESOURCE0_WORD0 + 0x1600 + 0x20 * rid;
-
- evergreen_resource_set(ctx, state, offset);
-}
-
-void evergreen_fs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
-{
- unsigned offset = R_030000_RESOURCE0_WORD0 + 0x7C00 + 0x20 * rid;
-
- evergreen_resource_set(ctx, state, offset);
-}
diff --git a/src/gallium/winsys/r600/drm/r600.c b/src/gallium/winsys/r600/drm/r600.c
deleted file mode 100644
index f5e53e21f5..0000000000
--- a/src/gallium/winsys/r600/drm/r600.c
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- */
-#include "xf86drm.h"
-#include "radeon_drm.h"
-#include "pipe/p_compiler.h"
-#include "util/u_inlines.h"
-#include <pipebuffer/pb_bufmgr.h>
-#include "r600_priv.h"
-
-enum radeon_family r600_get_family(struct radeon *r600)
-{
- return r600->family;
-}
-
-enum chip_class r600_get_family_class(struct radeon *radeon)
-{
- return radeon->chip_class;
-}
-
-struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon)
-{
- return &radeon->tiling_info;
-}
-
-static int r600_get_device(struct radeon *r600)
-{
- struct drm_radeon_info info;
-
- r600->device = 0;
- info.request = RADEON_INFO_DEVICE_ID;
- info.value = (uintptr_t)&r600->device;
- return drmCommandWriteRead(r600->fd, DRM_RADEON_INFO, &info, sizeof(struct drm_radeon_info));
-}
-
-struct radeon *r600_new(int fd, unsigned device)
-{
- struct radeon *r600;
- int r;
-
- r600 = calloc(1, sizeof(*r600));
- if (r600 == NULL) {
- return NULL;
- }
- r600->fd = fd;
- r600->device = device;
- if (fd >= 0) {
- r = r600_get_device(r600);
- if (r) {
- R600_ERR("Failed to get device id\n");
- r600_delete(r600);
- return NULL;
- }
- }
- r600->family = radeon_family_from_device(r600->device);
- if (r600->family == CHIP_UNKNOWN) {
- R600_ERR("Unknown chipset 0x%04X\n", r600->device);
- r600_delete(r600);
- return NULL;
- }
- switch (r600->family) {
- case CHIP_R600:
- case CHIP_RV610:
- case CHIP_RV630:
- case CHIP_RV670:
- case CHIP_RV620:
- case CHIP_RV635:
- case CHIP_RS780:
- case CHIP_RS880:
- case CHIP_RV770:
- case CHIP_RV730:
- case CHIP_RV710:
- case CHIP_RV740:
- case CHIP_CEDAR:
- case CHIP_REDWOOD:
- case CHIP_JUNIPER:
- case CHIP_CYPRESS:
- case CHIP_HEMLOCK:
- case CHIP_PALM:
- break;
- case CHIP_R100:
- case CHIP_RV100:
- case CHIP_RS100:
- case CHIP_RV200:
- case CHIP_RS200:
- case CHIP_R200:
- case CHIP_RV250:
- case CHIP_RS300:
- case CHIP_RV280:
- case CHIP_R300:
- case CHIP_R350:
- case CHIP_RV350:
- case CHIP_RV380:
- case CHIP_R420:
- case CHIP_R423:
- case CHIP_RV410:
- case CHIP_RS400:
- case CHIP_RS480:
- case CHIP_RS600:
- case CHIP_RS690:
- case CHIP_RS740:
- case CHIP_RV515:
- case CHIP_R520:
- case CHIP_RV530:
- case CHIP_RV560:
- case CHIP_RV570:
- case CHIP_R580:
- default:
- R600_ERR("unknown or unsupported chipset 0x%04X\n", r600->device);
- break;
- }
-
- /* setup class */
- switch (r600->family) {
- case CHIP_R600:
- case CHIP_RV610:
- case CHIP_RV630:
- case CHIP_RV670:
- case CHIP_RV620:
- case CHIP_RV635:
- case CHIP_RS780:
- case CHIP_RS880:
- r600->chip_class = R600;
- break;
- case CHIP_RV770:
- case CHIP_RV730:
- case CHIP_RV710:
- case CHIP_RV740:
- r600->chip_class = R700;
- break;
- case CHIP_CEDAR:
- case CHIP_REDWOOD:
- case CHIP_JUNIPER:
- case CHIP_CYPRESS:
- case CHIP_HEMLOCK:
- case CHIP_PALM:
- r600->chip_class = EVERGREEN;
- break;
- default:
- R600_ERR("unknown or unsupported chipset 0x%04X\n", r600->device);
- break;
- }
-
- return r600;
-}
-
-void r600_delete(struct radeon *r600)
-{
- if (r600 == NULL)
- return;
- drmClose(r600->fd);
- free(r600);
-}
diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index 251f009a6b..122a68884b 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -36,142 +36,152 @@ struct r600_bo *r600_bo(struct radeon *radeon,
unsigned size, unsigned alignment,
unsigned binding, unsigned usage)
{
- struct r600_bo *ws_bo = calloc(1, sizeof(struct r600_bo));
- struct pb_desc desc;
- struct pb_manager *man;
+ struct r600_bo *bo;
+ struct radeon_bo *rbo;
- desc.alignment = alignment;
- desc.usage = (PB_USAGE_CPU_READ_WRITE | PB_USAGE_GPU_READ_WRITE);
- ws_bo->size = size;
+ if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) {
+ bo = r600_bomgr_bo_create(radeon->bomgr, size, alignment, *radeon->cfence);
+ if (bo) {
+ return bo;
+ }
+ }
- if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
- man = radeon->cman;
- else
- man = radeon->kman;
+ rbo = radeon_bo(radeon, 0, size, alignment);
+ if (rbo == NULL) {
+ return NULL;
+ }
+
+ bo = calloc(1, sizeof(struct r600_bo));
+ bo->size = size;
+ bo->alignment = alignment;
+ bo->bo = rbo;
+ if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) {
+ r600_bomgr_bo_init(radeon->bomgr, bo);
+ }
/* Staging resources particpate in transfers and blits only
* and are used for uploads and downloads from regular
* resources. We generate them internally for some transfers.
*/
if (usage == PIPE_USAGE_STAGING)
- ws_bo->domains = RADEON_GEM_DOMAIN_CPU | RADEON_GEM_DOMAIN_GTT;
- else
- ws_bo->domains = (RADEON_GEM_DOMAIN_CPU |
- RADEON_GEM_DOMAIN_GTT |
- RADEON_GEM_DOMAIN_VRAM);
-
-
- ws_bo->pb = man->create_buffer(man, size, &desc);
- if (ws_bo->pb == NULL) {
- free(ws_bo);
- return NULL;
- }
+ bo->domains = RADEON_GEM_DOMAIN_CPU | RADEON_GEM_DOMAIN_GTT;
+ else
+ bo->domains = (RADEON_GEM_DOMAIN_CPU |
+ RADEON_GEM_DOMAIN_GTT |
+ RADEON_GEM_DOMAIN_VRAM);
- pipe_reference_init(&ws_bo->reference, 1);
- return ws_bo;
+ pipe_reference_init(&bo->reference, 1);
+ return bo;
}
struct r600_bo *r600_bo_handle(struct radeon *radeon,
unsigned handle, unsigned *array_mode)
{
- struct r600_bo *ws_bo = calloc(1, sizeof(struct r600_bo));
- struct radeon_bo *bo;
+ struct r600_bo *bo = calloc(1, sizeof(struct r600_bo));
+ struct radeon_bo *rbo;
- ws_bo->pb = radeon_bo_pb_create_buffer_from_handle(radeon->kman, handle);
- if (!ws_bo->pb) {
- free(ws_bo);
+ rbo = bo->bo = radeon_bo(radeon, handle, 0, 0);
+ if (rbo == NULL) {
+ free(bo);
return NULL;
}
- bo = radeon_bo_pb_get_bo(ws_bo->pb);
- ws_bo->size = bo->size;
- ws_bo->domains = (RADEON_GEM_DOMAIN_CPU |
- RADEON_GEM_DOMAIN_GTT |
- RADEON_GEM_DOMAIN_VRAM);
+ bo->size = rbo->size;
+ bo->domains = (RADEON_GEM_DOMAIN_CPU |
+ RADEON_GEM_DOMAIN_GTT |
+ RADEON_GEM_DOMAIN_VRAM);
- pipe_reference_init(&ws_bo->reference, 1);
+ pipe_reference_init(&bo->reference, 1);
- radeon_bo_get_tiling_flags(radeon, bo, &ws_bo->tiling_flags,
- &ws_bo->kernel_pitch);
+ radeon_bo_get_tiling_flags(radeon, rbo, &bo->tiling_flags, &bo->kernel_pitch);
if (array_mode) {
- if (ws_bo->tiling_flags) {
- if (ws_bo->tiling_flags & RADEON_TILING_MICRO)
- *array_mode = V_0280A0_ARRAY_1D_TILED_THIN1;
- if ((ws_bo->tiling_flags & (RADEON_TILING_MICRO | RADEON_TILING_MACRO)) ==
- (RADEON_TILING_MICRO | RADEON_TILING_MACRO))
+ if (bo->tiling_flags) {
+ if (bo->tiling_flags & RADEON_TILING_MACRO)
*array_mode = V_0280A0_ARRAY_2D_TILED_THIN1;
+ else if (bo->tiling_flags & RADEON_TILING_MICRO)
+ *array_mode = V_0280A0_ARRAY_1D_TILED_THIN1;
} else {
*array_mode = 0;
}
}
- return ws_bo;
+ return bo;
}
void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx)
{
- return pb_map(bo->pb, usage, ctx);
+ struct pipe_context *pctx = ctx;
+
+ if (usage & PB_USAGE_UNSYNCHRONIZED) {
+ radeon_bo_map(radeon, bo->bo);
+ return (uint8_t *) bo->bo->data + bo->offset;
+ }
+
+ if (p_atomic_read(&bo->bo->reference.count) > 1) {
+ if (usage & PB_USAGE_DONTBLOCK) {
+ return NULL;
+ }
+ if (ctx) {
+ pctx->flush(pctx, NULL);
+ }
+ }
+
+ if (usage & PB_USAGE_DONTBLOCK) {
+ uint32_t domain;
+
+ if (radeon_bo_busy(radeon, bo->bo, &domain))
+ return NULL;
+ if (radeon_bo_map(radeon, bo->bo)) {
+ return NULL;
+ }
+ goto out;
+ }
+
+ radeon_bo_map(radeon, bo->bo);
+ if (radeon_bo_wait(radeon, bo->bo)) {
+ radeon_bo_unmap(radeon, bo->bo);
+ return NULL;
+ }
+
+out:
+ return (uint8_t *) bo->bo->data + bo->offset;
}
void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo)
{
- pb_unmap(bo->pb);
+ radeon_bo_unmap(radeon, bo->bo);
}
-static void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo)
+void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo)
{
- if (bo->pb)
- pb_reference(&bo->pb, NULL);
+ if (bo->manager_id) {
+ if (!r600_bomgr_bo_destroy(radeon->bomgr, bo)) {
+ /* destroy is delayed by buffer manager */
+ return;
+ }
+ }
+ radeon_bo_reference(radeon, &bo->bo, NULL);
free(bo);
}
-void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst,
- struct r600_bo *src)
+void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, struct r600_bo *src)
{
struct r600_bo *old = *dst;
-
+
if (pipe_reference(&(*dst)->reference, &src->reference)) {
r600_bo_destroy(radeon, old);
}
*dst = src;
}
-unsigned r600_bo_get_handle(struct r600_bo *pb_bo)
-{
- struct radeon_bo *bo;
-
- bo = radeon_bo_pb_get_bo(pb_bo->pb);
- if (!bo)
- return 0;
-
- return bo->handle;
-}
-
-unsigned r600_bo_get_size(struct r600_bo *pb_bo)
-{
- struct radeon_bo *bo;
-
- bo = radeon_bo_pb_get_bo(pb_bo->pb);
- if (!bo)
- return 0;
-
- return bo->size;
-}
-
-boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *pb_bo,
+boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *bo,
unsigned stride, struct winsys_handle *whandle)
{
- struct radeon_bo *bo;
-
- bo = radeon_bo_pb_get_bo(pb_bo->pb);
- if (!bo)
- return FALSE;
-
whandle->stride = stride;
switch(whandle->type) {
case DRM_API_HANDLE_TYPE_KMS:
- whandle->handle = r600_bo_get_handle(pb_bo);
+ whandle->handle = r600_bo_get_handle(bo);
break;
case DRM_API_HANDLE_TYPE_SHARED:
- if (radeon_bo_get_name(radeon, bo, &whandle->handle))
+ if (radeon_bo_get_name(radeon, bo->bo, &whandle->handle))
return FALSE;
break;
default:
diff --git a/src/gallium/winsys/r600/drm/r600_bomgr.c b/src/gallium/winsys/r600/drm/r600_bomgr.c
new file mode 100644
index 0000000000..446ef0f9cf
--- /dev/null
+++ b/src/gallium/winsys/r600/drm/r600_bomgr.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2010 VMWare.
+ * Copyright 2010 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jose Fonseca <jrfonseca-at-vmware-dot-com>
+ * Thomas Hellström <thomas-at-vmware-dot-com>
+ * Jerome Glisse <jglisse@redhat.com>
+ */
+#include <util/u_memory.h>
+#include <util/u_double_list.h>
+#include <util/u_time.h>
+#include <pipebuffer/pb_bufmgr.h>
+#include "r600_priv.h"
+
+static void r600_bomgr_timeout_flush(struct r600_bomgr *mgr)
+{
+ struct r600_bo *bo, *tmp;
+ int64_t now;
+
+ now = os_time_get();
+ LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) {
+ if(!os_time_timeout(bo->start, bo->end, now))
+ break;
+
+ mgr->num_delayed--;
+ bo->manager_id = 0;
+ LIST_DEL(&bo->list);
+ r600_bo_destroy(mgr->radeon, bo);
+ }
+}
+
+static INLINE int r600_bo_is_compat(struct r600_bomgr *mgr,
+ struct r600_bo *bo,
+ unsigned size,
+ unsigned alignment,
+ unsigned cfence)
+{
+ if(bo->size < size) {
+ return 0;
+ }
+
+ /* be lenient with size */
+ if(bo->size >= 2*size) {
+ return 0;
+ }
+
+ if(!pb_check_alignment(alignment, bo->alignment)) {
+ return 0;
+ }
+
+ if (!fence_is_after(cfence, bo->fence)) {
+ return 0;
+ }
+
+ return 1;
+}
+
+struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr,
+ unsigned size,
+ unsigned alignment,
+ unsigned cfence)
+{
+ struct r600_bo *bo, *tmp;
+ int64_t now;
+
+
+ pipe_mutex_lock(mgr->mutex);
+
+ now = os_time_get();
+ LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) {
+ if(r600_bo_is_compat(mgr, bo, size, alignment, cfence)) {
+ LIST_DEL(&bo->list);
+ --mgr->num_delayed;
+ r600_bomgr_timeout_flush(mgr);
+ pipe_mutex_unlock(mgr->mutex);
+ LIST_INITHEAD(&bo->list);
+ pipe_reference_init(&bo->reference, 1);
+ return bo;
+ }
+
+ if(os_time_timeout(bo->start, bo->end, now)) {
+ mgr->num_delayed--;
+ bo->manager_id = 0;
+ LIST_DEL(&bo->list);
+ r600_bo_destroy(mgr->radeon, bo);
+ }
+ }
+
+ pipe_mutex_unlock(mgr->mutex);
+ return NULL;
+}
+
+void r600_bomgr_bo_init(struct r600_bomgr *mgr, struct r600_bo *bo)
+{
+ LIST_INITHEAD(&bo->list);
+ bo->manager_id = 1;
+}
+
+bool r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo)
+{
+ bo->start = os_time_get();
+ bo->end = bo->start + mgr->usecs;
+ pipe_mutex_lock(mgr->mutex);
+ LIST_ADDTAIL(&bo->list, &mgr->delayed);
+ ++mgr->num_delayed;
+ pipe_mutex_unlock(mgr->mutex);
+ return FALSE;
+}
+
+void r600_bomgr_destroy(struct r600_bomgr *mgr)
+{
+ struct r600_bo *bo, *tmp;
+
+ pipe_mutex_lock(mgr->mutex);
+ LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) {
+ mgr->num_delayed--;
+ bo->manager_id = 0;
+ LIST_DEL(&bo->list);
+ r600_bo_destroy(mgr->radeon, bo);
+ }
+ pipe_mutex_unlock(mgr->mutex);
+
+ FREE(mgr);
+}
+
+struct r600_bomgr *r600_bomgr_create(struct radeon *radeon, unsigned usecs)
+{
+ struct r600_bomgr *mgr;
+
+ mgr = CALLOC_STRUCT(r600_bomgr);
+ if (mgr == NULL)
+ return NULL;
+
+ mgr->radeon = radeon;
+ mgr->usecs = usecs;
+ LIST_INITHEAD(&mgr->delayed);
+ mgr->num_delayed = 0;
+ pipe_mutex_init(mgr->mutex);
+
+ return mgr;
+}
diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c
index 8c847122f8..c081abb4dc 100644
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -41,11 +41,48 @@
#define RADEON_INFO_TILING_CONFIG 0x6
#endif
-static struct radeon *radeon_new(int fd, unsigned device);
+#ifndef RADEON_INFO_CLOCK_CRYSTAL_FREQ
+#define RADEON_INFO_CLOCK_CRYSTAL_FREQ 0x9
+#endif
+
+#ifndef RADEON_INFO_NUM_BACKENDS
+#define RADEON_INFO_NUM_BACKENDS 0xa
+#endif
+
+enum radeon_family r600_get_family(struct radeon *r600)
+{
+ return r600->family;
+}
+
+enum chip_class r600_get_family_class(struct radeon *radeon)
+{
+ return radeon->chip_class;
+}
+
+struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon)
+{
+ return &radeon->tiling_info;
+}
+
+unsigned r600_get_clock_crystal_freq(struct radeon *radeon)
+{
+ return radeon->clock_crystal_freq;
+}
+
+unsigned r600_get_num_backends(struct radeon *radeon)
+{
+ return radeon->num_backends;
+}
+
+unsigned r600_get_minor_version(struct radeon *radeon)
+{
+ return radeon->minor_version;
+}
+
static int radeon_get_device(struct radeon *radeon)
{
- struct drm_radeon_info info;
+ struct drm_radeon_info info = {};
int r;
radeon->device = 0;
@@ -56,20 +93,8 @@ static int radeon_get_device(struct radeon *radeon)
return r;
}
-static int radeon_drm_get_tiling(struct radeon *radeon)
+static int r600_interpret_tiling(struct radeon *radeon, uint32_t tiling_config)
{
- struct drm_radeon_info info;
- int r;
- uint32_t tiling_config;
-
- info.request = RADEON_INFO_TILING_CONFIG;
- info.value = (uintptr_t)&tiling_config;
- r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
- sizeof(struct drm_radeon_info));
-
- if (r)
- return 0;
-
switch ((tiling_config & 0xe) >> 1) {
case 0:
radeon->tiling_info.num_channels = 1;
@@ -111,10 +136,117 @@ static int radeon_drm_get_tiling(struct radeon *radeon)
return 0;
}
+static int eg_interpret_tiling(struct radeon *radeon, uint32_t tiling_config)
+{
+ switch (tiling_config & 0xf) {
+ case 0:
+ radeon->tiling_info.num_channels = 1;
+ break;
+ case 1:
+ radeon->tiling_info.num_channels = 2;
+ break;
+ case 2:
+ radeon->tiling_info.num_channels = 4;
+ break;
+ case 3:
+ radeon->tiling_info.num_channels = 8;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ radeon->tiling_info.num_banks = (tiling_config & 0xf0) >> 4;
+
+ switch ((tiling_config & 0xf00) >> 8) {
+ case 0:
+ radeon->tiling_info.group_bytes = 256;
+ break;
+ case 1:
+ radeon->tiling_info.group_bytes = 512;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int radeon_drm_get_tiling(struct radeon *radeon)
+{
+ struct drm_radeon_info info;
+ int r;
+ uint32_t tiling_config = 0;
+
+ info.request = RADEON_INFO_TILING_CONFIG;
+ info.value = (uintptr_t)&tiling_config;
+ r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
+ sizeof(struct drm_radeon_info));
+
+ if (r)
+ return 0;
+
+ if (radeon->chip_class == R600 || radeon->chip_class == R700) {
+ r = r600_interpret_tiling(radeon, tiling_config);
+ } else {
+ r = eg_interpret_tiling(radeon, tiling_config);
+ }
+ return r;
+}
+
+static int radeon_get_clock_crystal_freq(struct radeon *radeon)
+{
+ struct drm_radeon_info info;
+ uint32_t clock_crystal_freq;
+ int r;
+
+ radeon->device = 0;
+ info.request = RADEON_INFO_CLOCK_CRYSTAL_FREQ;
+ info.value = (uintptr_t)&clock_crystal_freq;
+ r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
+ sizeof(struct drm_radeon_info));
+ if (r)
+ return r;
+
+ radeon->clock_crystal_freq = clock_crystal_freq;
+ return 0;
+}
+
+
+static int radeon_get_num_backends(struct radeon *radeon)
+{
+ struct drm_radeon_info info;
+ uint32_t num_backends;
+ int r;
+
+ radeon->device = 0;
+ info.request = RADEON_INFO_NUM_BACKENDS;
+ info.value = (uintptr_t)&num_backends;
+ r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
+ sizeof(struct drm_radeon_info));
+ if (r)
+ return r;
+
+ radeon->num_backends = num_backends;
+ return 0;
+}
+
+
+static int radeon_init_fence(struct radeon *radeon)
+{
+ radeon->fence = 1;
+ radeon->fence_bo = r600_bo(radeon, 4096, 0, 0, 0);
+ if (radeon->fence_bo == NULL) {
+ return -ENOMEM;
+ }
+ radeon->cfence = r600_bo_map(radeon, radeon->fence_bo, PB_USAGE_UNSYNCHRONIZED, NULL);
+ *radeon->cfence = 0;
+ return 0;
+}
+
static struct radeon *radeon_new(int fd, unsigned device)
{
struct radeon *radeon;
int r;
+ drmVersionPtr version;
radeon = calloc(1, sizeof(*radeon));
if (radeon == NULL) {
@@ -123,71 +255,32 @@ static struct radeon *radeon_new(int fd, unsigned device)
radeon->fd = fd;
radeon->device = device;
radeon->refcount = 1;
- if (fd >= 0) {
- r = radeon_get_device(radeon);
- if (r) {
- fprintf(stderr, "Failed to get device id\n");
- return radeon_decref(radeon);
- }
+
+ version = drmGetVersion(radeon->fd);
+ if (version->version_major != 2) {
+ fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is "
+ "only compatible with 2.x.x\n", __FUNCTION__,
+ version->version_major, version->version_minor,
+ version->version_patchlevel);
+ drmFreeVersion(version);
+ exit(1);
+ }
+
+ radeon->minor_version = version->version_minor;
+
+ drmFreeVersion(version);
+
+ r = radeon_get_device(radeon);
+ if (r) {
+ fprintf(stderr, "Failed to get device id\n");
+ return radeon_decref(radeon);
}
+
radeon->family = radeon_family_from_device(radeon->device);
if (radeon->family == CHIP_UNKNOWN) {
fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->device);
return radeon_decref(radeon);
}
- switch (radeon->family) {
- case CHIP_R600:
- case CHIP_RV610:
- case CHIP_RV630:
- case CHIP_RV670:
- case CHIP_RV620:
- case CHIP_RV635:
- case CHIP_RS780:
- case CHIP_RS880:
- case CHIP_RV770:
- case CHIP_RV730:
- case CHIP_RV710:
- case CHIP_RV740:
- case CHIP_CEDAR:
- case CHIP_REDWOOD:
- case CHIP_JUNIPER:
- case CHIP_CYPRESS:
- case CHIP_HEMLOCK:
- case CHIP_PALM:
- break;
- case CHIP_R100:
- case CHIP_RV100:
- case CHIP_RS100:
- case CHIP_RV200:
- case CHIP_RS200:
- case CHIP_R200:
- case CHIP_RV250:
- case CHIP_RS300:
- case CHIP_RV280:
- case CHIP_R300:
- case CHIP_R350:
- case CHIP_RV350:
- case CHIP_RV380:
- case CHIP_R420:
- case CHIP_R423:
- case CHIP_RV410:
- case CHIP_RS400:
- case CHIP_RS480:
- case CHIP_RS600:
- case CHIP_RS690:
- case CHIP_RS740:
- case CHIP_RV515:
- case CHIP_R520:
- case CHIP_RV530:
- case CHIP_RV560:
- case CHIP_RV570:
- case CHIP_R580:
- default:
- fprintf(stderr, "%s unknown or unsupported chipset 0x%04X\n",
- __func__, radeon->device);
- break;
- }
-
/* setup class */
switch (radeon->family) {
case CHIP_R600:
@@ -216,6 +309,9 @@ static struct radeon *radeon_new(int fd, unsigned device)
case CHIP_CYPRESS:
case CHIP_HEMLOCK:
case CHIP_PALM:
+ case CHIP_BARTS:
+ case CHIP_TURKS:
+ case CHIP_CAICOS:
radeon->chip_class = EVERGREEN;
/* set default group bytes, overridden by tiling info ioctl */
radeon->tiling_info.group_bytes = 512;
@@ -226,16 +322,24 @@ static struct radeon *radeon_new(int fd, unsigned device)
break;
}
- if (radeon->chip_class == R600 || radeon->chip_class == R700) {
- if (radeon_drm_get_tiling(radeon))
- return NULL;
- }
- radeon->kman = radeon_bo_pbmgr_create(radeon);
- if (!radeon->kman)
+ if (radeon_drm_get_tiling(radeon))
return NULL;
- radeon->cman = pb_cache_manager_create(radeon->kman, 1000000);
- if (!radeon->cman)
+
+ /* get the GPU counter frequency, failure is non fatal */
+ radeon_get_clock_crystal_freq(radeon);
+
+ if (radeon->minor_version >= 9)
+ radeon_get_num_backends(radeon);
+
+ radeon->bomgr = r600_bomgr_create(radeon, 1000000);
+ if (radeon->bomgr == NULL) {
return NULL;
+ }
+ r = radeon_init_fence(radeon);
+ if (r) {
+ radeon_decref(radeon);
+ return NULL;
+ }
return radeon;
}
@@ -252,14 +356,12 @@ struct radeon *radeon_decref(struct radeon *radeon)
return NULL;
}
- if (radeon->cman)
- radeon->cman->destroy(radeon->cman);
-
- if (radeon->kman)
- radeon->kman->destroy(radeon->kman);
+ if (radeon->fence_bo) {
+ r600_bo_reference(radeon, &radeon->fence_bo, NULL);
+ }
- if (radeon->fd >= 0)
- drmClose(radeon->fd);
+ if (radeon->bomgr)
+ r600_bomgr_destroy(radeon->bomgr);
free(radeon);
return NULL;
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 0f2724f61c..a7c21784e5 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -28,39 +28,25 @@
#include <string.h>
#include <stdlib.h>
#include <assert.h>
+#include <pipe/p_compiler.h>
+#include <util/u_inlines.h>
+#include <util/u_memory.h>
+#include <pipebuffer/pb_bufmgr.h>
#include "xf86drm.h"
-#include "r600.h"
-#include "r600d.h"
#include "radeon_drm.h"
-#include "bof.h"
-#include "pipe/p_compiler.h"
-#include "util/u_inlines.h"
-#include "util/u_memory.h"
-#include <pipebuffer/pb_bufmgr.h>
#include "r600_priv.h"
+#include "bof.h"
+#include "r600d.h"
#define GROUP_FORCE_NEW_BLOCK 0
-int r600_context_init_fence(struct r600_context *ctx)
-{
- ctx->fence = 1;
- ctx->fence_bo = r600_bo(ctx->radeon, 4096, 0, 0, 0);
- if (ctx->fence_bo == NULL) {
- return -ENOMEM;
- }
- ctx->cfence = r600_bo_map(ctx->radeon, ctx->fence_bo, PB_USAGE_UNSYNCHRONIZED, NULL);
- *ctx->cfence = 0;
- LIST_INITHEAD(&ctx->fenced_bo);
- return 0;
-}
-
static void INLINE r600_context_update_fenced_list(struct r600_context *ctx)
{
for (int i = 0; i < ctx->creloc; i++) {
if (!LIST_IS_EMPTY(&ctx->bo[i]->fencedlist))
LIST_DELINIT(&ctx->bo[i]->fencedlist);
LIST_ADDTAIL(&ctx->bo[i]->fencedlist, &ctx->fenced_bo);
- ctx->bo[i]->fence = ctx->fence;
+ ctx->bo[i]->fence = ctx->radeon->fence;
ctx->bo[i]->ctx = ctx;
}
}
@@ -71,7 +57,7 @@ static void INLINE r600_context_fence_wraparound(struct r600_context *ctx, unsig
struct radeon_bo *tmp;
LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &ctx->fenced_bo, fencedlist) {
- if (bo->fence <= *ctx->cfence) {
+ if (bo->fence <= *ctx->radeon->cfence) {
LIST_DELINIT(&bo->fencedlist);
bo->fence = 0;
} else {
@@ -121,7 +107,7 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg,
/* initialize block */
block->start_offset = reg[i].offset;
- block->pm4[block->pm4_ndwords++] = PKT3(reg[i].opcode, n);
+ block->pm4[block->pm4_ndwords++] = PKT3(reg[i].opcode, n, 0);
block->pm4[block->pm4_ndwords++] = (block->start_offset - reg[i].offset_base) >> 2;
block->reg = &block->pm4[block->pm4_ndwords];
block->pm4_ndwords += n;
@@ -133,7 +119,7 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg,
block->nbo++;
assert(block->nbo < R600_BLOCK_MAX_BO);
block->pm4_bo_index[j] = block->nbo;
- block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0);
+ block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0, 0);
block->pm4[block->pm4_ndwords++] = 0x00000000;
block->reloc[block->nbo].flush_flags = reg[i+j].flush_flags;
block->reloc[block->nbo].flush_mask = reg[i+j].flush_mask;
@@ -632,9 +618,6 @@ void r600_context_fini(struct r600_context *ctx)
free(ctx->pm4);
r600_context_clear_fenced_bo(ctx);
- if (ctx->fence_bo) {
- r600_bo_reference(ctx->radeon, &ctx->fence_bo, NULL);
- }
memset(ctx, 0, sizeof(struct r600_context));
}
@@ -654,7 +637,8 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
ctx->range[i].end_offset = ((i + 1) << ctx->hash_shift) - 1;
ctx->range[i].blocks = calloc(1 << ctx->hash_shift, sizeof(void*));
if (ctx->range[i].blocks == NULL) {
- return -ENOMEM;
+ r = -ENOMEM;
+ goto out_err;
}
}
@@ -763,19 +747,30 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
/* save 16dwords space for fence mecanism */
ctx->pm4_ndwords -= 16;
- r = r600_context_init_fence(ctx);
- if (r) {
- goto out_err;
- }
+ LIST_INITHEAD(&ctx->fenced_bo);
/* init dirty list */
LIST_INITHEAD(&ctx->dirty);
+
+ ctx->max_db = 4;
+
return 0;
out_err:
r600_context_fini(ctx);
return r;
}
+static void rv6xx_context_surface_base_update(struct r600_context *ctx,
+ unsigned base_update_flags)
+{
+ /* need to emit surface base update on rv6xx */
+ if ((ctx->radeon->family > CHIP_R600) &&
+ (ctx->radeon->family < CHIP_RV770)) {
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = base_update_flags;
+ }
+}
+
void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
unsigned flush_mask, struct r600_bo *rbo)
{
@@ -787,12 +782,12 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
bo->last_flush &= flush_mask;
return;
}
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = flush_flags;
ctx->pm4[ctx->pm4_cdwords++] = (bo->size + 255) >> 8;
ctx->pm4[ctx->pm4_cdwords++] = 0x00000000;
ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = bo->reloc_id;
bo->last_flush = (bo->last_flush | flush_flags) & flush_mask;
}
@@ -814,6 +809,7 @@ void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *r
ctx->reloc[ctx->creloc].write_domain = rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM);
ctx->reloc[ctx->creloc].flags = 0;
radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo);
+ rbo->fence = ctx->radeon->fence;
ctx->creloc++;
/* set PKT3 to point to proper reloc */
*pm4 = bo->reloc_id;
@@ -836,6 +832,7 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat
/* find relocation */
id = block->pm4_bo_index[id];
r600_bo_reference(ctx->radeon, &block->reloc[id].bo, state->regs[i].bo);
+ state->regs[i].bo->fence = ctx->radeon->fence;
}
if (!(block->status & R600_BLOCK_STATUS_DIRTY)) {
block->status |= R600_BLOCK_STATUS_ENABLED;
@@ -875,10 +872,13 @@ static inline void r600_context_pipe_state_set_resource(struct r600_context *ctx
*/
r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo);
r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo);
+ state->regs[0].bo->fence = ctx->radeon->fence;
} else {
/* TEXTURE RESOURCE */
r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo);
r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo);
+ state->regs[2].bo->fence = ctx->radeon->fence;
+ state->regs[3].bo->fence = ctx->radeon->fence;
}
if (!(block->status & R600_BLOCK_STATUS_DIRTY)) {
block->status |= R600_BLOCK_STATUS_ENABLED;
@@ -1002,6 +1002,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
unsigned ndwords = 9;
struct r600_block *dirty_block = NULL;
struct r600_block *next_block;
+ unsigned rv6xx_surface_base_update = 0;
if (draw->indices) {
ndwords = 13;
@@ -1024,10 +1025,14 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
for (int i = 0; i < 8; i++) {
if (cb[i]) {
ndwords += 7;
+ rv6xx_surface_base_update |= SURFACE_BASE_UPDATE_COLOR(i);
}
}
- if (db)
+ if (db) {
ndwords += 7;
+ rv6xx_surface_base_update |= SURFACE_BASE_UPDATE_DEPTH;
+ }
+ /* XXX also need to update SURFACE_BASE_UPDATE_STRMOUT when we support it */
/* queries need some special values */
if (ctx->num_query_running) {
@@ -1054,30 +1059,34 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
}
/* enough room to copy packet */
- LIST_FOR_EACH_ENTRY_SAFE(dirty_block, next_block, &ctx->dirty,list) {
+ LIST_FOR_EACH_ENTRY_SAFE(dirty_block, next_block, &ctx->dirty, list) {
r600_context_block_emit_dirty(ctx, dirty_block);
}
+ /* rv6xx surface base udpate */
+ if (rv6xx_surface_base_update)
+ rv6xx_context_surface_base_update(ctx, rv6xx_surface_base_update);
+
/* draw packet */
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances;
if (draw->indices) {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset + r600_bo_offset(draw->indices);
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = 0;
r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices);
} else {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
}
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
/* flush color buffer */
@@ -1099,7 +1108,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
void r600_context_flush(struct r600_context *ctx)
{
- struct drm_radeon_cs drmib;
+ struct drm_radeon_cs drmib = {};
struct drm_radeon_cs_chunk chunks[2];
uint64_t chunk_array[2];
unsigned fence;
@@ -1112,17 +1121,17 @@ void r600_context_flush(struct r600_context *ctx)
r600_context_queries_suspend(ctx);
/* emit fence */
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24);
- ctx->pm4[ctx->pm4_cdwords++] = ctx->fence;
+ ctx->pm4[ctx->pm4_cdwords++] = ctx->radeon->fence;
ctx->pm4[ctx->pm4_cdwords++] = 0;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = 0;
- r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], ctx->fence_bo);
+ r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], ctx->radeon->fence_bo);
#if 1
/* emit cs */
@@ -1139,18 +1148,18 @@ void r600_context_flush(struct r600_context *ctx)
r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib,
sizeof(struct drm_radeon_cs));
#else
- *ctx->cfence = ctx->fence;
+ *ctx->radeon->cfence = ctx->radeon->fence;
#endif
r600_context_update_fenced_list(ctx);
- fence = ctx->fence + 1;
- if (fence < ctx->fence) {
+ fence = ctx->radeon->fence + 1;
+ if (fence < ctx->radeon->fence) {
/* wrap around */
fence = 1;
r600_context_fence_wraparound(ctx, fence);
}
- ctx->fence = fence;
+ ctx->radeon->fence = fence;
/* restart */
for (int i = 0; i < ctx->creloc; i++) {
@@ -1261,44 +1270,90 @@ out_err:
bof_decref(root);
}
-static void r600_query_result(struct r600_context *ctx, struct r600_query *query)
+static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait)
{
u64 start, end;
u32 *results;
int i;
+ int size;
+
+ if (wait)
+ results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_CPU_READ, NULL);
+ else
+ results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_DONTBLOCK | PB_USAGE_CPU_READ, NULL);
+ if (!results)
+ return FALSE;
- results = r600_bo_map(ctx->radeon, query->buffer, 0, NULL);
- for (i = 0; i < query->num_results; i += 4) {
+ size = query->num_results * (query->type == PIPE_QUERY_OCCLUSION_COUNTER ? ctx->max_db : 1);
+ for (i = 0; i < size; i += 4) {
start = (u64)results[i] | (u64)results[i + 1] << 32;
end = (u64)results[i + 2] | (u64)results[i + 3] << 32;
- if ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) {
+ if (((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))
+ || query->type == PIPE_QUERY_TIME_ELAPSED) {
query->result += end - start;
}
}
r600_bo_unmap(ctx->radeon, query->buffer);
query->num_results = 0;
+
+ return TRUE;
}
void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
{
- /* query request needs 6 dwords for begin + 6 dwords for end */
- if ((12 + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
+ unsigned required_space;
+ int num_backends = r600_get_num_backends(ctx->radeon);
+
+ /* query request needs 6/8 dwords for begin + 6/8 dwords for end */
+ if (query->type == PIPE_QUERY_TIME_ELAPSED)
+ required_space = 16;
+ else
+ required_space = 12;
+
+ if ((required_space + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
/* need to flush */
r600_context_flush(ctx);
}
/* if query buffer is full force a flush */
- if (query->num_results >= ((query->buffer_size >> 2) - 2)) {
+ if (query->num_results*4 >= query->buffer_size - 16) {
r600_context_flush(ctx);
- r600_query_result(ctx, query);
+ r600_query_result(ctx, query, TRUE);
}
+ if (query->type == PIPE_QUERY_OCCLUSION_COUNTER &&
+ num_backends > 0 && num_backends < ctx->max_db) {
+ /* as per info on ZPASS the driver must set the unusued DB top bits */
+ u32 *results;
+ int i;
+
+ results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_DONTBLOCK | PB_USAGE_CPU_WRITE, NULL);
+ if (results) {
+ memset(results + (query->num_results * 4), 0, ctx->max_db * 4 * 4);
+
+ for (i = num_backends; i < ctx->max_db; i++) {
+ results[(i * 4)+1] = 0x80000000;
+ results[(i * 4)+3] = 0x80000000;
+ }
+ r600_bo_unmap(ctx->radeon, query->buffer);
+ }
+ }
+
/* emit begin query */
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2);
- ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
- ctx->pm4[ctx->pm4_cdwords++] = query->num_results + r600_bo_offset(query->buffer);
- ctx->pm4[ctx->pm4_cdwords++] = 0;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+ if (query->type == PIPE_QUERY_TIME_ELAPSED) {
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
+ ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + r600_bo_offset(query->buffer);
+ ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+ } else {
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
+ ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + r600_bo_offset(query->buffer);
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+ }
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = 0;
r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer);
@@ -1310,25 +1365,56 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
void r600_query_end(struct r600_context *ctx, struct r600_query *query)
{
/* emit begin query */
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2);
- ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
- ctx->pm4[ctx->pm4_cdwords++] = query->num_results + 8 + r600_bo_offset(query->buffer);
- ctx->pm4[ctx->pm4_cdwords++] = 0;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+ if (query->type == PIPE_QUERY_TIME_ELAPSED) {
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
+ ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 8 + r600_bo_offset(query->buffer);
+ ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+ } else {
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
+ ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 8 + r600_bo_offset(query->buffer);
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+ }
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = 0;
r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer);
- query->num_results += 16;
+ query->num_results += 4 * (query->type == PIPE_QUERY_OCCLUSION_COUNTER ? ctx->max_db : 1);
query->state ^= R600_QUERY_STATE_STARTED;
query->state |= R600_QUERY_STATE_ENDED;
ctx->num_query_running--;
}
+void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
+ int flag_wait)
+{
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
+
+ if (operation == PREDICATION_OP_CLEAR) {
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+ ctx->pm4[ctx->pm4_cdwords++] = PRED_OP(PREDICATION_OP_CLEAR);
+ } else {
+ int results_base = query->num_results - (4 * ctx->max_db);
+
+ if (results_base < 0)
+ results_base = 0;
+
+ ctx->pm4[ctx->pm4_cdwords++] = results_base*4 + r600_bo_offset(query->buffer);
+ ctx->pm4[ctx->pm4_cdwords++] = PRED_OP(operation) | (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW) | PREDICATION_DRAW_VISIBLE;
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+ r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer);
+ }
+}
+
struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type)
{
struct r600_query *query;
- if (query_type != PIPE_QUERY_OCCLUSION_COUNTER)
+ if (query_type != PIPE_QUERY_OCCLUSION_COUNTER && query_type != PIPE_QUERY_TIME_ELAPSED)
return NULL;
query = calloc(1, sizeof(struct r600_query));
@@ -1370,8 +1456,12 @@ boolean r600_context_query_result(struct r600_context *ctx,
if (query->num_results) {
r600_context_flush(ctx);
}
- r600_query_result(ctx, query);
- *result = query->result;
+ if (!r600_query_result(ctx, query, wait))
+ return FALSE;
+ if (query->type == PIPE_QUERY_TIME_ELAPSED)
+ *result = (1000000*query->result)/r600_get_clock_crystal_freq(ctx->radeon);
+ else
+ *result = query->result;
query->result = 0;
return TRUE;
}
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 193af984f8..41c5ee02c3 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -30,24 +30,30 @@
#include <stdint.h>
#include <stdlib.h>
#include <assert.h>
-#include <pipebuffer/pb_bufmgr.h>
-#include "util/u_double_list.h"
+#include <util/u_double_list.h>
+#include <util/u_inlines.h>
+#include <os/os_thread.h>
#include "r600.h"
+struct r600_bomgr;
+struct r600_bo;
+
struct radeon {
int fd;
int refcount;
unsigned device;
unsigned family;
enum chip_class chip_class;
- struct pb_manager *kman; /* kernel bo manager */
- struct pb_manager *cman; /* cached bo manager */
- struct r600_tiling_info tiling_info;
+ struct r600_tiling_info tiling_info;
+ struct r600_bomgr *bomgr;
+ unsigned fence;
+ unsigned *cfence;
+ struct r600_bo *fence_bo;
+ unsigned clock_crystal_freq;
+ unsigned num_backends;
+ unsigned minor_version;
};
-struct radeon *r600_new(int fd, unsigned device);
-void r600_delete(struct radeon *r600);
-
struct r600_reg {
unsigned opcode;
unsigned offset_base;
@@ -75,25 +81,49 @@ struct radeon_bo {
struct r600_bo {
struct pipe_reference reference;
- struct pb_buffer *pb;
unsigned size;
unsigned tiling_flags;
unsigned kernel_pitch;
unsigned domains;
+ struct radeon_bo *bo;
+ unsigned fence;
+ /* manager data */
+ struct list_head list;
+ unsigned manager_id;
+ unsigned alignment;
+ unsigned offset;
+ int64_t start;
+ int64_t end;
};
+struct r600_bomgr {
+ struct radeon *radeon;
+ unsigned usecs;
+ pipe_mutex mutex;
+ struct list_head delayed;
+ unsigned num_delayed;
+};
-/* radeon_pciid.c */
+/*
+ * r600_drm.c
+ */
+struct radeon *r600_new(int fd, unsigned device);
+void r600_delete(struct radeon *r600);
+
+/*
+ * radeon_pciid.c
+ */
unsigned radeon_family_from_device(unsigned device);
-/* radeon_bo.c */
+/*
+ * radeon_bo.c
+ */
struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
unsigned size, unsigned alignment);
void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst,
struct radeon_bo *src);
int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo);
int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain);
-void radeon_bo_pbmgr_flush_maps(struct pb_manager *_mgr);
int radeon_bo_fencelist(struct radeon *radeon, struct radeon_bo **bolist, uint32_t num_bo);
int radeon_bo_get_tiling_flags(struct radeon *radeon,
struct radeon_bo *bo,
@@ -103,13 +133,9 @@ int radeon_bo_get_name(struct radeon *radeon,
struct radeon_bo *bo,
uint32_t *name);
-/* radeon_bo_pb.c */
-struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf);
-struct pb_manager *radeon_bo_pbmgr_create(struct radeon *radeon);
-struct pb_buffer *radeon_bo_pb_create_buffer_from_handle(struct pb_manager *_mgr,
- uint32_t handle);
-
-/* r600_hw_context.c */
+/*
+ * r600_hw_context.c
+ */
int r600_context_init_fence(struct r600_context *ctx);
void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo);
void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
@@ -117,14 +143,27 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset);
int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg);
-/* r600_bo.c */
-unsigned r600_bo_get_handle(struct r600_bo *bo);
-unsigned r600_bo_get_size(struct r600_bo *bo);
-static INLINE struct radeon_bo *r600_bo_get_bo(struct r600_bo *bo)
-{
- return radeon_bo_pb_get_bo(bo->pb);
-}
+/*
+ * r600_bo.c
+ */
+void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo);
+/*
+ * r600_bomgr.c
+ */
+struct r600_bomgr *r600_bomgr_create(struct radeon *radeon, unsigned usecs);
+void r600_bomgr_destroy(struct r600_bomgr *mgr);
+bool r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo);
+void r600_bomgr_bo_init(struct r600_bomgr *mgr, struct r600_bo *bo);
+struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr,
+ unsigned size,
+ unsigned alignment,
+ unsigned cfence);
+
+
+/*
+ * helpers
+ */
#define CTX_RANGE_ID(ctx, offset) (((offset) >> (ctx)->hash_shift) & 255)
#define CTX_BLOCK_ID(ctx, offset) ((offset) & ((1 << (ctx)->hash_shift) - 1))
@@ -172,6 +211,9 @@ static inline void r600_context_block_emit_dirty(struct r600_context *ctx, struc
LIST_DELINIT(&block->list);
}
+/*
+ * radeon_bo.c
+ */
static inline int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo)
{
bo->map_count++;
@@ -184,4 +226,35 @@ static inline void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo)
assert(bo->map_count >= 0);
}
+/*
+ * r600_bo
+ */
+static inline struct radeon_bo *r600_bo_get_bo(struct r600_bo *bo)
+{
+ return bo->bo;
+}
+
+static unsigned inline r600_bo_get_handle(struct r600_bo *bo)
+{
+ return bo->bo->handle;
+}
+
+static unsigned inline r600_bo_get_size(struct r600_bo *bo)
+{
+ return bo->size;
+}
+
+/*
+ * fence
+ */
+static inline bool fence_is_after(unsigned fence, unsigned ofence)
+{
+ /* handle wrap around */
+ if (fence < 0x80000000 && ofence > 0x80000000)
+ return TRUE;
+ if (fence > ofence)
+ return TRUE;
+ return FALSE;
+}
+
#endif
diff --git a/src/gallium/winsys/r600/drm/r600d.h b/src/gallium/winsys/r600/drm/r600d.h
index 1c1ac76fe6..8042481804 100644
--- a/src/gallium/winsys/r600/drm/r600d.h
+++ b/src/gallium/winsys/r600/drm/r600d.h
@@ -90,6 +90,9 @@
#define PKT3_SET_SAMPLER 0x6E
#define PKT3_SET_CTL_CONST 0x6F
#define PKT3_SURFACE_BASE_UPDATE 0x73
+#define SURFACE_BASE_UPDATE_DEPTH (1 << 0)
+#define SURFACE_BASE_UPDATE_COLOR(x) (2 << (x))
+#define SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x))
#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
@@ -105,6 +108,18 @@
* 5 - TS events
*/
+#define PREDICATION_OP_CLEAR 0x0
+#define PREDICATION_OP_ZPASS 0x1
+#define PREDICATION_OP_PRIMCOUNT 0x2
+
+#define PRED_OP(x) ((x) << 16)
+
+#define PREDICATION_HINT_WAIT (0 << 12)
+#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)
+
+#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
+#define PREDICATION_DRAW_VISIBLE (1 << 8)
+
#define PKT_TYPE_S(x) (((x) & 0x3) << 30)
#define PKT_TYPE_G(x) (((x) >> 30) & 0x3)
#define PKT_TYPE_C 0x3FFFFFFF
@@ -117,8 +132,9 @@
#define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8)
#define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF)
#define PKT3_IT_OPCODE_C 0xFFFF00FF
+#define PKT3_PRED_S(x) (((x) >> 0) & 0x1)
#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
-#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count))
+#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PRED_S(predicate))
/* Registers */
#define R_0280A0_CB_COLOR0_INFO 0x0280A0
diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c
index 557cfb9597..ba02a3c2bd 100644
--- a/src/gallium/winsys/r600/drm/radeon_bo.c
+++ b/src/gallium/winsys/r600/drm/radeon_bo.c
@@ -33,6 +33,25 @@
#include "xf86drm.h"
#include "radeon_drm.h"
+#ifdef ANDROID
+
+extern void* __mmap2(void*, size_t, int, int, int, size_t);
+
+#define MMAP2_SHIFT 12
+static void* android_mmap2(void *addr, size_t size, int prot, int flags, int fd, unsigned long long offset)
+{
+ if ( offset & ((1UL << MMAP2_SHIFT)-1) ) {
+ errno = EINVAL;
+ return MAP_FAILED;
+ }
+
+ return __mmap2(addr, size, prot, flags, fd, (size_t)(offset >> MMAP2_SHIFT));
+}
+
+#define mmap(addr, size, prot, flags, fd, offset) android_mmap2(addr, size, prot, flags, fd, offset)
+
+#endif /* ANDROID */
+
static int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo)
{
struct drm_radeon_gem_mmap args;
@@ -98,7 +117,7 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
bo->size = open_arg.size;
bo->shared = TRUE;
} else {
- struct drm_radeon_gem_create args;
+ struct drm_radeon_gem_create args = {};
args.size = size;
args.alignment = alignment;
@@ -156,7 +175,7 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo)
if (!bo->shared) {
if (!bo->fence)
return 0;
- if (bo->fence <= *bo->ctx->cfence) {
+ if (bo->fence <= *radeon->cfence) {
LIST_DELINIT(&bo->fencedlist);
bo->fence = 0;
return 0;
@@ -181,7 +200,7 @@ int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain
if (!bo->shared) {
if (!bo->fence)
return 0;
- if (bo->fence <= *bo->ctx->cfence) {
+ if (bo->fence <= *radeon->cfence) {
LIST_DELINIT(&bo->fencedlist);
bo->fence = 0;
return 0;
@@ -204,15 +223,15 @@ int radeon_bo_get_tiling_flags(struct radeon *radeon,
uint32_t *tiling_flags,
uint32_t *pitch)
{
- struct drm_radeon_gem_get_tiling args;
+ struct drm_radeon_gem_get_tiling args = {};
int ret;
-
+
args.handle = bo->handle;
ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_GET_TILING,
&args, sizeof(args));
if (ret)
return ret;
-
+
*tiling_flags = args.tiling_flags;
*pitch = args.pitch;
return ret;
diff --git a/src/gallium/winsys/r600/drm/radeon_bo_pb.c b/src/gallium/winsys/r600/drm/radeon_bo_pb.c
deleted file mode 100644
index 4bd3ae3ca1..0000000000
--- a/src/gallium/winsys/r600/drm/radeon_bo_pb.c
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * Copyright 2010 Dave Airlie
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Dave Airlie
- */
-#include <util/u_inlines.h>
-#include <util/u_memory.h>
-#include <util/u_double_list.h>
-#include <pipebuffer/pb_buffer.h>
-#include <pipebuffer/pb_bufmgr.h>
-#include "r600_priv.h"
-
-struct radeon_bo_pb {
- struct pb_buffer b;
- struct radeon_bo *bo;
-
- struct radeon_bo_pbmgr *mgr;
-};
-
-extern const struct pb_vtbl radeon_bo_pb_vtbl;
-
-static INLINE struct radeon_bo_pb *radeon_bo_pb(struct pb_buffer *buf)
-{
- assert(buf);
- assert(buf->vtbl == &radeon_bo_pb_vtbl);
- return (struct radeon_bo_pb *)buf;
-}
-
-struct radeon_bo_pbmgr {
- struct pb_manager b;
- struct radeon *radeon;
-};
-
-static INLINE struct radeon_bo_pbmgr *radeon_bo_pbmgr(struct pb_manager *mgr)
-{
- assert(mgr);
- return (struct radeon_bo_pbmgr *)mgr;
-}
-
-static void radeon_bo_pb_destroy(struct pb_buffer *_buf)
-{
- struct radeon_bo_pb *buf = radeon_bo_pb(_buf);
-
- /* If this buffer is on the list of buffers to unmap,
- * do the unmapping now.
- */
- radeon_bo_unmap(buf->mgr->radeon, buf->bo);
- radeon_bo_reference(buf->mgr->radeon, &buf->bo, NULL);
- FREE(buf);
-}
-
-static void *
-radeon_bo_pb_map_internal(struct pb_buffer *_buf,
- unsigned flags, void *ctx)
-{
- struct radeon_bo_pb *buf = radeon_bo_pb(_buf);
- struct pipe_context *pctx = ctx;
-
- if (flags & PB_USAGE_UNSYNCHRONIZED) {
- if (radeon_bo_map(buf->mgr->radeon, buf->bo)) {
- return NULL;
- }
- return buf->bo->data;
- }
-
- if (p_atomic_read(&buf->bo->reference.count) > 1) {
- if (flags & PB_USAGE_DONTBLOCK) {
- return NULL;
- }
- if (ctx) {
- pctx->flush(pctx, 0, NULL);
- }
- }
-
- if (flags & PB_USAGE_DONTBLOCK) {
- uint32_t domain;
- if (radeon_bo_busy(buf->mgr->radeon, buf->bo, &domain))
- return NULL;
- if (radeon_bo_map(buf->mgr->radeon, buf->bo)) {
- return NULL;
- }
- goto out;
- }
-
- if (radeon_bo_map(buf->mgr->radeon, buf->bo)) {
- return NULL;
- }
- if (radeon_bo_wait(buf->mgr->radeon, buf->bo)) {
- radeon_bo_unmap(buf->mgr->radeon, buf->bo);
- return NULL;
- }
-out:
- return buf->bo->data;
-}
-
-static void radeon_bo_pb_unmap_internal(struct pb_buffer *_buf)
-{
-}
-
-static void
-radeon_bo_pb_get_base_buffer(struct pb_buffer *buf,
- struct pb_buffer **base_buf,
- unsigned *offset)
-{
- *base_buf = buf;
- *offset = 0;
-}
-
-static enum pipe_error
-radeon_bo_pb_validate(struct pb_buffer *_buf,
- struct pb_validate *vl,
- unsigned flags)
-{
- /* Always pinned */
- return PIPE_OK;
-}
-
-static void
-radeon_bo_pb_fence(struct pb_buffer *buf,
- struct pipe_fence_handle *fence)
-{
-}
-
-const struct pb_vtbl radeon_bo_pb_vtbl = {
- radeon_bo_pb_destroy,
- radeon_bo_pb_map_internal,
- radeon_bo_pb_unmap_internal,
- radeon_bo_pb_validate,
- radeon_bo_pb_fence,
- radeon_bo_pb_get_base_buffer,
-};
-
-struct pb_buffer *
-radeon_bo_pb_create_buffer_from_handle(struct pb_manager *_mgr,
- uint32_t handle)
-{
- struct radeon_bo_pbmgr *mgr = radeon_bo_pbmgr(_mgr);
- struct radeon *radeon = mgr->radeon;
- struct radeon_bo_pb *bo;
- struct radeon_bo *hw_bo;
-
- hw_bo = radeon_bo(radeon, handle, 0, 0);
- if (hw_bo == NULL)
- return NULL;
-
- bo = CALLOC_STRUCT(radeon_bo_pb);
- if (!bo) {
- radeon_bo_reference(radeon, &hw_bo, NULL);
- return NULL;
- }
-
- pipe_reference_init(&bo->b.base.reference, 1);
- bo->b.base.alignment = 0;
- bo->b.base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ;
- bo->b.base.size = hw_bo->size;
- bo->b.vtbl = &radeon_bo_pb_vtbl;
- bo->mgr = mgr;
-
- bo->bo = hw_bo;
-
- return &bo->b;
-}
-
-static struct pb_buffer *
-radeon_bo_pb_create_buffer(struct pb_manager *_mgr,
- pb_size size,
- const struct pb_desc *desc)
-{
- struct radeon_bo_pbmgr *mgr = radeon_bo_pbmgr(_mgr);
- struct radeon *radeon = mgr->radeon;
- struct radeon_bo_pb *bo;
-
- bo = CALLOC_STRUCT(radeon_bo_pb);
- if (!bo)
- goto error1;
-
- pipe_reference_init(&bo->b.base.reference, 1);
- bo->b.base.alignment = desc->alignment;
- bo->b.base.usage = desc->usage;
- bo->b.base.size = size;
- bo->b.vtbl = &radeon_bo_pb_vtbl;
- bo->mgr = mgr;
-
- bo->bo = radeon_bo(radeon, 0, size, desc->alignment);
- if (bo->bo == NULL)
- goto error2;
- return &bo->b;
-
-error2:
- FREE(bo);
-error1:
- return NULL;
-}
-
-static void
-radeon_bo_pbmgr_flush(struct pb_manager *mgr)
-{
- /* NOP */
-}
-
-static void
-radeon_bo_pbmgr_destroy(struct pb_manager *_mgr)
-{
- struct radeon_bo_pbmgr *mgr = radeon_bo_pbmgr(_mgr);
- FREE(mgr);
-}
-
-struct pb_manager *radeon_bo_pbmgr_create(struct radeon *radeon)
-{
- struct radeon_bo_pbmgr *mgr;
-
- mgr = CALLOC_STRUCT(radeon_bo_pbmgr);
- if (!mgr)
- return NULL;
-
- mgr->b.destroy = radeon_bo_pbmgr_destroy;
- mgr->b.create_buffer = radeon_bo_pb_create_buffer;
- mgr->b.flush = radeon_bo_pbmgr_flush;
-
- mgr->radeon = radeon;
- return &mgr->b;
-}
-
-struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf)
-{
- struct radeon_bo_pb *buf;
- if (_buf->vtbl == &radeon_bo_pb_vtbl) {
- buf = radeon_bo_pb(_buf);
- return buf->bo;
- } else {
- struct pb_buffer *base_buf;
- pb_size offset;
- pb_get_base_buffer(_buf, &base_buf, &offset);
- if (base_buf->vtbl == &radeon_bo_pb_vtbl) {
- buf = radeon_bo_pb(base_buf);
- return buf->bo;
- }
- }
- return NULL;
-}
diff --git a/src/gallium/winsys/r600/drm/radeon_pciid.c b/src/gallium/winsys/r600/drm/radeon_pciid.c
index 92560a488a..f19956931d 100644
--- a/src/gallium/winsys/r600/drm/radeon_pciid.c
+++ b/src/gallium/winsys/r600/drm/radeon_pciid.c
@@ -32,7 +32,7 @@ struct pci_id {
unsigned family;
};
-struct pci_id radeon_pci_id[] = {
+static const struct pci_id radeon_pci_id[] = {
{0x1002, 0x3150, CHIP_RV380},
{0x1002, 0x3152, CHIP_RV380},
{0x1002, 0x3154, CHIP_RV380},
@@ -40,29 +40,29 @@ struct pci_id radeon_pci_id[] = {
{0x1002, 0x3E54, CHIP_RV380},
{0x1002, 0x4136, CHIP_RS100},
{0x1002, 0x4137, CHIP_RS200},
- {0x1002, 0x4144, CHIP_R300},
- {0x1002, 0x4145, CHIP_R300},
- {0x1002, 0x4146, CHIP_R300},
- {0x1002, 0x4147, CHIP_R300},
- {0x1002, 0x4148, CHIP_R350},
- {0x1002, 0x4149, CHIP_R350},
- {0x1002, 0x414A, CHIP_R350},
- {0x1002, 0x414B, CHIP_R350},
- {0x1002, 0x4150, CHIP_RV350},
- {0x1002, 0x4151, CHIP_RV350},
- {0x1002, 0x4152, CHIP_RV350},
- {0x1002, 0x4153, CHIP_RV350},
- {0x1002, 0x4154, CHIP_RV350},
- {0x1002, 0x4155, CHIP_RV350},
- {0x1002, 0x4156, CHIP_RV350},
+ {0x1002, 0x4144, CHIP_R300},
+ {0x1002, 0x4145, CHIP_R300},
+ {0x1002, 0x4146, CHIP_R300},
+ {0x1002, 0x4147, CHIP_R300},
+ {0x1002, 0x4148, CHIP_R350},
+ {0x1002, 0x4149, CHIP_R350},
+ {0x1002, 0x414A, CHIP_R350},
+ {0x1002, 0x414B, CHIP_R350},
+ {0x1002, 0x4150, CHIP_RV350},
+ {0x1002, 0x4151, CHIP_RV350},
+ {0x1002, 0x4152, CHIP_RV350},
+ {0x1002, 0x4153, CHIP_RV350},
+ {0x1002, 0x4154, CHIP_RV350},
+ {0x1002, 0x4155, CHIP_RV350},
+ {0x1002, 0x4156, CHIP_RV350},
{0x1002, 0x4237, CHIP_RS200},
- {0x1002, 0x4242, CHIP_R200},
- {0x1002, 0x4243, CHIP_R200},
+ {0x1002, 0x4242, CHIP_R200},
+ {0x1002, 0x4243, CHIP_R200},
{0x1002, 0x4336, CHIP_RS100},
{0x1002, 0x4337, CHIP_RS200},
{0x1002, 0x4437, CHIP_RS200},
- {0x1002, 0x4966, CHIP_RV250},
- {0x1002, 0x4967, CHIP_RV250},
+ {0x1002, 0x4966, CHIP_RV250},
+ {0x1002, 0x4967, CHIP_RV250},
{0x1002, 0x4A48, CHIP_R420},
{0x1002, 0x4A49, CHIP_R420},
{0x1002, 0x4A4A, CHIP_R420},
@@ -85,14 +85,14 @@ struct pci_id radeon_pci_id[] = {
{0x1002, 0x4C64, CHIP_RV250},
{0x1002, 0x4C66, CHIP_RV250},
{0x1002, 0x4C67, CHIP_RV250},
- {0x1002, 0x4E44, CHIP_R300},
- {0x1002, 0x4E45, CHIP_R300},
- {0x1002, 0x4E46, CHIP_R300},
- {0x1002, 0x4E47, CHIP_R300},
- {0x1002, 0x4E48, CHIP_R350},
- {0x1002, 0x4E49, CHIP_R350},
- {0x1002, 0x4E4A, CHIP_R350},
- {0x1002, 0x4E4B, CHIP_R350},
+ {0x1002, 0x4E44, CHIP_R300},
+ {0x1002, 0x4E45, CHIP_R300},
+ {0x1002, 0x4E46, CHIP_R300},
+ {0x1002, 0x4E47, CHIP_R300},
+ {0x1002, 0x4E48, CHIP_R350},
+ {0x1002, 0x4E49, CHIP_R350},
+ {0x1002, 0x4E4A, CHIP_R350},
+ {0x1002, 0x4E4B, CHIP_R350},
{0x1002, 0x4E50, CHIP_RV350},
{0x1002, 0x4E51, CHIP_RV350},
{0x1002, 0x4E52, CHIP_RV350},
@@ -103,13 +103,13 @@ struct pci_id radeon_pci_id[] = {
{0x1002, 0x5145, CHIP_R100},
{0x1002, 0x5146, CHIP_R100},
{0x1002, 0x5147, CHIP_R100},
- {0x1002, 0x5148, CHIP_R200},
- {0x1002, 0x514C, CHIP_R200},
- {0x1002, 0x514D, CHIP_R200},
- {0x1002, 0x5157, CHIP_RV200},
- {0x1002, 0x5158, CHIP_RV200},
- {0x1002, 0x5159, CHIP_RV100},
- {0x1002, 0x515A, CHIP_RV100},
+ {0x1002, 0x5148, CHIP_R200},
+ {0x1002, 0x514C, CHIP_R200},
+ {0x1002, 0x514D, CHIP_R200},
+ {0x1002, 0x5157, CHIP_RV200},
+ {0x1002, 0x5158, CHIP_RV200},
+ {0x1002, 0x5159, CHIP_RV100},
+ {0x1002, 0x515A, CHIP_RV100},
{0x1002, 0x515E, CHIP_RV100},
{0x1002, 0x5460, CHIP_RV380},
{0x1002, 0x5462, CHIP_RV380},
@@ -138,10 +138,10 @@ struct pci_id radeon_pci_id[] = {
{0x1002, 0x5955, CHIP_RS480},
{0x1002, 0x5974, CHIP_RS480},
{0x1002, 0x5975, CHIP_RS480},
- {0x1002, 0x5960, CHIP_RV280},
- {0x1002, 0x5961, CHIP_RV280},
- {0x1002, 0x5962, CHIP_RV280},
- {0x1002, 0x5964, CHIP_RV280},
+ {0x1002, 0x5960, CHIP_RV280},
+ {0x1002, 0x5961, CHIP_RV280},
+ {0x1002, 0x5962, CHIP_RV280},
+ {0x1002, 0x5964, CHIP_RV280},
{0x1002, 0x5965, CHIP_RV280},
{0x1002, 0x5969, CHIP_RV100},
{0x1002, 0x5a41, CHIP_RS400},
@@ -445,6 +445,42 @@ struct pci_id radeon_pci_id[] = {
{0x1002, 0x9803, CHIP_PALM},
{0x1002, 0x9804, CHIP_PALM},
{0x1002, 0x9805, CHIP_PALM},
+ {0x1002, 0x6720, CHIP_BARTS},
+ {0x1002, 0x6721, CHIP_BARTS},
+ {0x1002, 0x6722, CHIP_BARTS},
+ {0x1002, 0x6723, CHIP_BARTS},
+ {0x1002, 0x6724, CHIP_BARTS},
+ {0x1002, 0x6725, CHIP_BARTS},
+ {0x1002, 0x6726, CHIP_BARTS},
+ {0x1002, 0x6727, CHIP_BARTS},
+ {0x1002, 0x6728, CHIP_BARTS},
+ {0x1002, 0x6729, CHIP_BARTS},
+ {0x1002, 0x6738, CHIP_BARTS},
+ {0x1002, 0x6739, CHIP_BARTS},
+ {0x1002, 0x6740, CHIP_TURKS},
+ {0x1002, 0x6741, CHIP_TURKS},
+ {0x1002, 0x6742, CHIP_TURKS},
+ {0x1002, 0x6743, CHIP_TURKS},
+ {0x1002, 0x6744, CHIP_TURKS},
+ {0x1002, 0x6745, CHIP_TURKS},
+ {0x1002, 0x6746, CHIP_TURKS},
+ {0x1002, 0x6747, CHIP_TURKS},
+ {0x1002, 0x6748, CHIP_TURKS},
+ {0x1002, 0x6749, CHIP_TURKS},
+ {0x1002, 0x6750, CHIP_TURKS},
+ {0x1002, 0x6758, CHIP_TURKS},
+ {0x1002, 0x6759, CHIP_TURKS},
+ {0x1002, 0x6760, CHIP_CAICOS},
+ {0x1002, 0x6761, CHIP_CAICOS},
+ {0x1002, 0x6762, CHIP_CAICOS},
+ {0x1002, 0x6763, CHIP_CAICOS},
+ {0x1002, 0x6764, CHIP_CAICOS},
+ {0x1002, 0x6765, CHIP_CAICOS},
+ {0x1002, 0x6766, CHIP_CAICOS},
+ {0x1002, 0x6767, CHIP_CAICOS},
+ {0x1002, 0x6768, CHIP_CAICOS},
+ {0x1002, 0x6770, CHIP_CAICOS},
+ {0x1002, 0x6779, CHIP_CAICOS},
{0, 0},
};
diff --git a/src/gallium/winsys/radeon/drm/Makefile b/src/gallium/winsys/radeon/drm/Makefile
index aa73edde34..e63ae6f500 100644
--- a/src/gallium/winsys/radeon/drm/Makefile
+++ b/src/gallium/winsys/radeon/drm/Makefile
@@ -5,9 +5,9 @@ include $(TOP)/configs/current
LIBNAME = radeonwinsys
C_SOURCES = \
- radeon_drm_buffer.c \
- radeon_drm_common.c \
- radeon_r300.c
+ radeon_drm_bo.c \
+ radeon_drm_cs.c \
+ radeon_drm_common.c
LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/r300 \
$(shell pkg-config libdrm --cflags-only-I)
diff --git a/src/gallium/winsys/radeon/drm/SConscript b/src/gallium/winsys/radeon/drm/SConscript
index 2dbf61a7ba..b16e03556d 100644
--- a/src/gallium/winsys/radeon/drm/SConscript
+++ b/src/gallium/winsys/radeon/drm/SConscript
@@ -3,12 +3,17 @@ Import('*')
env = env.Clone()
radeon_sources = [
- 'radeon_drm_buffer.c',
+ 'radeon_drm_bo.c',
+ 'radeon_drm_cs.c',
'radeon_drm_common.c',
- 'radeon_r300.c',
]
-env.ParseConfig('pkg-config --cflags libdrm_radeon')
+try:
+ env.ParseConfig('pkg-config --cflags libdrm_radeon')
+except:
+ print 'warning: not building r300g'
+ Return()
+
env.Append(CPPPATH = '#/src/gallium/drivers/r300')
radeonwinsys = env.ConvenienceLibrary(
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
new file mode 100644
index 0000000000..9eb833454d
--- /dev/null
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -0,0 +1,602 @@
+/*
+ * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+#define _FILE_OFFSET_BITS 64
+#include "radeon_drm_cs.h"
+
+#include "util/u_hash_table.h"
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
+#include "os/os_thread.h"
+
+#include "state_tracker/drm_driver.h"
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <xf86drm.h>
+#include <errno.h>
+
+#define RADEON_BO_FLAGS_MACRO_TILE 1
+#define RADEON_BO_FLAGS_MICRO_TILE 2
+#define RADEON_BO_FLAGS_MICRO_TILE_SQUARE 0x20
+
+extern const struct pb_vtbl radeon_bo_vtbl;
+
+
+static INLINE struct radeon_bo *radeon_bo(struct pb_buffer *bo)
+{
+ assert(bo->vtbl == &radeon_bo_vtbl);
+ return (struct radeon_bo *)bo;
+}
+
+struct radeon_bomgr {
+ /* Base class. */
+ struct pb_manager base;
+
+ /* Winsys. */
+ struct radeon_drm_winsys *rws;
+
+ /* List of buffer handles and its mutex. */
+ struct util_hash_table *bo_handles;
+ pipe_mutex bo_handles_mutex;
+};
+
+static INLINE struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr)
+{
+ return (struct radeon_bomgr *)mgr;
+}
+
+static struct radeon_bo *get_radeon_bo(struct pb_buffer *_buf)
+{
+ struct radeon_bo *bo = NULL;
+
+ if (_buf->vtbl == &radeon_bo_vtbl) {
+ bo = radeon_bo(_buf);
+ } else {
+ struct pb_buffer *base_buf;
+ pb_size offset;
+ pb_get_base_buffer(_buf, &base_buf, &offset);
+
+ if (base_buf->vtbl == &radeon_bo_vtbl)
+ bo = radeon_bo(base_buf);
+ }
+
+ return bo;
+}
+
+void radeon_bo_unref(struct radeon_bo *bo)
+{
+ struct drm_gem_close args = {};
+
+ if (!p_atomic_dec_zero(&bo->ref_count))
+ return;
+
+ if (bo->name) {
+ pipe_mutex_lock(bo->mgr->bo_handles_mutex);
+ util_hash_table_remove(bo->mgr->bo_handles,
+ (void*)(uintptr_t)bo->name);
+ pipe_mutex_unlock(bo->mgr->bo_handles_mutex);
+ }
+
+ if (bo->ptr)
+ munmap(bo->ptr, bo->size);
+
+ /* Close object. */
+ args.handle = bo->handle;
+ drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
+ pipe_mutex_destroy(bo->map_mutex);
+ FREE(bo);
+}
+
+static void radeon_bo_wait(struct r300_winsys_bo *_buf)
+{
+ struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf));
+ struct drm_radeon_gem_wait_idle args = {};
+
+ while (p_atomic_read(&bo->num_active_ioctls)) {
+ sched_yield();
+ }
+
+ args.handle = bo->handle;
+ while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
+ &args, sizeof(args)) == -EBUSY);
+}
+
+static boolean radeon_bo_is_busy(struct r300_winsys_bo *_buf)
+{
+ struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf));
+ struct drm_radeon_gem_busy args = {};
+
+ if (p_atomic_read(&bo->num_active_ioctls)) {
+ return TRUE;
+ }
+
+ args.handle = bo->handle;
+ return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
+ &args, sizeof(args)) != 0;
+}
+
+static void radeon_bo_destroy(struct pb_buffer *_buf)
+{
+ struct radeon_bo *bo = radeon_bo(_buf);
+
+ radeon_bo_unref(bo);
+}
+
+static unsigned get_pb_usage_from_transfer_flags(enum pipe_transfer_usage usage)
+{
+ unsigned res = 0;
+
+ if (usage & PIPE_TRANSFER_DONTBLOCK)
+ res |= PB_USAGE_DONTBLOCK;
+
+ if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
+ res |= PB_USAGE_UNSYNCHRONIZED;
+
+ return res;
+}
+
+static void *radeon_bo_map_internal(struct pb_buffer *_buf,
+ unsigned flags, void *flush_ctx)
+{
+ struct radeon_bo *bo = radeon_bo(_buf);
+ struct radeon_drm_cs *cs = flush_ctx;
+ struct drm_radeon_gem_mmap args = {};
+ void *ptr;
+
+ /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
+ if (!(flags & PB_USAGE_UNSYNCHRONIZED)) {
+ /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
+ if (flags & PB_USAGE_DONTBLOCK) {
+ if (radeon_bo_is_referenced_by_cs(cs, bo)) {
+ cs->flush_cs(cs->flush_data, R300_FLUSH_ASYNC);
+ return NULL;
+ }
+
+ if (radeon_bo_is_busy((struct r300_winsys_bo*)bo)) {
+ return NULL;
+ }
+ } else {
+ if (radeon_bo_is_referenced_by_cs(cs, bo)) {
+ cs->flush_cs(cs->flush_data, 0);
+ } else {
+ /* Try to avoid busy-waiting in radeon_bo_wait. */
+ if (p_atomic_read(&bo->num_active_ioctls))
+ radeon_drm_cs_sync_flush(cs);
+ }
+
+ radeon_bo_wait((struct r300_winsys_bo*)bo);
+ }
+ }
+
+ /* Return the pointer if it's already mapped. */
+ if (bo->ptr)
+ return bo->ptr;
+
+ /* Map the buffer. */
+ pipe_mutex_lock(bo->map_mutex);
+ args.handle = bo->handle;
+ args.offset = 0;
+ args.size = (uint64_t)bo->size;
+ if (drmCommandWriteRead(bo->rws->fd,
+ DRM_RADEON_GEM_MMAP,
+ &args,
+ sizeof(args))) {
+ pipe_mutex_unlock(bo->map_mutex);
+ fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
+ bo, bo->handle);
+ return NULL;
+ }
+
+ ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
+ bo->rws->fd, args.addr_ptr);
+ if (ptr == MAP_FAILED) {
+ pipe_mutex_unlock(bo->map_mutex);
+ fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
+ return NULL;
+ }
+ bo->ptr = ptr;
+ pipe_mutex_unlock(bo->map_mutex);
+
+ return bo->ptr;
+}
+
+static void radeon_bo_unmap_internal(struct pb_buffer *_buf)
+{
+ /* NOP */
+}
+
+static void radeon_bo_get_base_buffer(struct pb_buffer *buf,
+ struct pb_buffer **base_buf,
+ unsigned *offset)
+{
+ *base_buf = buf;
+ *offset = 0;
+}
+
+static enum pipe_error radeon_bo_validate(struct pb_buffer *_buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ /* Always pinned */
+ return PIPE_OK;
+}
+
+static void radeon_bo_fence(struct pb_buffer *buf,
+ struct pipe_fence_handle *fence)
+{
+}
+
+const struct pb_vtbl radeon_bo_vtbl = {
+ radeon_bo_destroy,
+ radeon_bo_map_internal,
+ radeon_bo_unmap_internal,
+ radeon_bo_validate,
+ radeon_bo_fence,
+ radeon_bo_get_base_buffer,
+};
+
+static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
+ pb_size size,
+ const struct pb_desc *desc)
+{
+ struct radeon_bomgr *mgr = radeon_bomgr(_mgr);
+ struct radeon_drm_winsys *rws = mgr->rws;
+ struct radeon_bo *bo;
+ struct drm_radeon_gem_create args = {};
+
+ args.size = size;
+ args.alignment = desc->alignment;
+ args.initial_domain =
+ (desc->usage & RADEON_PB_USAGE_DOMAIN_GTT ?
+ RADEON_GEM_DOMAIN_GTT : 0) |
+ (desc->usage & RADEON_PB_USAGE_DOMAIN_VRAM ?
+ RADEON_GEM_DOMAIN_VRAM : 0);
+
+ if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
+ &args, sizeof(args))) {
+ fprintf(stderr, "Failed to allocate :\n");
+ fprintf(stderr, " size : %d bytes\n", size);
+ fprintf(stderr, " alignment : %d bytes\n", desc->alignment);
+ fprintf(stderr, " domains : %d\n", args.initial_domain);
+ return NULL;
+ }
+
+ bo = CALLOC_STRUCT(radeon_bo);
+ if (!bo)
+ return NULL;
+
+ pipe_reference_init(&bo->base.base.reference, 1);
+ bo->base.base.alignment = desc->alignment;
+ bo->base.base.usage = desc->usage;
+ bo->base.base.size = size;
+ bo->base.vtbl = &radeon_bo_vtbl;
+ bo->mgr = mgr;
+ bo->rws = mgr->rws;
+ bo->handle = args.handle;
+ bo->size = size;
+ pipe_mutex_init(bo->map_mutex);
+
+ radeon_bo_ref(bo);
+ return &bo->base;
+}
+
+static void radeon_bomgr_flush(struct pb_manager *mgr)
+{
+ /* NOP */
+}
+
+/* This is for the cache bufmgr. */
+static boolean radeon_bomgr_is_buffer_busy(struct pb_manager *_mgr,
+ struct pb_buffer *_buf)
+{
+ struct radeon_bo *bo = radeon_bo(_buf);
+
+ if (radeon_bo_is_referenced_by_any_cs(bo)) {
+ return TRUE;
+ }
+
+ if (radeon_bo_is_busy((struct r300_winsys_bo*)bo)) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static void radeon_bomgr_destroy(struct pb_manager *_mgr)
+{
+ struct radeon_bomgr *mgr = radeon_bomgr(_mgr);
+ util_hash_table_destroy(mgr->bo_handles);
+ pipe_mutex_destroy(mgr->bo_handles_mutex);
+ FREE(mgr);
+}
+
+#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
+
+static unsigned handle_hash(void *key)
+{
+ return PTR_TO_UINT(key);
+}
+
+static int handle_compare(void *key1, void *key2)
+{
+ return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
+}
+
+struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws)
+{
+ struct radeon_bomgr *mgr;
+
+ mgr = CALLOC_STRUCT(radeon_bomgr);
+ if (!mgr)
+ return NULL;
+
+ mgr->base.destroy = radeon_bomgr_destroy;
+ mgr->base.create_buffer = radeon_bomgr_create_bo;
+ mgr->base.flush = radeon_bomgr_flush;
+ mgr->base.is_buffer_busy = radeon_bomgr_is_buffer_busy;
+
+ mgr->rws = rws;
+ mgr->bo_handles = util_hash_table_create(handle_hash, handle_compare);
+ pipe_mutex_init(mgr->bo_handles_mutex);
+ return &mgr->base;
+}
+
+static void *radeon_bo_map(struct r300_winsys_bo *buf,
+ struct r300_winsys_cs *cs,
+ enum pipe_transfer_usage usage)
+{
+ struct pb_buffer *_buf = pb_buffer(buf);
+
+ return pb_map(_buf, get_pb_usage_from_transfer_flags(usage), cs);
+}
+
+static void radeon_bo_get_tiling(struct r300_winsys_bo *_buf,
+ enum r300_buffer_tiling *microtiled,
+ enum r300_buffer_tiling *macrotiled)
+{
+ struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf));
+ struct drm_radeon_gem_set_tiling args = {};
+
+ args.handle = bo->handle;
+
+ drmCommandWriteRead(bo->rws->fd,
+ DRM_RADEON_GEM_GET_TILING,
+ &args,
+ sizeof(args));
+
+ *microtiled = R300_BUFFER_LINEAR;
+ *macrotiled = R300_BUFFER_LINEAR;
+ if (args.tiling_flags & RADEON_BO_FLAGS_MICRO_TILE)
+ *microtiled = R300_BUFFER_TILED;
+
+ if (args.tiling_flags & RADEON_BO_FLAGS_MACRO_TILE)
+ *macrotiled = R300_BUFFER_TILED;
+}
+
+static void radeon_bo_set_tiling(struct r300_winsys_bo *_buf,
+ struct r300_winsys_cs *rcs,
+ enum r300_buffer_tiling microtiled,
+ enum r300_buffer_tiling macrotiled,
+ uint32_t pitch)
+{
+ struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf));
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+ struct drm_radeon_gem_set_tiling args = {};
+
+ /* Tiling determines how DRM treats the buffer data.
+ * We must flush CS when changing it if the buffer is referenced. */
+ if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
+ cs->flush_cs(cs->flush_data, 0);
+ }
+
+ while (p_atomic_read(&bo->num_active_ioctls)) {
+ sched_yield();
+ }
+
+ if (microtiled == R300_BUFFER_TILED)
+ args.tiling_flags |= RADEON_BO_FLAGS_MICRO_TILE;
+ else if (microtiled == R300_BUFFER_SQUARETILED)
+ args.tiling_flags |= RADEON_BO_FLAGS_MICRO_TILE_SQUARE;
+
+ if (macrotiled == R300_BUFFER_TILED)
+ args.tiling_flags |= RADEON_BO_FLAGS_MACRO_TILE;
+
+ args.handle = bo->handle;
+ args.pitch = pitch;
+
+ drmCommandWriteRead(bo->rws->fd,
+ DRM_RADEON_GEM_SET_TILING,
+ &args,
+ sizeof(args));
+}
+
+static struct r300_winsys_cs_handle *radeon_drm_get_cs_handle(
+ struct r300_winsys_bo *_buf)
+{
+ /* return radeon_bo. */
+ return (struct r300_winsys_cs_handle*)
+ get_radeon_bo(pb_buffer(_buf));
+}
+
+static unsigned get_pb_usage_from_create_flags(unsigned bind, unsigned usage,
+ enum r300_buffer_domain domain)
+{
+ unsigned res = 0;
+
+ if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
+ res |= RADEON_PB_USAGE_CACHE;
+
+ if (domain & R300_DOMAIN_GTT)
+ res |= RADEON_PB_USAGE_DOMAIN_GTT;
+
+ if (domain & R300_DOMAIN_VRAM)
+ res |= RADEON_PB_USAGE_DOMAIN_VRAM;
+
+ return res;
+}
+
+static struct r300_winsys_bo *
+radeon_winsys_bo_create(struct r300_winsys_screen *rws,
+ unsigned size,
+ unsigned alignment,
+ unsigned bind,
+ unsigned usage,
+ enum r300_buffer_domain domain)
+{
+ struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
+ struct pb_desc desc;
+ struct pb_manager *provider;
+ struct pb_buffer *buffer;
+
+ memset(&desc, 0, sizeof(desc));
+ desc.alignment = alignment;
+ desc.usage = get_pb_usage_from_create_flags(bind, usage, domain);
+
+ /* Assign a buffer manager. */
+ if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
+ provider = ws->cman;
+ else
+ provider = ws->kman;
+
+ buffer = provider->create_buffer(provider, size, &desc);
+ if (!buffer)
+ return NULL;
+
+ return (struct r300_winsys_bo*)buffer;
+}
+
+static struct r300_winsys_bo *radeon_winsys_bo_from_handle(struct r300_winsys_screen *rws,
+ struct winsys_handle *whandle,
+ unsigned *stride,
+ unsigned *size)
+{
+ struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
+ struct radeon_bo *bo;
+ struct radeon_bomgr *mgr = radeon_bomgr(ws->kman);
+ struct drm_gem_open open_arg = {};
+
+ /* We must maintain a list of pairs <handle, bo>, so that we always return
+ * the same BO for one particular handle. If we didn't do that and created
+ * more than one BO for the same handle and then relocated them in a CS,
+ * we would hit a deadlock in the kernel.
+ *
+ * The list of pairs is guarded by a mutex, of course. */
+ pipe_mutex_lock(mgr->bo_handles_mutex);
+
+ /* First check if there already is an existing bo for the handle. */
+ bo = util_hash_table_get(mgr->bo_handles, (void*)(uintptr_t)whandle->handle);
+ if (bo) {
+ /* Increase the refcount. */
+ struct pb_buffer *b = NULL;
+ pb_reference(&b, &bo->base);
+ goto done;
+ }
+
+ /* There isn't, create a new one. */
+ bo = CALLOC_STRUCT(radeon_bo);
+ if (!bo) {
+ goto fail;
+ }
+
+ /* Open the BO. */
+ open_arg.name = whandle->handle;
+ if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
+ FREE(bo);
+ goto fail;
+ }
+ bo->handle = open_arg.handle;
+ bo->size = open_arg.size;
+ bo->name = whandle->handle;
+ radeon_bo_ref(bo);
+
+ /* Initialize it. */
+ pipe_reference_init(&bo->base.base.reference, 1);
+ bo->base.base.alignment = 0;
+ bo->base.base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ;
+ bo->base.base.size = bo->size;
+ bo->base.vtbl = &radeon_bo_vtbl;
+ bo->mgr = mgr;
+ bo->rws = mgr->rws;
+ pipe_mutex_init(bo->map_mutex);
+
+ util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)whandle->handle, bo);
+
+done:
+ pipe_mutex_unlock(mgr->bo_handles_mutex);
+
+ if (stride)
+ *stride = whandle->stride;
+ if (size)
+ *size = bo->base.base.size;
+
+ return (struct r300_winsys_bo*)bo;
+
+fail:
+ pipe_mutex_unlock(mgr->bo_handles_mutex);
+ return NULL;
+}
+
+static boolean radeon_winsys_bo_get_handle(struct r300_winsys_bo *buffer,
+ unsigned stride,
+ struct winsys_handle *whandle)
+{
+ struct drm_gem_flink flink = {};
+ struct radeon_bo *bo = get_radeon_bo(pb_buffer(buffer));
+
+ if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
+ if (!bo->flinked) {
+ flink.handle = bo->handle;
+
+ if (ioctl(bo->rws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
+ return FALSE;
+ }
+
+ bo->flinked = TRUE;
+ bo->flink = flink.name;
+ }
+ whandle->handle = bo->flink;
+ } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
+ whandle->handle = bo->handle;
+ }
+
+ whandle->stride = stride;
+ return TRUE;
+}
+
+void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
+{
+ ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle;
+ ws->base.buffer_set_tiling = radeon_bo_set_tiling;
+ ws->base.buffer_get_tiling = radeon_bo_get_tiling;
+ ws->base.buffer_map = radeon_bo_map;
+ ws->base.buffer_unmap = pb_unmap;
+ ws->base.buffer_wait = radeon_bo_wait;
+ ws->base.buffer_is_busy = radeon_bo_is_busy;
+ ws->base.buffer_create = radeon_winsys_bo_create;
+ ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
+ ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
+}
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index 494abdc0b4..a26866b7e7 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
@@ -1,5 +1,6 @@
/*
* Copyright © 2008 Jérôme Glisse
+ * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
@@ -26,28 +27,61 @@
/*
* Authors:
* Jérôme Glisse <glisse@freedesktop.org>
+ * Marek Olšák <maraeo@gmail.com>
*/
#ifndef RADEON_DRM_BUFFER_H
#define RADEON_DRM_BUFFER_H
#include "radeon_winsys.h"
+#include "pipebuffer/pb_bufmgr.h"
+#include "os/os_thread.h"
-#define RADEON_PB_USAGE_VERTEX (1 << 28)
+#define RADEON_PB_USAGE_CACHE (1 << 28)
#define RADEON_PB_USAGE_DOMAIN_GTT (1 << 29)
#define RADEON_PB_USAGE_DOMAIN_VRAM (1 << 30)
+struct radeon_bomgr;
+
+struct radeon_bo {
+ struct pb_buffer base;
+ struct radeon_bomgr *mgr;
+ struct radeon_drm_winsys *rws;
+
+ void *ptr;
+ pipe_mutex map_mutex;
+
+ uint32_t size;
+ uint32_t handle;
+ uint32_t name;
+
+ int ref_count;
+
+ /* how many command streams is this bo referenced in? */
+ int num_cs_references;
+
+ /* how many command streams, which are being emitted in a separate
+ * thread, is this bo referenced in? */
+ int num_active_ioctls;
+
+ boolean flinked;
+ uint32_t flink;
+};
+
+struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws);
+void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws);
+
+void radeon_bo_unref(struct radeon_bo *buf);
+
+
+static INLINE void radeon_bo_ref(struct radeon_bo *bo)
+{
+ p_atomic_inc(&bo->ref_count);
+}
+
static INLINE struct pb_buffer *
-radeon_pb_buffer(struct r300_winsys_buffer *buffer)
+pb_buffer(struct r300_winsys_bo *buffer)
{
return (struct pb_buffer *)buffer;
}
-struct pb_manager *radeon_drm_bufmgr_create(struct radeon_drm_winsys *rws);
-struct pb_buffer *radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager *_mgr,
- uint32_t handle);
-void radeon_drm_bufmgr_flush_maps(struct pb_manager *_mgr);
-boolean radeon_drm_bufmgr_get_handle(struct pb_buffer *_buf,
- struct winsys_handle *whandle);
-void radeon_drm_bufmgr_init_functions(struct radeon_drm_winsys *ws);
-
#endif
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c
deleted file mode 100644
index dc4b51747f..0000000000
--- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c
+++ /dev/null
@@ -1,564 +0,0 @@
-#include "radeon_cs_gem.h"
-#include "radeon_drm_buffer.h"
-
-#include "util/u_hash_table.h"
-#include "util/u_memory.h"
-#include "util/u_simple_list.h"
-#include "pipebuffer/pb_bufmgr.h"
-#include "os/os_thread.h"
-
-#include "state_tracker/drm_driver.h"
-
-#include <radeon_drm.h>
-#include <radeon_bo_gem.h>
-#include <sys/ioctl.h>
-
-struct radeon_drm_bufmgr;
-
-struct radeon_drm_buffer {
- struct pb_buffer base;
- struct radeon_drm_bufmgr *mgr;
-
- struct radeon_bo *bo;
-
- boolean flinked;
- uint32_t flink;
-
- struct radeon_drm_buffer *next, *prev;
-};
-
-extern const struct pb_vtbl radeon_drm_buffer_vtbl;
-
-
-static INLINE struct radeon_drm_buffer *
-radeon_drm_buffer(struct pb_buffer *buf)
-{
- assert(buf);
- assert(buf->vtbl == &radeon_drm_buffer_vtbl);
- return (struct radeon_drm_buffer *)buf;
-}
-
-struct radeon_drm_bufmgr {
- /* Base class. */
- struct pb_manager base;
-
- /* Winsys. */
- struct radeon_drm_winsys *rws;
-
- /* List of mapped buffers and its mutex. */
- struct radeon_drm_buffer buffer_map_list;
- pipe_mutex buffer_map_list_mutex;
-
- /* List of buffer handles and its mutex. */
- struct util_hash_table *buffer_handles;
- pipe_mutex buffer_handles_mutex;
-};
-
-static INLINE struct radeon_drm_bufmgr *
-radeon_drm_bufmgr(struct pb_manager *mgr)
-{
- assert(mgr);
- return (struct radeon_drm_bufmgr *)mgr;
-}
-
-static void
-radeon_drm_buffer_destroy(struct pb_buffer *_buf)
-{
- struct radeon_drm_buffer *buf = radeon_drm_buffer(_buf);
- int name;
-
- if (buf->bo->ptr != NULL) {
- pipe_mutex_lock(buf->mgr->buffer_map_list_mutex);
- /* Now test it again inside the mutex. */
- if (buf->bo->ptr != NULL) {
- remove_from_list(buf);
- radeon_bo_unmap(buf->bo);
- buf->bo->ptr = NULL;
- }
- pipe_mutex_unlock(buf->mgr->buffer_map_list_mutex);
- }
- name = radeon_gem_name_bo(buf->bo);
- if (name) {
- pipe_mutex_lock(buf->mgr->buffer_handles_mutex);
- util_hash_table_remove(buf->mgr->buffer_handles,
- (void*)(uintptr_t)name);
- pipe_mutex_unlock(buf->mgr->buffer_handles_mutex);
- }
- radeon_bo_unref(buf->bo);
-
- FREE(buf);
-}
-
-static unsigned get_pb_usage_from_transfer_flags(enum pipe_transfer_usage usage)
-{
- unsigned res = 0;
-
- if (usage & PIPE_TRANSFER_READ)
- res |= PB_USAGE_CPU_READ;
-
- if (usage & PIPE_TRANSFER_WRITE)
- res |= PB_USAGE_CPU_WRITE;
-
- if (usage & PIPE_TRANSFER_DONTBLOCK)
- res |= PB_USAGE_DONTBLOCK;
-
- if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
- res |= PB_USAGE_UNSYNCHRONIZED;
-
- return res;
-}
-
-static void *
-radeon_drm_buffer_map_internal(struct pb_buffer *_buf,
- unsigned flags, void *flush_ctx)
-{
- struct radeon_drm_buffer *buf = radeon_drm_buffer(_buf);
- struct radeon_drm_cs *cs = flush_ctx;
- int write = 0;
-
- /* Note how we use radeon_bo_is_referenced_by_cs here. There are
- * basically two places this map function can be called from:
- * - pb_map
- * - create_buffer (in the buffer reuse case)
- *
- * Since pb managers are per-winsys managers, not per-context managers,
- * and we shouldn't reuse buffers if they are in-use in any context,
- * we simply ask: is this buffer referenced by *any* CS?
- *
- * The problem with buffer_create is that it comes from pipe_screen,
- * so we have no CS to look at, though luckily the following code
- * is sufficient to tell whether the buffer is in use. */
- if (flags & PB_USAGE_DONTBLOCK) {
- if (_buf->base.usage & RADEON_PB_USAGE_VERTEX)
- if (radeon_bo_is_referenced_by_cs(buf->bo, NULL))
- return NULL;
- }
-
- if (buf->bo->ptr != NULL) {
- pipe_mutex_lock(buf->mgr->buffer_map_list_mutex);
- /* Now test ptr again inside the mutex. We might have gotten a race
- * during the first test. */
- if (buf->bo->ptr != NULL) {
- remove_from_list(buf);
- }
- pipe_mutex_unlock(buf->mgr->buffer_map_list_mutex);
- return buf->bo->ptr;
- }
-
- if (flags & PB_USAGE_DONTBLOCK) {
- uint32_t domain;
- if (radeon_bo_is_busy(buf->bo, &domain))
- return NULL;
- }
-
- /* If we don't have any CS and the buffer is referenced,
- * we cannot flush. */
- assert(cs || !radeon_bo_is_referenced_by_cs(buf->bo, NULL));
-
- if (cs && radeon_bo_is_referenced_by_cs(buf->bo, cs->cs)) {
- cs->flush_cs(cs->flush_data);
- }
-
- if (flags & PB_USAGE_CPU_WRITE) {
- write = 1;
- }
-
- if (radeon_bo_map(buf->bo, write)) {
- return NULL;
- }
-
- pipe_mutex_lock(buf->mgr->buffer_map_list_mutex);
- remove_from_list(buf);
- pipe_mutex_unlock(buf->mgr->buffer_map_list_mutex);
- return buf->bo->ptr;
-}
-
-static void
-radeon_drm_buffer_unmap_internal(struct pb_buffer *_buf)
-{
- struct radeon_drm_buffer *buf = radeon_drm_buffer(_buf);
- pipe_mutex_lock(buf->mgr->buffer_map_list_mutex);
- if (is_empty_list(buf)) { /* = is not inserted... */
- insert_at_tail(&buf->mgr->buffer_map_list, buf);
- }
- pipe_mutex_unlock(buf->mgr->buffer_map_list_mutex);
-}
-
-static void
-radeon_drm_buffer_get_base_buffer(struct pb_buffer *buf,
- struct pb_buffer **base_buf,
- unsigned *offset)
-{
- *base_buf = buf;
- *offset = 0;
-}
-
-
-static enum pipe_error
-radeon_drm_buffer_validate(struct pb_buffer *_buf,
- struct pb_validate *vl,
- unsigned flags)
-{
- /* Always pinned */
- return PIPE_OK;
-}
-
-static void
-radeon_drm_buffer_fence(struct pb_buffer *buf,
- struct pipe_fence_handle *fence)
-{
-}
-
-const struct pb_vtbl radeon_drm_buffer_vtbl = {
- radeon_drm_buffer_destroy,
- radeon_drm_buffer_map_internal,
- radeon_drm_buffer_unmap_internal,
- radeon_drm_buffer_validate,
- radeon_drm_buffer_fence,
- radeon_drm_buffer_get_base_buffer,
-};
-
-static struct pb_buffer *
-radeon_drm_bufmgr_create_buffer_from_handle_unsafe(struct pb_manager *_mgr,
- uint32_t handle)
-{
- struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr);
- struct radeon_drm_winsys *rws = mgr->rws;
- struct radeon_drm_buffer *buf;
- struct radeon_bo *bo;
-
- buf = util_hash_table_get(mgr->buffer_handles, (void*)(uintptr_t)handle);
-
- if (buf) {
- struct pb_buffer *b = NULL;
- pb_reference(&b, &buf->base);
- return b;
- }
-
- bo = radeon_bo_open(rws->bom, handle, 0,
- 0, 0, 0);
- if (bo == NULL)
- return NULL;
-
- buf = CALLOC_STRUCT(radeon_drm_buffer);
- if (!buf) {
- radeon_bo_unref(bo);
- return NULL;
- }
-
- make_empty_list(buf);
-
- pipe_reference_init(&buf->base.base.reference, 1);
- buf->base.base.alignment = 0;
- buf->base.base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ;
- buf->base.base.size = bo->size;
- buf->base.vtbl = &radeon_drm_buffer_vtbl;
- buf->mgr = mgr;
-
- buf->bo = bo;
-
- util_hash_table_set(mgr->buffer_handles, (void*)(uintptr_t)handle, buf);
-
- return &buf->base;
-}
-
-struct pb_buffer *
-radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager *_mgr,
- uint32_t handle)
-{
- struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr);
- struct pb_buffer *pb;
-
- pipe_mutex_lock(mgr->buffer_handles_mutex);
- pb = radeon_drm_bufmgr_create_buffer_from_handle_unsafe(_mgr, handle);
- pipe_mutex_unlock(mgr->buffer_handles_mutex);
-
- return pb;
-}
-
-static struct pb_buffer *
-radeon_drm_bufmgr_create_buffer(struct pb_manager *_mgr,
- pb_size size,
- const struct pb_desc *desc)
-{
- struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr);
- struct radeon_drm_winsys *rws = mgr->rws;
- struct radeon_drm_buffer *buf;
- uint32_t domain;
-
- buf = CALLOC_STRUCT(radeon_drm_buffer);
- if (!buf)
- goto error1;
-
- pipe_reference_init(&buf->base.base.reference, 1);
- buf->base.base.alignment = desc->alignment;
- buf->base.base.usage = desc->usage;
- buf->base.base.size = size;
- buf->base.vtbl = &radeon_drm_buffer_vtbl;
- buf->mgr = mgr;
-
- make_empty_list(buf);
-
- domain =
- (desc->usage & RADEON_PB_USAGE_DOMAIN_GTT ? RADEON_GEM_DOMAIN_GTT : 0) |
- (desc->usage & RADEON_PB_USAGE_DOMAIN_VRAM ? RADEON_GEM_DOMAIN_VRAM : 0);
-
- buf->bo = radeon_bo_open(rws->bom, 0, size,
- desc->alignment, domain, 0);
- if (buf->bo == NULL)
- goto error2;
-
- return &buf->base;
-
- error2:
- FREE(buf);
- error1:
- return NULL;
-}
-
-static void
-radeon_drm_bufmgr_flush(struct pb_manager *mgr)
-{
- /* NOP */
-}
-
-static void
-radeon_drm_bufmgr_destroy(struct pb_manager *_mgr)
-{
- struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr);
- util_hash_table_destroy(mgr->buffer_handles);
- pipe_mutex_destroy(mgr->buffer_map_list_mutex);
- pipe_mutex_destroy(mgr->buffer_handles_mutex);
- FREE(mgr);
-}
-
-static unsigned handle_hash(void *key)
-{
- return (unsigned)key;
-}
-
-static int handle_compare(void *key1, void *key2)
-{
- return !((int)key1 == (int)key2);
-}
-
-struct pb_manager *
-radeon_drm_bufmgr_create(struct radeon_drm_winsys *rws)
-{
- struct radeon_drm_bufmgr *mgr;
-
- mgr = CALLOC_STRUCT(radeon_drm_bufmgr);
- if (!mgr)
- return NULL;
-
- mgr->base.destroy = radeon_drm_bufmgr_destroy;
- mgr->base.create_buffer = radeon_drm_bufmgr_create_buffer;
- mgr->base.flush = radeon_drm_bufmgr_flush;
-
- mgr->rws = rws;
- make_empty_list(&mgr->buffer_map_list);
- mgr->buffer_handles = util_hash_table_create(handle_hash, handle_compare);
- pipe_mutex_init(mgr->buffer_map_list_mutex);
- pipe_mutex_init(mgr->buffer_handles_mutex);
- return &mgr->base;
-}
-
-static struct radeon_drm_buffer *get_drm_buffer(struct pb_buffer *_buf)
-{
- struct radeon_drm_buffer *buf = NULL;
-
- if (_buf->vtbl == &radeon_drm_buffer_vtbl) {
- buf = radeon_drm_buffer(_buf);
- } else {
- struct pb_buffer *base_buf;
- pb_size offset;
- pb_get_base_buffer(_buf, &base_buf, &offset);
-
- if (base_buf->vtbl == &radeon_drm_buffer_vtbl)
- buf = radeon_drm_buffer(base_buf);
- }
-
- return buf;
-}
-
-static void *radeon_drm_buffer_map(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf,
- struct r300_winsys_cs *cs,
- enum pipe_transfer_usage usage)
-{
- struct pb_buffer *_buf = radeon_pb_buffer(buf);
-
- return pb_map(_buf, get_pb_usage_from_transfer_flags(usage), radeon_drm_cs(cs));
-}
-
-static void radeon_drm_buffer_unmap(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf)
-{
- struct pb_buffer *_buf = radeon_pb_buffer(buf);
-
- pb_unmap(_buf);
-}
-
-boolean radeon_drm_bufmgr_get_handle(struct pb_buffer *_buf,
- struct winsys_handle *whandle)
-{
- struct drm_gem_flink flink;
- struct radeon_drm_buffer *buf = get_drm_buffer(_buf);
-
- if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
- if (!buf->flinked) {
- flink.handle = buf->bo->handle;
-
- if (ioctl(buf->mgr->rws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
- return FALSE;
- }
-
- buf->flinked = TRUE;
- buf->flink = flink.name;
- }
- whandle->handle = buf->flink;
- } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
- whandle->handle = buf->bo->handle;
- }
- return TRUE;
-}
-
-static void radeon_drm_buffer_get_tiling(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *_buf,
- enum r300_buffer_tiling *microtiled,
- enum r300_buffer_tiling *macrotiled)
-{
- struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf));
- uint32_t flags = 0, pitch;
-
- radeon_bo_get_tiling(buf->bo, &flags, &pitch);
-
- *microtiled = R300_BUFFER_LINEAR;
- *macrotiled = R300_BUFFER_LINEAR;
- if (flags & RADEON_BO_FLAGS_MICRO_TILE)
- *microtiled = R300_BUFFER_TILED;
-
- if (flags & RADEON_BO_FLAGS_MACRO_TILE)
- *macrotiled = R300_BUFFER_TILED;
-}
-
-static void radeon_drm_buffer_set_tiling(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *_buf,
- enum r300_buffer_tiling microtiled,
- enum r300_buffer_tiling macrotiled,
- uint32_t pitch)
-{
- struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf));
- uint32_t flags = 0;
- if (microtiled == R300_BUFFER_TILED)
- flags |= RADEON_BO_FLAGS_MICRO_TILE;
-/* XXX Remove this ifdef when libdrm version 2.4.19 becomes mandatory. */
-#ifdef RADEON_BO_FLAGS_MICRO_TILE_SQUARE
- else if (microtiled == R300_BUFFER_SQUARETILED)
- flags |= RADEON_BO_FLAGS_MICRO_TILE_SQUARE;
-#endif
- if (macrotiled == R300_BUFFER_TILED)
- flags |= RADEON_BO_FLAGS_MACRO_TILE;
-
- radeon_bo_set_tiling(buf->bo, flags, pitch);
-}
-
-static void radeon_drm_bufmgr_add_buffer(struct r300_winsys_cs *rcs,
- struct r300_winsys_cs_buffer *_buf,
- enum r300_buffer_domain rd,
- enum r300_buffer_domain wd)
-{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- struct radeon_bo *bo = (struct radeon_bo*)_buf;
-
- radeon_cs_space_add_persistent_bo(cs->cs, bo, rd, wd);
-}
-
-static void radeon_drm_bufmgr_write_reloc(struct r300_winsys_cs *rcs,
- struct r300_winsys_cs_buffer *_buf,
- enum r300_buffer_domain rd,
- enum r300_buffer_domain wd)
-{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- struct radeon_bo *bo = (struct radeon_bo*)_buf;
- int retval;
-
- cs->cs->cdw = cs->base.cdw;
- retval = radeon_cs_write_reloc(cs->cs, bo, rd, wd, 0);
- cs->base.cdw = cs->cs->cdw;
- if (retval) {
- fprintf(stderr, "radeon: Relocation of %p (%d, %d, %d) failed!\n",
- bo, rd, wd, 0);
- }
-}
-
-static struct r300_winsys_cs_buffer *radeon_drm_get_cs_handle(
- struct r300_winsys_screen *rws,
- struct r300_winsys_buffer *_buf)
-{
- /* return pure radeon_bo. */
- return (struct r300_winsys_cs_buffer*)
- get_drm_buffer(radeon_pb_buffer(_buf))->bo;
-}
-
-static boolean radeon_drm_is_buffer_referenced(struct r300_winsys_cs *rcs,
- struct r300_winsys_cs_buffer *_buf,
- enum r300_reference_domain domain)
-{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- struct radeon_bo *bo = (struct radeon_bo*)_buf;
- uint32_t tmp;
-
- if (domain & R300_REF_CS) {
- if (radeon_bo_is_referenced_by_cs(bo, cs->cs)) {
- return TRUE;
- }
- }
-
- if (domain & R300_REF_HW) {
- if (radeon_bo_is_busy(bo, &tmp)) {
- return TRUE;
- }
- }
-
- return FALSE;
-}
-
-void radeon_drm_bufmgr_flush_maps(struct pb_manager *_mgr)
-{
- struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr);
- struct radeon_drm_buffer *rpb, *t_rpb;
-
- pipe_mutex_lock(mgr->buffer_map_list_mutex);
-
- foreach_s(rpb, t_rpb, &mgr->buffer_map_list) {
- radeon_bo_unmap(rpb->bo);
- rpb->bo->ptr = NULL;
- remove_from_list(rpb);
- }
-
- make_empty_list(&mgr->buffer_map_list);
-
- pipe_mutex_unlock(mgr->buffer_map_list_mutex);
-}
-
-static void radeon_drm_buffer_wait(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *_buf)
-{
- struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf));
-
- radeon_bo_wait(buf->bo);
-}
-
-void radeon_drm_bufmgr_init_functions(struct radeon_drm_winsys *ws)
-{
- ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle;
- ws->base.buffer_set_tiling = radeon_drm_buffer_set_tiling;
- ws->base.buffer_get_tiling = radeon_drm_buffer_get_tiling;
- ws->base.buffer_map = radeon_drm_buffer_map;
- ws->base.buffer_unmap = radeon_drm_buffer_unmap;
- ws->base.buffer_wait = radeon_drm_buffer_wait;
- ws->base.cs_is_buffer_referenced = radeon_drm_is_buffer_referenced;
- ws->base.cs_add_buffer = radeon_drm_bufmgr_add_buffer;
- ws->base.cs_write_reloc = radeon_drm_bufmgr_write_reloc;
-}
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_common.c b/src/gallium/winsys/radeon/drm/radeon_drm_common.c
index 6bc6244115..72c2ff1112 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_common.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_common.c
@@ -1,5 +1,6 @@
/*
* Copyright © 2009 Corbin Simpson
+ * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
@@ -27,39 +28,35 @@
* Authors:
* Corbin Simpson <MostAwesomeDude@gmail.com>
* Joakim Sindholt <opensource@zhasha.com>
+ * Marek Olšák <maraeo@gmail.com>
*/
#include "radeon_winsys.h"
-#include "radeon_drm_buffer.h"
+#include "radeon_drm_bo.h"
+#include "radeon_drm_cs.h"
#include "radeon_drm_public.h"
#include "pipebuffer/pb_bufmgr.h"
#include "util/u_memory.h"
-#include "state_tracker/drm_driver.h"
-
-#include <radeon_drm.h>
-#include <radeon_bo_gem.h>
-#include <radeon_cs_gem.h>
#include <xf86drm.h>
#include <stdio.h>
-
-/* Enable/disable Hyper-Z access. Return TRUE on success. */
-static boolean radeon_set_hyperz_access(int fd, boolean enable)
-{
#ifndef RADEON_INFO_WANT_HYPERZ
#define RADEON_INFO_WANT_HYPERZ 7
#endif
+#ifndef RADEON_INFO_WANT_CMASK
+#define RADEON_INFO_WANT_CMASK 8
+#endif
+/* Enable/disable feature access. Return TRUE on success. */
+static boolean radeon_set_fd_access(int fd, unsigned request, boolean enable)
+{
struct drm_radeon_info info = {0};
unsigned value = enable ? 1 : 0;
- if (!debug_get_bool_option("RADEON_HYPERZ", FALSE))
- return FALSE;
-
info.value = (unsigned long)&value;
- info.request = RADEON_INFO_WANT_HYPERZ;
+ info.request = request;
if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) != 0)
return FALSE;
@@ -99,28 +96,21 @@ static void do_ioctls(struct radeon_drm_winsys *winsys)
* we don't actually use the info for anything yet. */
version = drmGetVersion(winsys->fd);
- if (version->version_major != 2) {
+ if (version->version_major != 2 ||
+ version->version_minor < 3) {
fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is "
- "only compatible with 2.x.x\n", __FUNCTION__,
- version->version_major, version->version_minor,
+ "only compatible with 2.3.x (kernel 2.6.34) and later.\n",
+ __FUNCTION__,
+ version->version_major,
+ version->version_minor,
version->version_patchlevel);
drmFreeVersion(version);
exit(1);
}
-/* XXX Remove this ifdef when libdrm version 2.4.19 becomes mandatory. */
-#ifdef RADEON_BO_FLAGS_MICRO_TILE_SQUARE
- // Supported since 2.1.0.
- winsys->squaretiling = version->version_major > 2 ||
- version->version_minor >= 1;
-#endif
-
- winsys->drm_2_3_0 = version->version_major > 2 ||
- version->version_minor >= 3;
-
- winsys->drm_2_6_0 = version->version_major > 2 ||
- (version->version_major == 2 &&
- version->version_minor >= 6);
+ winsys->drm_major = version->version_major;
+ winsys->drm_minor = version->version_minor;
+ winsys->drm_patchlevel = version->version_patchlevel;
info.request = RADEON_INFO_DEVICE_ID;
retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info));
@@ -149,7 +139,15 @@ static void do_ioctls(struct radeon_drm_winsys *winsys)
}
winsys->z_pipes = target;
- winsys->hyperz = radeon_set_hyperz_access(winsys->fd, TRUE);
+ if (debug_get_bool_option("RADEON_HYPERZ", FALSE)) {
+ winsys->hyperz = radeon_set_fd_access(winsys->fd,
+ RADEON_INFO_WANT_HYPERZ, TRUE);
+ }
+
+ if (debug_get_bool_option("RADEON_CMASK", FALSE)) {
+ winsys->aacompress = radeon_set_fd_access(winsys->fd,
+ RADEON_INFO_WANT_CMASK, TRUE);
+ }
retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_GEM_INFO,
&gem_info, sizeof(gem_info));
@@ -161,18 +159,9 @@ static void do_ioctls(struct radeon_drm_winsys *winsys)
winsys->gart_size = gem_info.gart_size;
winsys->vram_size = gem_info.vram_size;
- debug_printf("radeon: Successfully grabbed chipset info from kernel!\n"
- "radeon: DRM version: %d.%d.%d ID: 0x%04x GB: %d Z: %d\n"
- "radeon: GART size: %d MB VRAM size: %d MB\n"
- "radeon: HyperZ: %s\n",
- version->version_major, version->version_minor,
- version->version_patchlevel, winsys->pci_id,
- winsys->gb_pipes, winsys->z_pipes,
- winsys->gart_size / 1024 / 1024,
- winsys->vram_size / 1024 / 1024,
- winsys->hyperz ? "YES" : "NO");
-
drmFreeVersion(version);
+
+ winsys->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
}
static void radeon_winsys_destroy(struct r300_winsys_screen *rws)
@@ -181,12 +170,43 @@ static void radeon_winsys_destroy(struct r300_winsys_screen *rws)
ws->cman->destroy(ws->cman);
ws->kman->destroy(ws->kman);
-
- radeon_bo_manager_gem_dtor(ws->bom);
- radeon_cs_manager_gem_dtor(ws->csm);
FREE(rws);
}
+static uint32_t radeon_get_value(struct r300_winsys_screen *rws,
+ enum r300_value_id id)
+{
+ struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)rws;
+
+ switch(id) {
+ case R300_VID_PCI_ID:
+ return ws->pci_id;
+ case R300_VID_GB_PIPES:
+ return ws->gb_pipes;
+ case R300_VID_Z_PIPES:
+ return ws->z_pipes;
+ case R300_VID_GART_SIZE:
+ return ws->gart_size;
+ case R300_VID_VRAM_SIZE:
+ return ws->vram_size;
+ case R300_VID_DRM_MAJOR:
+ return ws->drm_major;
+ case R300_VID_DRM_MINOR:
+ return ws->drm_minor;
+ case R300_VID_DRM_PATCHLEVEL:
+ return ws->drm_patchlevel;
+ case R300_VID_DRM_2_6_0:
+ return ws->drm_major*100 + ws->drm_minor >= 206;
+ case R300_VID_DRM_2_8_0:
+ return ws->drm_major*100 + ws->drm_minor >= 208;
+ case R300_CAN_HYPERZ:
+ return ws->hyperz;
+ case R300_CAN_AACOMPRESS:
+ return ws->aacompress;
+ }
+ return 0;
+}
+
struct r300_winsys_screen *r300_drm_winsys_screen_create(int fd)
{
struct radeon_drm_winsys *ws = CALLOC_STRUCT(radeon_drm_winsys);
@@ -202,13 +222,7 @@ struct r300_winsys_screen *r300_drm_winsys_screen_create(int fd)
}
/* Create managers. */
- ws->bom = radeon_bo_manager_gem_ctor(fd);
- if (!ws->bom)
- goto fail;
- ws->csm = radeon_cs_manager_gem_ctor(fd);
- if (!ws->csm)
- goto fail;
- ws->kman = radeon_drm_bufmgr_create(ws);
+ ws->kman = radeon_bomgr_create(ws);
if (!ws->kman)
goto fail;
ws->cman = pb_cache_manager_create(ws->kman, 1000000);
@@ -217,23 +231,18 @@ struct r300_winsys_screen *r300_drm_winsys_screen_create(int fd)
/* Set functions. */
ws->base.destroy = radeon_winsys_destroy;
+ ws->base.get_value = radeon_get_value;
- radeon_drm_bufmgr_init_functions(ws);
- radeon_winsys_init_functions(ws);
+ radeon_bomgr_init_functions(ws);
+ radeon_drm_cs_init_functions(ws);
return &ws->base;
fail:
- if (ws->bom)
- radeon_bo_manager_gem_dtor(ws->bom);
- if (ws->csm)
- radeon_cs_manager_gem_dtor(ws->csm);
-
if (ws->cman)
ws->cman->destroy(ws->cman);
if (ws->kman)
ws->kman->destroy(ws->kman);
-
FREE(ws);
return NULL;
}
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
new file mode 100644
index 0000000000..951791a172
--- /dev/null
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright © 2008 Jérôme Glisse
+ * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Marek Olšák <maraeo@gmail.com>
+ *
+ * Based on work from libdrm_radeon by:
+ * Aapo Tahkola <aet@rasterburn.org>
+ * Nicolai Haehnle <prefect_@gmx.net>
+ * Jérôme Glisse <glisse@freedesktop.org>
+ */
+
+/*
+ This file replaces libdrm's radeon_cs_gem with our own implemention.
+ It's optimized specifically for r300g, but r600g could use it as well.
+ Reloc writes and space checking are faster and simpler than their
+ counterparts in libdrm (the time complexity of all the functions
+ is O(1) in nearly all scenarios, thanks to hashing).
+
+ It works like this:
+
+ cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
+ also adds the size of 'buf' to the used_gart and used_vram winsys variables
+ based on the domains, which are simply or'd for the accounting purposes.
+ The adding is skipped if the reloc is already present in the list, but it
+ accounts any newly-referenced domains.
+
+ cs_validate is then called, which just checks:
+ used_vram/gart < vram/gart_size * 0.8
+ The 0.8 number allows for some memory fragmentation. If the validation
+ fails, the pipe driver flushes CS and tries do the validation again,
+ i.e. it validates only that one operation. If it fails again, it drops
+ the operation on the floor and prints some nasty message to stderr.
+ (done in the pipe driver)
+
+ cs_write_reloc(cs, buf) just writes a reloc that has been added using
+ cs_add_reloc. The read_domain and write_domain parameters have been removed,
+ because we already specify them in cs_add_reloc.
+*/
+
+#include "radeon_drm_cs.h"
+
+#include "util/u_memory.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <xf86drm.h>
+
+#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))
+
+static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd)
+{
+ csc->fd = fd;
+ csc->nrelocs = 512;
+ csc->relocs_bo = (struct radeon_bo**)
+ CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
+ if (!csc->relocs_bo) {
+ return FALSE;
+ }
+
+ csc->relocs = (struct drm_radeon_cs_reloc*)
+ CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
+ if (!csc->relocs) {
+ FREE(csc->relocs_bo);
+ return FALSE;
+ }
+
+ csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
+ csc->chunks[0].length_dw = 0;
+ csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
+ csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
+ csc->chunks[1].length_dw = 0;
+ csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
+
+ csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
+ csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
+
+ csc->cs.num_chunks = 2;
+ csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
+ return TRUE;
+}
+
+static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
+{
+ unsigned i;
+
+ for (i = 0; i < csc->crelocs; i++) {
+ p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
+ radeon_bo_unref(csc->relocs_bo[i]);
+ csc->relocs_bo[i] = NULL;
+ }
+
+ csc->crelocs = 0;
+ csc->chunks[0].length_dw = 0;
+ csc->chunks[1].length_dw = 0;
+ csc->used_gart = 0;
+ csc->used_vram = 0;
+ memset(csc->is_handle_added, 0, sizeof(csc->is_handle_added));
+}
+
+static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
+{
+ radeon_cs_context_cleanup(csc);
+ FREE(csc->relocs_bo);
+ FREE(csc->relocs);
+}
+
+static struct r300_winsys_cs *radeon_drm_cs_create(struct r300_winsys_screen *rws)
+{
+ struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
+ struct radeon_drm_cs *cs;
+
+ cs = CALLOC_STRUCT(radeon_drm_cs);
+ if (!cs) {
+ return NULL;
+ }
+
+ cs->ws = ws;
+
+ if (!radeon_init_cs_context(&cs->csc1, cs->ws->fd)) {
+ FREE(cs);
+ return NULL;
+ }
+ if (!radeon_init_cs_context(&cs->csc2, cs->ws->fd)) {
+ radeon_destroy_cs_context(&cs->csc1);
+ FREE(cs);
+ return NULL;
+ }
+
+ /* Set the first command buffer as current. */
+ cs->csc = &cs->csc1;
+ cs->cst = &cs->csc2;
+ cs->base.buf = cs->csc->buf;
+
+ p_atomic_inc(&ws->num_cs);
+ return &cs->base;
+}
+
+#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)
+
+static INLINE void update_domains(struct drm_radeon_cs_reloc *reloc,
+ enum r300_buffer_domain rd,
+ enum r300_buffer_domain wd,
+ enum r300_buffer_domain *added_domains)
+{
+ *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
+
+ if (reloc->read_domains & wd) {
+ reloc->read_domains = rd;
+ reloc->write_domain = wd;
+ } else if (rd & reloc->write_domain) {
+ reloc->read_domains = rd;
+ reloc->write_domain |= wd;
+ } else {
+ reloc->read_domains |= rd;
+ reloc->write_domain |= wd;
+ }
+}
+
+int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
+{
+ struct drm_radeon_cs_reloc *reloc;
+ unsigned i;
+ unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
+
+ if (csc->is_handle_added[hash]) {
+ reloc = csc->relocs_hashlist[hash];
+ if (reloc->handle == bo->handle) {
+ return csc->reloc_indices_hashlist[hash];
+ }
+
+ /* Hash collision, look for the BO in the list of relocs linearly. */
+ for (i = csc->crelocs; i != 0;) {
+ --i;
+ reloc = &csc->relocs[i];
+ if (reloc->handle == bo->handle) {
+ /* Put this reloc in the hash list.
+ * This will prevent additional hash collisions if there are
+ * several subsequent get_reloc calls of the same buffer.
+ *
+ * Example: Assuming buffers A,B,C collide in the hash list,
+ * the following sequence of relocs:
+ * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
+ * will collide here: ^ and here: ^,
+ * meaning that we should get very few collisions in the end. */
+ csc->relocs_hashlist[hash] = reloc;
+ csc->reloc_indices_hashlist[hash] = i;
+ /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
+ return i;
+ }
+ }
+ }
+
+ return -1;
+}
+
+static void radeon_add_reloc(struct radeon_cs_context *csc,
+ struct radeon_bo *bo,
+ enum r300_buffer_domain rd,
+ enum r300_buffer_domain wd,
+ enum r300_buffer_domain *added_domains)
+{
+ struct drm_radeon_cs_reloc *reloc;
+ unsigned i;
+ unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
+
+ if (csc->is_handle_added[hash]) {
+ reloc = csc->relocs_hashlist[hash];
+ if (reloc->handle == bo->handle) {
+ update_domains(reloc, rd, wd, added_domains);
+ return;
+ }
+
+ /* Hash collision, look for the BO in the list of relocs linearly. */
+ for (i = csc->crelocs; i != 0;) {
+ --i;
+ reloc = &csc->relocs[i];
+ if (reloc->handle == bo->handle) {
+ update_domains(reloc, rd, wd, added_domains);
+
+ csc->relocs_hashlist[hash] = reloc;
+ csc->reloc_indices_hashlist[hash] = i;
+ /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
+ return;
+ }
+ }
+ }
+
+ /* New relocation, check if the backing array is large enough. */
+ if (csc->crelocs >= csc->nrelocs) {
+ uint32_t size;
+ csc->nrelocs += 10;
+
+ size = csc->nrelocs * sizeof(struct radeon_bo*);
+ csc->relocs_bo = (struct radeon_bo**)realloc(csc->relocs_bo, size);
+
+ size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
+ csc->relocs = (struct drm_radeon_cs_reloc*)realloc(csc->relocs, size);
+
+ csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
+ }
+
+ /* Initialize the new relocation. */
+ radeon_bo_ref(bo);
+ p_atomic_inc(&bo->num_cs_references);
+ csc->relocs_bo[csc->crelocs] = bo;
+ reloc = &csc->relocs[csc->crelocs];
+ reloc->handle = bo->handle;
+ reloc->read_domains = rd;
+ reloc->write_domain = wd;
+ reloc->flags = 0;
+
+ csc->is_handle_added[hash] = TRUE;
+ csc->relocs_hashlist[hash] = reloc;
+ csc->reloc_indices_hashlist[hash] = csc->crelocs;
+
+ csc->chunks[1].length_dw += RELOC_DWORDS;
+ csc->crelocs++;
+
+ *added_domains = rd | wd;
+}
+
+static void radeon_drm_cs_add_reloc(struct r300_winsys_cs *rcs,
+ struct r300_winsys_cs_handle *buf,
+ enum r300_buffer_domain rd,
+ enum r300_buffer_domain wd)
+{
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+ struct radeon_bo *bo = (struct radeon_bo*)buf;
+ enum r300_buffer_domain added_domains;
+
+ radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains);
+
+ if (!added_domains)
+ return;
+
+ if (added_domains & R300_DOMAIN_GTT)
+ cs->csc->used_gart += bo->size;
+ if (added_domains & R300_DOMAIN_VRAM)
+ cs->csc->used_vram += bo->size;
+}
+
+static boolean radeon_drm_cs_validate(struct r300_winsys_cs *rcs)
+{
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+
+ return cs->csc->used_gart < cs->ws->gart_size * 0.8 &&
+ cs->csc->used_vram < cs->ws->vram_size * 0.8;
+}
+
+static void radeon_drm_cs_write_reloc(struct r300_winsys_cs *rcs,
+ struct r300_winsys_cs_handle *buf)
+{
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+ struct radeon_bo *bo = (struct radeon_bo*)buf;
+
+ unsigned index = radeon_get_reloc(cs->csc, bo);
+
+ if (index == -1) {
+ fprintf(stderr, "r300: Cannot get a relocation in %s.\n", __func__);
+ return;
+ }
+
+ OUT_CS(&cs->base, 0xc0001000);
+ OUT_CS(&cs->base, index * RELOC_DWORDS);
+}
+
+static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param)
+{
+ struct radeon_cs_context *csc = (struct radeon_cs_context*)param;
+ unsigned i;
+
+ if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
+ &csc->cs, sizeof(struct drm_radeon_cs))) {
+ if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
+ unsigned i;
+
+ fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
+ for (i = 0; i < csc->chunks[0].length_dw; i++) {
+ fprintf(stderr, "0x%08X\n", csc->buf[i]);
+ }
+ } else {
+ fprintf(stderr, "radeon: The kernel rejected CS, "
+ "see dmesg for more information.\n");
+ }
+ }
+
+ for (i = 0; i < csc->crelocs; i++)
+ p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);
+ return NULL;
+}
+
+void radeon_drm_cs_sync_flush(struct radeon_drm_cs *cs)
+{
+ /* Wait for any pending ioctl to complete. */
+ if (cs->thread) {
+ pipe_thread_wait(cs->thread);
+ cs->thread = 0;
+ }
+}
+
+DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE)
+
+static void radeon_drm_cs_flush(struct r300_winsys_cs *rcs, unsigned flags)
+{
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+ struct radeon_cs_context *tmp;
+
+ radeon_drm_cs_sync_flush(cs);
+
+ /* If the CS is not empty, emit it in a newly-spawned thread. */
+ if (cs->base.cdw) {
+ unsigned i, crelocs = cs->csc->crelocs;
+
+ cs->csc->chunks[0].length_dw = cs->base.cdw;
+
+ for (i = 0; i < crelocs; i++)
+ p_atomic_inc(&cs->csc->relocs_bo[i]->num_active_ioctls);
+
+ if (cs->ws->num_cpus > 1 && debug_get_option_thread() &&
+ (flags & R300_FLUSH_ASYNC)) {
+ cs->thread = pipe_thread_create(radeon_drm_cs_emit_ioctl, cs->csc);
+ assert(cs->thread);
+ } else {
+ radeon_drm_cs_emit_ioctl(cs->csc);
+ }
+ }
+
+ /* Flip command streams. */
+ tmp = cs->csc;
+ cs->csc = cs->cst;
+ cs->cst = tmp;
+
+ /* Prepare a new CS. */
+ radeon_cs_context_cleanup(cs->csc);
+
+ cs->base.buf = cs->csc->buf;
+ cs->base.cdw = 0;
+}
+
+static void radeon_drm_cs_destroy(struct r300_winsys_cs *rcs)
+{
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+ radeon_drm_cs_sync_flush(cs);
+ radeon_cs_context_cleanup(&cs->csc1);
+ radeon_cs_context_cleanup(&cs->csc2);
+ p_atomic_dec(&cs->ws->num_cs);
+ radeon_destroy_cs_context(&cs->csc1);
+ radeon_destroy_cs_context(&cs->csc2);
+ FREE(cs);
+}
+
+static void radeon_drm_cs_set_flush(struct r300_winsys_cs *rcs,
+ void (*flush)(void *ctx, unsigned flags),
+ void *user)
+{
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+ cs->flush_cs = flush;
+ cs->flush_data = user;
+}
+
+static boolean radeon_bo_is_referenced(struct r300_winsys_cs *rcs,
+ struct r300_winsys_cs_handle *_buf)
+{
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+ struct radeon_bo *bo = (struct radeon_bo*)_buf;
+
+ return radeon_bo_is_referenced_by_cs(cs, bo);
+}
+
+void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
+{
+ ws->base.cs_create = radeon_drm_cs_create;
+ ws->base.cs_destroy = radeon_drm_cs_destroy;
+ ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
+ ws->base.cs_validate = radeon_drm_cs_validate;
+ ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
+ ws->base.cs_flush = radeon_drm_cs_flush;
+ ws->base.cs_set_flush = radeon_drm_cs_set_flush;
+ ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
+}
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
new file mode 100644
index 0000000000..dfaa161c31
--- /dev/null
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+#ifndef RADEON_DRM_CS_H
+#define RADEON_DRM_CS_H
+
+#include "radeon_drm_bo.h"
+#include <radeon_drm.h>
+
+struct radeon_cs_context {
+ uint32_t buf[R300_MAX_CMDBUF_DWORDS];
+
+ int fd;
+ struct drm_radeon_cs cs;
+ struct drm_radeon_cs_chunk chunks[2];
+ uint64_t chunk_array[2];
+
+ /* Relocs. */
+ unsigned nrelocs;
+ unsigned crelocs;
+ struct radeon_bo **relocs_bo;
+ struct drm_radeon_cs_reloc *relocs;
+
+ /* 0 = BO not added, 1 = BO added */
+ char is_handle_added[256];
+ struct drm_radeon_cs_reloc *relocs_hashlist[256];
+ unsigned reloc_indices_hashlist[256];
+
+ unsigned used_vram;
+ unsigned used_gart;
+};
+
+struct radeon_drm_cs {
+ struct r300_winsys_cs base;
+
+ /* We flip between these two CS. While one is being consumed
+ * by the kernel in another thread, the other one is being filled
+ * by the pipe driver. */
+ struct radeon_cs_context csc1;
+ struct radeon_cs_context csc2;
+ /* The currently-used CS. */
+ struct radeon_cs_context *csc;
+ /* The CS being currently-owned by the other thread. */
+ struct radeon_cs_context *cst;
+
+ /* The winsys. */
+ struct radeon_drm_winsys *ws;
+
+ /* Flush CS. */
+ void (*flush_cs)(void *ctx, unsigned flags);
+ void *flush_data;
+
+ pipe_thread thread;
+};
+
+int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo);
+
+static INLINE struct radeon_drm_cs *
+radeon_drm_cs(struct r300_winsys_cs *base)
+{
+ return (struct radeon_drm_cs*)base;
+}
+
+static INLINE boolean radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs,
+ struct radeon_bo *bo)
+{
+ return bo->num_cs_references == bo->rws->num_cs ||
+ (bo->num_cs_references && radeon_get_reloc(cs->csc, bo) != -1);
+}
+
+static INLINE boolean radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo)
+{
+ return bo->num_cs_references;
+}
+
+void radeon_drm_cs_sync_flush(struct radeon_drm_cs *cs);
+void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws);
+
+#endif
diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c
deleted file mode 100644
index 9f59b3de46..0000000000
--- a/src/gallium/winsys/radeon/drm/radeon_r300.c
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#include "radeon_drm_buffer.h"
-
-#include "util/u_memory.h"
-#include "pipebuffer/pb_bufmgr.h"
-
-#include "radeon_cs_gem.h"
-#include "state_tracker/drm_driver.h"
-
-static unsigned get_pb_usage_from_create_flags(unsigned bind, unsigned usage,
- enum r300_buffer_domain domain)
-{
- unsigned res = 0;
-
- if (bind & (PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT))
- res |= PB_USAGE_GPU_WRITE;
-
- if (bind & PIPE_BIND_SAMPLER_VIEW)
- res |= PB_USAGE_GPU_READ | PB_USAGE_GPU_WRITE;
-
- if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
- res |= PB_USAGE_GPU_READ;
-
- if (bind & PIPE_BIND_TRANSFER_WRITE)
- res |= PB_USAGE_CPU_WRITE;
-
- if (bind & PIPE_BIND_TRANSFER_READ)
- res |= PB_USAGE_CPU_READ;
-
- /* Is usage of any use for us? Probably not. */
-
- /* Now add driver-specific usage flags. */
- if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
- res |= RADEON_PB_USAGE_VERTEX;
-
- if (domain & R300_DOMAIN_GTT)
- res |= RADEON_PB_USAGE_DOMAIN_GTT;
-
- if (domain & R300_DOMAIN_VRAM)
- res |= RADEON_PB_USAGE_DOMAIN_VRAM;
-
- return res;
-}
-
-static struct r300_winsys_buffer *
-radeon_r300_winsys_buffer_create(struct r300_winsys_screen *rws,
- unsigned size,
- unsigned alignment,
- unsigned bind,
- unsigned usage,
- enum r300_buffer_domain domain)
-{
- struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
- struct pb_desc desc;
- struct pb_manager *provider;
- struct pb_buffer *buffer;
-
- memset(&desc, 0, sizeof(desc));
- desc.alignment = alignment;
- desc.usage = get_pb_usage_from_create_flags(bind, usage, domain);
-
- /* Assign a buffer manager. */
- if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
- provider = ws->cman;
- else
- provider = ws->kman;
-
- buffer = provider->create_buffer(provider, size, &desc);
- if (!buffer)
- return NULL;
-
- return (struct r300_winsys_buffer*)buffer;
-}
-
-static void radeon_r300_winsys_buffer_reference(struct r300_winsys_screen *rws,
- struct r300_winsys_buffer **pdst,
- struct r300_winsys_buffer *src)
-{
- struct pb_buffer *_src = radeon_pb_buffer(src);
- struct pb_buffer *_dst = radeon_pb_buffer(*pdst);
-
- pb_reference(&_dst, _src);
-
- *pdst = (struct r300_winsys_buffer*)_dst;
-}
-
-static struct r300_winsys_buffer *radeon_r300_winsys_buffer_from_handle(struct r300_winsys_screen *rws,
- struct winsys_handle *whandle,
- unsigned *stride,
- unsigned *size)
-{
- struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
- struct pb_buffer *_buf;
-
- _buf = radeon_drm_bufmgr_create_buffer_from_handle(ws->kman, whandle->handle);
-
- if (stride)
- *stride = whandle->stride;
- if (size)
- *size = _buf->base.size;
-
- return (struct r300_winsys_buffer*)_buf;
-}
-
-static boolean radeon_r300_winsys_buffer_get_handle(struct r300_winsys_screen *rws,
- struct r300_winsys_buffer *buffer,
- unsigned stride,
- struct winsys_handle *whandle)
-{
- struct pb_buffer *_buf = radeon_pb_buffer(buffer);
- whandle->stride = stride;
- return radeon_drm_bufmgr_get_handle(_buf, whandle);
-}
-
-static void radeon_r300_winsys_cs_set_flush(struct r300_winsys_cs *rcs,
- void (*flush)(void *),
- void *user)
-{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- cs->flush_cs = flush;
- cs->flush_data = user;
- radeon_cs_space_set_flush(cs->cs, flush, user);
-}
-
-static boolean radeon_r300_winsys_cs_validate(struct r300_winsys_cs *rcs)
-{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
-
- return radeon_cs_space_check(cs->cs) >= 0;
-}
-
-static void radeon_r300_winsys_cs_reset_buffers(struct r300_winsys_cs *rcs)
-{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- radeon_cs_space_reset_bos(cs->cs);
-}
-
-static void radeon_r300_winsys_cs_flush(struct r300_winsys_cs *rcs)
-{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- int retval;
-
- /* Don't flush a zero-sized CS. */
- if (!cs->base.cdw) {
- return;
- }
-
- cs->cs->cdw = cs->base.cdw;
-
- radeon_drm_bufmgr_flush_maps(cs->ws->kman);
-
- /* Emit the CS. */
- retval = radeon_cs_emit(cs->cs);
- if (retval) {
- if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
- fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
- radeon_cs_print(cs->cs, stderr);
- } else {
- fprintf(stderr, "radeon: The kernel rejected CS, "
- "see dmesg for more information.\n");
- }
- }
-
- /* Reset CS.
- * Someday, when we care about performance, we should really find a way
- * to rotate between two or three CS objects so that the GPU can be
- * spinning through one CS while another one is being filled. */
- radeon_cs_erase(cs->cs);
-
- cs->base.buf = cs->cs->packets;
- cs->base.cdw = cs->cs->cdw;
-}
-
-static uint32_t radeon_get_value(struct r300_winsys_screen *rws,
- enum r300_value_id id)
-{
- struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)rws;
-
- switch(id) {
- case R300_VID_PCI_ID:
- return ws->pci_id;
- case R300_VID_GB_PIPES:
- return ws->gb_pipes;
- case R300_VID_Z_PIPES:
- return ws->z_pipes;
- case R300_VID_SQUARE_TILING_SUPPORT:
- return ws->squaretiling;
- case R300_VID_DRM_2_3_0:
- return ws->drm_2_3_0;
- case R300_VID_DRM_2_6_0:
- return ws->drm_2_6_0;
- case R300_CAN_HYPERZ:
- return ws->hyperz;
- }
- return 0;
-}
-
-static struct r300_winsys_cs *radeon_r300_winsys_cs_create(struct r300_winsys_screen *rws)
-{
- struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
- struct radeon_drm_cs *cs = CALLOC_STRUCT(radeon_drm_cs);
-
- if (!cs)
- return NULL;
-
- /* Size limit on IBs is 64 kibibytes. */
- cs->cs = radeon_cs_create(ws->csm, 1024 * 64 / 4);
- if (!cs->cs) {
- FREE(cs);
- return NULL;
- }
-
- radeon_cs_set_limit(cs->cs,
- RADEON_GEM_DOMAIN_GTT, ws->gart_size);
- radeon_cs_set_limit(cs->cs,
- RADEON_GEM_DOMAIN_VRAM, ws->vram_size);
-
- cs->ws = ws;
- cs->base.buf = cs->cs->packets;
- cs->base.cdw = cs->cs->cdw;
- return &cs->base;
-}
-
-static void radeon_r300_winsys_cs_destroy(struct r300_winsys_cs *rcs)
-{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- radeon_cs_destroy(cs->cs);
- FREE(cs);
-}
-
-void radeon_winsys_init_functions(struct radeon_drm_winsys *ws)
-{
- ws->base.get_value = radeon_get_value;
- ws->base.buffer_create = radeon_r300_winsys_buffer_create;
- ws->base.buffer_reference = radeon_r300_winsys_buffer_reference;
- ws->base.buffer_from_handle = radeon_r300_winsys_buffer_from_handle;
- ws->base.buffer_get_handle = radeon_r300_winsys_buffer_get_handle;
- ws->base.cs_create = radeon_r300_winsys_cs_create;
- ws->base.cs_destroy = radeon_r300_winsys_cs_destroy;
- ws->base.cs_validate = radeon_r300_winsys_cs_validate;
- ws->base.cs_flush = radeon_r300_winsys_cs_flush;
- ws->base.cs_reset_buffers = radeon_r300_winsys_cs_reset_buffers;
- ws->base.cs_set_flush = radeon_r300_winsys_cs_set_flush;
-}
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 81da1a25e0..9ecbb07457 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -36,8 +36,8 @@ struct radeon_drm_winsys {
struct r300_winsys_screen base;
int fd; /* DRM file descriptor */
+ int num_cs; /* The number of command streams created. */
- struct radeon_bo_manager *bom; /* Radeon BO manager. */
struct pb_manager *kman;
struct pb_manager *cman;
@@ -46,44 +46,22 @@ struct radeon_drm_winsys {
uint32_t z_pipes; /* Z pipe count (rv530 only) */
uint32_t gart_size; /* GART size. */
uint32_t vram_size; /* VRAM size. */
- boolean squaretiling; /* Square tiling support. */
- /* DRM 2.3.0 (R500 VAP regs, MSPOS regs, fixed tex3D size checking) */
- boolean drm_2_3_0;
- /* DRM 2.6.0 (Hyper-Z, GB_Z_PEQ_CONFIG allowed on rv350->r4xx) */
- boolean drm_2_6_0;
- /* Hyper-Z user */
- boolean hyperz;
-
- /* Radeon CS manager. */
- struct radeon_cs_manager *csm;
-};
-
-struct radeon_drm_cs {
- struct r300_winsys_cs base;
-
- /* The winsys. */
- struct radeon_drm_winsys *ws;
+ uint32_t num_cpus; /* Number of CPUs. */
- /* The libdrm command stream. */
- struct radeon_cs *cs;
+ unsigned drm_major;
+ unsigned drm_minor;
+ unsigned drm_patchlevel;
- /* Flush CS. */
- void (*flush_cs)(void *);
- void *flush_data;
+ /* Hyper-Z user */
+ boolean hyperz;
+ /* AA compression (CMask) */
+ boolean aacompress;
};
-static INLINE struct radeon_drm_cs *
-radeon_drm_cs(struct r300_winsys_cs *base)
-{
- return (struct radeon_drm_cs*)base;
-}
-
static INLINE struct radeon_drm_winsys *
radeon_drm_winsys(struct r300_winsys_screen *base)
{
return (struct radeon_drm_winsys*)base;
}
-void radeon_winsys_init_functions(struct radeon_drm_winsys *ws);
-
#endif
diff --git a/src/gallium/winsys/svga/drm/vmw_buffer.c b/src/gallium/winsys/svga/drm/vmw_buffer.c
index eca174a6c5..80d5dfc3ed 100644
--- a/src/gallium/winsys/svga/drm/vmw_buffer.c
+++ b/src/gallium/winsys/svga/drm/vmw_buffer.c
@@ -120,7 +120,8 @@ vmw_gmr_buffer_destroy(struct pb_buffer *_buf)
static void *
vmw_gmr_buffer_map(struct pb_buffer *_buf,
- unsigned flags)
+ unsigned flags,
+ void *flush_ctx)
{
struct vmw_gmr_buffer *buf = vmw_gmr_buffer(_buf);
return buf->map;
diff --git a/src/gallium/winsys/svga/drm/vmw_context.c b/src/gallium/winsys/svga/drm/vmw_context.c
index 11626ee637..f2124c1bf6 100644
--- a/src/gallium/winsys/svga/drm/vmw_context.c
+++ b/src/gallium/winsys/svga/drm/vmw_context.c
@@ -320,7 +320,7 @@ vmw_swc_region_relocation(struct svga_winsys_context *swc,
* to the FIFO won't cause flushing in the host.
*/
vswc->seen_regions += reloc->buffer->base.size;
- if(vswc->seen_regions >= VMW_GMR_POOL_SIZE/2)
+ if(vswc->seen_regions >= VMW_GMR_POOL_SIZE/3)
vswc->preemptive_flush = TRUE;
}
diff --git a/src/gallium/winsys/svga/drm/vmw_screen.c b/src/gallium/winsys/svga/drm/vmw_screen.c
index 51a4c55e5a..cc3003d252 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen.c
@@ -45,8 +45,6 @@ vmw_winsys_create( int fd, boolean use_old_scanout_flag )
vws->ioctl.drm_fd = fd;
vws->use_old_scanout_flag = use_old_scanout_flag;
- debug_printf("%s: use_old_scanout_flag == %s\n", __FUNCTION__,
- use_old_scanout_flag ? "true" : "false");
if (!vmw_ioctl_init(vws))
goto out_no_ioctl;
diff --git a/src/gallium/winsys/sw/dri/SConscript b/src/gallium/winsys/sw/dri/SConscript
index b255d725f9..f8e1fa6cfd 100644
--- a/src/gallium/winsys/sw/dri/SConscript
+++ b/src/gallium/winsys/sw/dri/SConscript
@@ -4,7 +4,7 @@
Import('*')
-if env['platform'] == 'linux':
+if env['platform'] in ('linux', 'sunos'):
env = env.Clone()
diff --git a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c
index 8f9a90858d..51245766d1 100644
--- a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c
+++ b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c
@@ -61,10 +61,8 @@ struct wrapper_sw_displaytarget
struct pipe_resource *tex;
struct pipe_transfer *transfer;
- unsigned width;
- unsigned height;
unsigned map_count;
- unsigned stride; /**< because we give stride at create */
+ unsigned stride; /**< because we get stride at create */
void *ptr;
};
@@ -95,7 +93,7 @@ wsw_dt_get_stride(struct wrapper_sw_displaytarget *wdt, unsigned *stride)
tr = pipe_get_transfer(pipe, tex, 0, 0,
PIPE_TRANSFER_READ_WRITE,
- 0, 0, wdt->width, wdt->height);
+ 0, 0, wdt->tex->width0, wdt->tex->height0);
if (!tr)
return FALSE;
@@ -208,7 +206,7 @@ wsw_dt_map(struct sw_winsys *ws,
tr = pipe_get_transfer(pipe, tex, 0, 0,
PIPE_TRANSFER_READ_WRITE,
- 0, 0, wdt->width, wdt->height);
+ 0, 0, wdt->tex->width0, wdt->tex->height0);
if (!tr)
return NULL;
@@ -248,6 +246,7 @@ wsw_dt_unmap(struct sw_winsys *ws,
pipe->transfer_unmap(pipe, wdt->transfer);
pipe->transfer_destroy(pipe, wdt->transfer);
+ pipe->flush(pipe, NULL);
wdt->transfer = NULL;
}
diff --git a/src/gallium/winsys/sw/xlib/SConscript b/src/gallium/winsys/sw/xlib/SConscript
index df01a9ec2b..e9c274059b 100644
--- a/src/gallium/winsys/sw/xlib/SConscript
+++ b/src/gallium/winsys/sw/xlib/SConscript
@@ -4,7 +4,7 @@
Import('*')
-if env['platform'] in ('cygwin', 'linux'):
+if env['platform'] in ('cygwin', 'linux', 'sunos'):
env = env.Clone()